v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-16 06:10:09 -07:00
This commit is contained in:
Alexandr Nesterenko 2017-10-01 15:47:25 -04:00
parent fc4d5559bd
commit 89b9ffb78d
6 changed files with 70 additions and 31 deletions

View File

@ -1,6 +1,8 @@
# Entry point: start the feed server on the port given as the first
# command-line argument (default 1234).
import sys

from pol.server import Server
from settings import DATABASES, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT, DEBUG

# The port is only interpolated into a "tcp:<port>" endpoint description, so
# the string taken from argv and the integer default are both acceptable.
port = sys.argv[1] if len(sys.argv) >= 2 else 1234

# Start exactly one server; run() blocks inside the reactor loop.
# NOTE(review): db_creds is passed as None and DEBUG is not forwarded --
# presumably a temporary switch-off of the database; confirm before deploying.
Server(port, None, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT).run()  # DATABASES['default']

View File

@ -13,6 +13,8 @@ from contextlib import closing
from settings import DATABASES, DOWNLOADER_USER_AGENT
from twisted.logger import Logger
from .db import get_conn
log = Logger()
@ -24,9 +26,8 @@ class Feed(object):
FIELD_IDS = {'title': 1, 'description': 2, 'link': 3}
def __init__(self, db_creds, log):
def __init__(self, db_creds):
self.db_creds = db_creds
self.log = log
def save_post(self, conn, created, feed_id, post_fields):

View File

@ -22,11 +22,11 @@ class LogHandler(object):
globalLogBeginner.beginLoggingTo([self.print_log], discardBuffer=True, redirectStandardIO=False)
def print_log(self, event):
    """Log observer: print one log event to the console.

    Events flagged with a truthy ``isError`` are written to stderr and, in
    the failure colour, to stdout; every other event goes to stdout only.

    event -- the log event dictionary handed over by the log publisher.
    """
    text = formatEventAsClassicLogText(event)
    if text is None:
        # The formatter yields None when an event has no printable text;
        # concatenating or writing None would raise TypeError.
        return

    if event.get('isError'):
        sys.stderr.write(text)
        sys.stderr.flush()
        sys.stdout.write(bcolors.FAIL + text + bcolors.ENDC)
    else:
        sys.stdout.write(text)
    # Both branches write to stdout, so flush it once for either path.
    sys.stdout.flush()

View File

@ -1,3 +1,4 @@
from __future__ import print_function
from datetime import datetime
from hashlib import md5
import json
@ -7,9 +8,9 @@ import re
from lxml import etree
from twisted.web import server, resource
from twisted.internet import reactor, endpoints,
from twisted.internet import reactor, endpoints
from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool
from twisted.web.server import
from twisted.web.server import NOT_DONE_YET
from twisted.web.http_headers import Headers
from twisted.web.html import escape
twisted_headers = Headers
@ -23,14 +24,21 @@ from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from pol.log import LogHandler
from .feed import Feed
from twisted.logger import Logger
log = Logger()
class Downloader(object):
def __init__(self, debug, stat_tool=None, mem_mon=None):
def __init__(self, debug, stat_tool=None, mem_mon=None, limiter=None):
self.debug = debug
self.stat_tool = stat_tool
self.mem_mon = mem_mon
self.limiter = limiter
def html2json(self, el):
return [
@ -110,7 +118,7 @@ class Downloader(object):
return respcls(url=url, status=status, headers=headers, body=body)
def error_html(self, msg):
    """Wrap *msg* in a minimal HTML page, turning newlines into ``<br/>``.

    *msg* is inserted verbatim so that callers can supply their own markup;
    any untrusted text must therefore be escaped by the caller before it is
    passed in.

    Returns the page as a string.
    """
    # The closing tag was previously emitted as "</html" (missing ">").
    return "<html><body>%s</body></html>" % msg.replace("\n", "<br/>\n")
def downloadError(self, error, request=None, url=None, response=None, feed_config=None):
# read for details: https://stackoverflow.com/questions/29423986/twisted-giving-twisted-web-client-partialdownloaderror-200-ok
@ -125,7 +133,7 @@ class Downloader(object):
request.write('Downloader error: ' + error.getErrorMessage())
request.write('Traceback: ' + error.getTraceback())
else:
request.write(self.error_html('Something wrong. Contact us by email: politepol.com@gmail.com \n Scary mantra: ' + error.getErrorMessage()))
request.write(self.error_html('<h1>PolitePol says: "Something wrong"</h1> <p><b>Try to refresh page or contact us by email: politepol.com@gmail.com</b>\n(Help us to improve our service with your feedback)</p> <p><i>Scary mantra: %s</i></p>' % escape(error.getErrorMessage())))
sys.stderr.write('\n'.join([str(datetime.utcnow()), request.uri, url, 'Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]))
request.finish()
@ -135,17 +143,16 @@ class Downloader(object):
if not feed_id:
feed_id = 0
s_url = url
log.info('Stat: ip={request.ip} feed_id={request.feed_id} url="{request.url}" error="{request.ex_msg}"', request=RequestStat(
ip = request.getHeader('x-real-ip') or request.client.host,
feed_id = feed_id,
post_cnt=0,
new_post_cnt=0,
url=s_url,
ex_msg=error.getErrorMessage(),
ex_callstack=error.getTraceback()
),
stat=True
)
if self.stat_tool:
self.stat_tool.trace(
ip = request.getHeader('x-real-ip') or request.client.host,
feed_id = feed_id,
post_cnt=0,
new_post_cnt=0,
url=s_url,
ex_msg=error.getErrorMessage(),
ex_callstack=error.getTraceback()
)
except:
traceback.print_exc(file=sys.stdout)
@ -208,13 +215,14 @@ class Site(resource.Resource):
# Matches "/feed/<id>" and "/feed1/<id>" where <id> is 1-10 digits; group 1 is
# the id. Raw string so that "\d" is not treated as a (invalid) string escape.
feed_regexp = re.compile(r'^/feed1?/(\d{1,10})$')
def __init__(self, db_creds, snapshot_dir, user_agent, debug=False, limiter=None):
    """Set up the web resource and the helpers it delegates to.

    db_creds     -- database connection settings; also handed to ``Feed``.
    snapshot_dir -- snapshot directory setting, stored unchanged.
    user_agent   -- user-agent setting, stored unchanged.
    debug        -- debug flag forwarded to the ``Downloader``.
    limiter      -- optional object whose ``check_request_time_limit(uri)``
                    is consulted for feed requests; with ``None`` no limit
                    is applied.
    """
    self.db_creds = db_creds
    self.snapshot_dir = snapshot_dir
    self.user_agent = user_agent
    self.limiter = limiter

    self.downloader = Downloader(debug)
    self.feed = Feed(db_creds)
def startRequest(self, request, url, feed_config = None):
agent = BrowserLikeRedirectAgent(
@ -251,7 +259,7 @@ class Site(resource.Resource):
elif self.feed_regexp.match(request.uri) is not None: # feed
feed_id = self.feed_regexp.match(request.uri).groups()[0]
time_left = check_feed_request_time_limit(request.uri)
time_left = self.limiter.check_request_time_limit(request.uri) if self.limiter else 0
if time_left:
request.setResponseCode(429)
request.setHeader('Retry-After', str(time_left) + ' seconds')
@ -271,16 +279,20 @@ class Site(resource.Resource):
class Server(object):
    """Holds the server configuration and runs the site on a TCP port."""

    def __init__(self, port, db_creds, snapshot_dir, user_agent, debug=False, limiter=None):
        """Store the configuration and create the log handler.

        port         -- TCP port; interpolated into a "tcp:<port>" endpoint
                        description, so a string or an int both work.
        db_creds     -- database connection settings passed on to ``Site``.
        snapshot_dir -- snapshot directory setting passed on to ``Site``.
        user_agent   -- user-agent setting passed on to ``Site``.
        debug        -- debug flag passed on to ``Site``.
        limiter      -- optional request limiter passed on to ``Site``.
        """
        self.port = port
        self.db_creds = db_creds
        self.snapshot_dir = snapshot_dir
        self.user_agent = user_agent
        self.debug = debug
        self.limiter = limiter

        # Kept on the instance so the handler lives as long as the server.
        self.log_handler = LogHandler()

    def setMemMonitor(self, _mem_mon=None):
        """Install *_mem_mon* as the module-level ``mem_mon`` object."""
        global mem_mon
        mem_mon = _mem_mon

    def run(self):
        """Listen on the configured port and block in the reactor loop."""
        site = Site(self.db_creds, self.snapshot_dir, self.user_agent, self.debug, self.limiter)
        # Listen exactly once; a second listen on the same port would fail.
        endpoints.serverFromString(reactor, "tcp:%s" % self.port).listen(server.Site(site))
        reactor.run()

View File

@ -4,5 +4,6 @@ from tests.test_downloader import MFTests
def main():
    # Run the manual test scenarios one after another.
    # NOTE(review): both scenarios appear to end up in reactor.run(); a
    # Twisted reactor cannot be started again once stopped -- confirm that
    # running them back to back in one process works as intended.
    ts = MFTests()
    ts.test_log_handler()
    ts.test_server()

# Executed on import/run of this script.
main()

View File

@ -1,20 +1,33 @@
from __future__ import print_function
from twisted.internet import reactor, defer
import os
from twisted.web.server import Site
from twisted.web.static import File
from twisted.internet import reactor, defer, endpoints
from twisted.logger import Logger
from pol.log import LogHandler
from pol.server import Server
class MFTests(object):
def __init__(self):
    # Logger used by the test scenarios.
    self.log = Logger()
def start_static(self):
    """Serve the ``tests/pages`` directory over HTTP on an ephemeral port.

    The directory is resolved relative to the current working directory.
    The reactor is not started here; the caller is expected to run it.

    Returns the Deferred produced by ``listen()`` so the caller can find
    out which port was actually bound (port 0 lets the OS choose one).
    """
    resource = File(os.path.join(os.getcwd(), 'tests', 'pages'))
    factory = Site(resource)
    # The class is spelled TCP4ServerEndpoint; "TcP4ServerEndpoint" does not
    # exist and raised AttributeError.
    endpoint = endpoints.TCP4ServerEndpoint(reactor, 0)
    return endpoint.listen(factory)
def send_request(self):
    # Placeholder: not implemented yet, does nothing.
    pass
def stop_callback(self, none):
    """Deferred callback that stops the reactor; the argument is ignored."""
    reactor.stop()
def test_log_handler(self):
handler = LogHandler()
@ -30,3 +43,13 @@ class MFTests(object):
reactor.run()
def test_server(self):
    """Start the server and stop the reactor again after three seconds."""

    def report_failure(err):
        # Print the failure raised by an earlier callback in the chain.
        print("callback error: %s\ncallback traceback: %s" % (err.getErrorMessage(), err.getTraceback()))

    stop_timer = defer.Deferred()
    # Fire the deferred from inside the running reactor after 3 seconds.
    reactor.callLater(3, stop_timer.callback, None)
    stop_timer.addCallback(self.stop_callback)
    stop_timer.addErrback(report_failure)

    # Blocks in the reactor loop until stop_callback stops it.
    Server(port=1234, db_creds=None, snapshot_dir='~/tmp', user_agent='', debug=False).run()