From 89b9ffb78d2fb29b06ed63b4506185f03ebcfe23 Mon Sep 17 00:00:00 2001 From: Alexandr Nesterenko Date: Sun, 1 Oct 2017 15:47:25 -0400 Subject: [PATCH] tests --- downloader.py | 6 +++-- pol/feed.py | 5 ++-- pol/log.py | 6 ++--- pol/server.py | 56 ++++++++++++++++++++++++---------------- test.py | 1 + tests/test_downloader.py | 27 +++++++++++++++++-- 6 files changed, 70 insertions(+), 31 deletions(-) diff --git a/downloader.py b/downloader.py index 5a58e56..52d9ae6 100644 --- a/downloader.py +++ b/downloader.py @@ -1,6 +1,8 @@ -from pol import Server +import sys +from pol.server import Server +from settings import DATABASES, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT, DEBUG port = sys.argv[1] if len(sys.argv) >= 2 else 1234 -Server(port, DATABASES['default'], SNAPSHOT_DIR, DOWNLOADER_USER_AGENT, DEBUG).run() +Server(port, None, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT).run() # DATABASES['default'] diff --git a/pol/feed.py b/pol/feed.py index dc8ab1d..7273073 100755 --- a/pol/feed.py +++ b/pol/feed.py @@ -13,6 +13,8 @@ from contextlib import closing from settings import DATABASES, DOWNLOADER_USER_AGENT from twisted.logger import Logger +from .db import get_conn + log = Logger() @@ -24,9 +26,8 @@ class Feed(object): FIELD_IDS = {'title': 1, 'description': 2, 'link': 3} - def __init__(self, db_creds, log): + def __init__(self, db_creds): self.db_creds = db_creds - self.log = log def save_post(self, conn, created, feed_id, post_fields): diff --git a/pol/log.py b/pol/log.py index 91640c4..f039a62 100755 --- a/pol/log.py +++ b/pol/log.py @@ -22,11 +22,11 @@ class LogHandler(object): globalLogBeginner.beginLoggingTo([self.print_log], discardBuffer=True, redirectStandardIO=False) - def print_log(self, event): - if event['log_level'].name == 'error' or 'isError' in event and event['isError']: + def print_log(event): + if 'isError' in event and event['isError']: + sys.stdout.write(bcolors.FAIL + formatEventAsClassicLogText(event) + bcolors.ENDC) sys.stderr.write(formatEventAsClassicLogText(event)) sys.stderr.flush() - sys.stdout.write(bcolors.FAIL + formatEventAsClassicLogText(event) + bcolors.ENDC) else: sys.stdout.write(formatEventAsClassicLogText(event)) sys.stdout.flush() diff --git a/pol/server.py b/pol/server.py index 6415c06..400f8b3 100755 --- a/pol/server.py +++ b/pol/server.py @@ -1,3 +1,4 @@ +from __future__ import print_function from datetime import datetime from hashlib import md5 import json @@ -7,9 +8,9 @@ import re from lxml import etree from twisted.web import server, resource -from twisted.internet import reactor, endpoints, +from twisted.internet import reactor, endpoints from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool -from twisted.web.server import +from twisted.web.server import NOT_DONE_YET from twisted.web.http_headers import Headers from twisted.web.html import escape twisted_headers = Headers @@ -23,14 +24,21 @@ from scrapy.http import Headers from scrapy.responsetypes import responsetypes from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory +from pol.log import LogHandler from .feed import Feed +from twisted.logger import Logger + + +log = Logger() + class Downloader(object): - def __init__(self, debug, stat_tool=None, mem_mon=None): + def __init__(self, debug, stat_tool=None, mem_mon=None, limiter=None): self.debug = debug self.stat_tool = stat_tool self.mem_mon = mem_mon + self.limiter = limiter def html2json(self, el): return [ @@ -110,7 +118,7 @@ class Downloader(object): return respcls(url=url, status=status, headers=headers, body=body) def error_html(self, msg): - return "%s\n") + return "%s\n") def downloadError(self, error, request=None, url=None, response=None, feed_config=None): # read for details: https://stackoverflow.com/questions/29423986/twisted-giving-twisted-web-client-partialdownloaderror-200-ok @@ -125,7 +133,7 @@ class Downloader(object): request.write('Downloader error: ' + error.getErrorMessage()) request.write('Traceback: ' + error.getTraceback()) else: - request.write(self.error_html('Something wrong. Contact us by email: politepol.com@gmail.com \n Scary mantra: ' + error.getErrorMessage())) + request.write(self.error_html('

PolitePol says: "Something wrong"

Try to refresh page or contact us by email: politepol.com@gmail.com\n(Help us to improve our service with your feedback)

Scary mantra: %s

' % escape(error.getErrorMessage()))) sys.stderr.write('\n'.join([str(datetime.utcnow()), request.uri, url, 'Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()])) request.finish() @@ -135,17 +143,16 @@ class Downloader(object): if not feed_id: feed_id = 0 s_url = url - log.info('Stat: ip={request.ip} feed_id={request.feed_id} url="{request.url}" error="{request.ex_msg}"', request=RequestStat( - ip = request.getHeader('x-real-ip') or request.client.host, - feed_id = feed_id, - post_cnt=0, - new_post_cnt=0, - url=s_url, - ex_msg=error.getErrorMessage(), - ex_callstack=error.getTraceback() - ), - stat=True - ) + if self.stat_tool: + self.stat_tool.trace( + ip = request.getHeader('x-real-ip') or request.client.host, + feed_id = feed_id, + post_cnt=0, + new_post_cnt=0, + url=s_url, + ex_msg=error.getErrorMessage(), + ex_callstack=error.getTraceback() + ) except: traceback.print_exc(file=sys.stdout) @@ -208,13 +215,14 @@ class Site(resource.Resource): feed_regexp = re.compile('^/feed1?/(\d{1,10})$') - def __init__(self, db_creds, snapshot_dir, user_agent, debug): + def __init__(self, db_creds, snapshot_dir, user_agent, debug=False, limiter=None): self.db_creds = db_creds self.snapshot_dir = snapshot_dir self.user_agent = user_agent + self.limiter = limiter self.downloader = Downloader(debug) - self.feed = Feed(db_creds, log) + self.feed = Feed(db_creds) def startRequest(self, request, url, feed_config = None): agent = BrowserLikeRedirectAgent( @@ -251,7 +259,7 @@ class Site(resource.Resource): elif self.feed_regexp.match(request.uri) is not None: # feed feed_id = self.feed_regexp.match(request.uri).groups()[0] - time_left = check_feed_request_time_limit(request.uri) + time_left = self.limiter.check_request_time_limit(request.uri) if self.limiter else 0 if time_left: request.setResponseCode(429) request.setHeader('Retry-After', str(time_left) + ' seconds') @@ -271,16 +279,20 @@ class Site(resource.Resource): class Server(object): - def __init__(self, port, db_creds, snapshot_dir, user_agent, debug): + def __init__(self, port, db_creds, snapshot_dir, user_agent, debug=False, limiter=None): self.port = port self.db_creds = db_creds self.snapshot_dir = snapshot_dir self.user_agent = user_agent + self.debug = debug + self.limiter = limiter - def setMemMonitor(_mem_mon=None) + self.log_handler = LogHandler() + + def setMemMonitor(self, _mem_mon=None): global mem_mon mem_mon = _mem_mon def run(self): - endpoints.serverFromString(reactor, "tcp:%s" % self.port).listen(server.Site(Site(self.db_creds, self.snapshot_dir, self.user_agent, self.debug))) + endpoints.serverFromString(reactor, "tcp:%s" % self.port).listen(server.Site(Site(self.db_creds, self.snapshot_dir, self.user_agent, self.debug, self.limiter))) reactor.run() \ No newline at end of file diff --git a/test.py b/test.py index 8dc6db4..12e5bfb 100644 --- a/test.py +++ b/test.py @@ -4,5 +4,6 @@ from tests.test_downloader import MFTests def main(): ts = MFTests() ts.test_log_handler() + ts.test_server() main() \ No newline at end of file diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 046140e..30ddb93 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -1,20 +1,33 @@ from __future__ import print_function -from twisted.internet import reactor, defer +import os + +from twisted.web.server import Site +from twisted.web.static import File +from twisted.internet import reactor, defer, endpoints from twisted.logger import Logger from pol.log import LogHandler +from pol.server import Server class MFTests(object): def __init__(self): self.log = Logger() + + def start_static(self): + resource = File(os.getcwd() + '/tests/pages') + factory = Site(resource) + endpoint = endpoints.TcP4ServerEndpoint(reactor, 0) + endpoint.listen(factory) + # reactor.run() + + def send_request(self): pass def stop_callback(self, none): reactor.stop() - pass def test_log_handler(self): handler = LogHandler() @@ -30,3 +43,13 @@ class MFTests(object): reactor.run() + def test_server(self): + d = defer.Deferred() + reactor.callLater(3, d.callback, None) + d.addCallback(self.stop_callback) + #d.addCallback(self.send_request) + d.addErrback(lambda err: print("callback error: %s\ncallback traceback: %s" % (err.getErrorMessage(), err.getTraceback()))) + + Server(port=1234, db_creds=None, snapshot_dir='~/tmp', user_agent='', debug=False).run() + +