v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-16 14:20:10 -07:00
This commit is contained in:
Alexandr Nesterenko 2017-10-01 15:47:25 -04:00
parent fc4d5559bd
commit 89b9ffb78d
6 changed files with 70 additions and 31 deletions

View File

@ -1,6 +1,8 @@
from pol import Server import sys
from pol.server import Server
from settings import DATABASES, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT, DEBUG
port = sys.argv[1] if len(sys.argv) >= 2 else 1234 port = sys.argv[1] if len(sys.argv) >= 2 else 1234
Server(port, DATABASES['default'], SNAPSHOT_DIR, DOWNLOADER_USER_AGENT, DEBUG).run() Server(port, None, SNAPSHOT_DIR, DOWNLOADER_USER_AGENT).run() # DATABASES['default']

View File

@ -13,6 +13,8 @@ from contextlib import closing
from settings import DATABASES, DOWNLOADER_USER_AGENT from settings import DATABASES, DOWNLOADER_USER_AGENT
from twisted.logger import Logger from twisted.logger import Logger
from .db import get_conn
log = Logger() log = Logger()
@ -24,9 +26,8 @@ class Feed(object):
FIELD_IDS = {'title': 1, 'description': 2, 'link': 3} FIELD_IDS = {'title': 1, 'description': 2, 'link': 3}
def __init__(self, db_creds, log): def __init__(self, db_creds):
self.db_creds = db_creds self.db_creds = db_creds
self.log = log
def save_post(self, conn, created, feed_id, post_fields): def save_post(self, conn, created, feed_id, post_fields):

View File

@ -22,11 +22,11 @@ class LogHandler(object):
globalLogBeginner.beginLoggingTo([self.print_log], discardBuffer=True, redirectStandardIO=False) globalLogBeginner.beginLoggingTo([self.print_log], discardBuffer=True, redirectStandardIO=False)
def print_log(self, event): def print_log(event):
if event['log_level'].name == 'error' or 'isError' in event and event['isError']: if 'isError' in event and event['isError']:
sys.stdout.write(bcolors.FAIL + formatEventAsClassicLogText(event) + bcolors.ENDC)
sys.stderr.write(formatEventAsClassicLogText(event)) sys.stderr.write(formatEventAsClassicLogText(event))
sys.stderr.flush() sys.stderr.flush()
sys.stdout.write(bcolors.FAIL + formatEventAsClassicLogText(event) + bcolors.ENDC)
else: else:
sys.stdout.write(formatEventAsClassicLogText(event)) sys.stdout.write(formatEventAsClassicLogText(event))
sys.stdout.flush() sys.stdout.flush()

View File

@ -1,3 +1,4 @@
from __future__ import print_function
from datetime import datetime from datetime import datetime
from hashlib import md5 from hashlib import md5
import json import json
@ -7,9 +8,9 @@ import re
from lxml import etree from lxml import etree
from twisted.web import server, resource from twisted.web import server, resource
from twisted.internet import reactor, endpoints, from twisted.internet import reactor, endpoints
from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool
from twisted.web.server import from twisted.web.server import NOT_DONE_YET
from twisted.web.http_headers import Headers from twisted.web.http_headers import Headers
from twisted.web.html import escape from twisted.web.html import escape
twisted_headers = Headers twisted_headers = Headers
@ -23,14 +24,21 @@ from scrapy.http import Headers
from scrapy.responsetypes import responsetypes from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from pol.log import LogHandler
from .feed import Feed from .feed import Feed
from twisted.logger import Logger
log = Logger()
class Downloader(object): class Downloader(object):
def __init__(self, debug, stat_tool=None, mem_mon=None): def __init__(self, debug, stat_tool=None, mem_mon=None, limiter=None):
self.debug = debug self.debug = debug
self.stat_tool = stat_tool self.stat_tool = stat_tool
self.mem_mon = mem_mon self.mem_mon = mem_mon
self.limiter = limiter
def html2json(self, el): def html2json(self, el):
return [ return [
@ -110,7 +118,7 @@ class Downloader(object):
return respcls(url=url, status=status, headers=headers, body=body) return respcls(url=url, status=status, headers=headers, body=body)
def error_html(self, msg): def error_html(self, msg):
return "<html><body>%s</body></html" % escape(msg).replace("\n", "<br/>\n") return "<html><body>%s</body></html" % msg.replace("\n", "<br/>\n")
def downloadError(self, error, request=None, url=None, response=None, feed_config=None): def downloadError(self, error, request=None, url=None, response=None, feed_config=None):
# read for details: https://stackoverflow.com/questions/29423986/twisted-giving-twisted-web-client-partialdownloaderror-200-ok # read for details: https://stackoverflow.com/questions/29423986/twisted-giving-twisted-web-client-partialdownloaderror-200-ok
@ -125,7 +133,7 @@ class Downloader(object):
request.write('Downloader error: ' + error.getErrorMessage()) request.write('Downloader error: ' + error.getErrorMessage())
request.write('Traceback: ' + error.getTraceback()) request.write('Traceback: ' + error.getTraceback())
else: else:
request.write(self.error_html('Something wrong. Contact us by email: politepol.com@gmail.com \n Scary mantra: ' + error.getErrorMessage())) request.write(self.error_html('<h1>PolitePol says: "Something wrong"</h1> <p><b>Try to refresh page or contact us by email: politepol.com@gmail.com</b>\n(Help us to improve our service with your feedback)</p> <p><i>Scary mantra: %s</i></p>' % escape(error.getErrorMessage())))
sys.stderr.write('\n'.join([str(datetime.utcnow()), request.uri, url, 'Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()])) sys.stderr.write('\n'.join([str(datetime.utcnow()), request.uri, url, 'Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]))
request.finish() request.finish()
@ -135,7 +143,8 @@ class Downloader(object):
if not feed_id: if not feed_id:
feed_id = 0 feed_id = 0
s_url = url s_url = url
log.info('Stat: ip={request.ip} feed_id={request.feed_id} url="{request.url}" error="{request.ex_msg}"', request=RequestStat( if self.stat_tool:
self.stat_tool.trace(
ip = request.getHeader('x-real-ip') or request.client.host, ip = request.getHeader('x-real-ip') or request.client.host,
feed_id = feed_id, feed_id = feed_id,
post_cnt=0, post_cnt=0,
@ -143,8 +152,6 @@ class Downloader(object):
url=s_url, url=s_url,
ex_msg=error.getErrorMessage(), ex_msg=error.getErrorMessage(),
ex_callstack=error.getTraceback() ex_callstack=error.getTraceback()
),
stat=True
) )
except: except:
traceback.print_exc(file=sys.stdout) traceback.print_exc(file=sys.stdout)
@ -208,13 +215,14 @@ class Site(resource.Resource):
feed_regexp = re.compile('^/feed1?/(\d{1,10})$') feed_regexp = re.compile('^/feed1?/(\d{1,10})$')
def __init__(self, db_creds, snapshot_dir, user_agent, debug): def __init__(self, db_creds, snapshot_dir, user_agent, debug=False, limiter=None):
self.db_creds = db_creds self.db_creds = db_creds
self.snapshot_dir = snapshot_dir self.snapshot_dir = snapshot_dir
self.user_agent = user_agent self.user_agent = user_agent
self.limiter = limiter
self.downloader = Downloader(debug) self.downloader = Downloader(debug)
self.feed = Feed(db_creds, log) self.feed = Feed(db_creds)
def startRequest(self, request, url, feed_config = None): def startRequest(self, request, url, feed_config = None):
agent = BrowserLikeRedirectAgent( agent = BrowserLikeRedirectAgent(
@ -251,7 +259,7 @@ class Site(resource.Resource):
elif self.feed_regexp.match(request.uri) is not None: # feed elif self.feed_regexp.match(request.uri) is not None: # feed
feed_id = self.feed_regexp.match(request.uri).groups()[0] feed_id = self.feed_regexp.match(request.uri).groups()[0]
time_left = check_feed_request_time_limit(request.uri) time_left = self.limiter.check_request_time_limit(request.uri) if self.limiter else 0
if time_left: if time_left:
request.setResponseCode(429) request.setResponseCode(429)
request.setHeader('Retry-After', str(time_left) + ' seconds') request.setHeader('Retry-After', str(time_left) + ' seconds')
@ -271,16 +279,20 @@ class Site(resource.Resource):
class Server(object): class Server(object):
def __init__(self, port, db_creds, snapshot_dir, user_agent, debug): def __init__(self, port, db_creds, snapshot_dir, user_agent, debug=False, limiter=None):
self.port = port self.port = port
self.db_creds = db_creds self.db_creds = db_creds
self.snapshot_dir = snapshot_dir self.snapshot_dir = snapshot_dir
self.user_agent = user_agent self.user_agent = user_agent
self.debug = debug
self.limiter = limiter
def setMemMonitor(_mem_mon=None) self.log_handler = LogHandler()
def setMemMonitor(self, _mem_mon=None):
global mem_mon global mem_mon
mem_mon = _mem_mon mem_mon = _mem_mon
def run(self): def run(self):
endpoints.serverFromString(reactor, "tcp:%s" % self.port).listen(server.Site(Site(self.db_creds, self.snapshot_dir, self.user_agent, self.debug))) endpoints.serverFromString(reactor, "tcp:%s" % self.port).listen(server.Site(Site(self.db_creds, self.snapshot_dir, self.user_agent, self.debug, self.limiter)))
reactor.run() reactor.run()

View File

@ -4,5 +4,6 @@ from tests.test_downloader import MFTests
def main(): def main():
ts = MFTests() ts = MFTests()
ts.test_log_handler() ts.test_log_handler()
ts.test_server()
main() main()

View File

@ -1,20 +1,33 @@
from __future__ import print_function from __future__ import print_function
from twisted.internet import reactor, defer import os
from twisted.web.server import Site
from twisted.web.static import File
from twisted.internet import reactor, defer, endpoints
from twisted.logger import Logger from twisted.logger import Logger
from pol.log import LogHandler from pol.log import LogHandler
from pol.server import Server
class MFTests(object): class MFTests(object):
def __init__(self): def __init__(self):
self.log = Logger() self.log = Logger()
def start_static(self):
    """Serve the ``tests/pages`` directory over HTTP on an OS-chosen port.

    The directory is resolved relative to the current working directory.
    Port 0 is passed to the endpoint so that a free port is picked
    automatically. The reactor is NOT started here; the caller has to
    run it.

    Returns:
        The Deferred returned by ``endpoint.listen`` so that callers can
        learn the port that was actually bound.
    """
    pages_dir = os.path.join(os.getcwd(), 'tests', 'pages')
    factory = Site(File(pages_dir))
    # The endpoint class is spelled ``TCP4ServerEndpoint``; the previous
    # ``TcP4ServerEndpoint`` spelling raised AttributeError on first use.
    endpoint = endpoints.TCP4ServerEndpoint(reactor, 0)
    return endpoint.listen(factory)
def send_request(self):
pass pass
def stop_callback(self, none): def stop_callback(self, none):
reactor.stop() reactor.stop()
pass
def test_log_handler(self): def test_log_handler(self):
handler = LogHandler() handler = LogHandler()
@ -30,3 +43,13 @@ class MFTests(object):
reactor.run() reactor.run()
def test_server(self):
    """Run a Server on port 1234 and stop the reactor 3 seconds later.

    A Deferred is scheduled to fire after 3 seconds; its callback is
    ``self.stop_callback``, which stops the reactor. Failures raised by
    that callback are printed instead of being propagated.
    """
    def report_failure(failure):
        # Print the failure message and traceback of a broken callback.
        print("callback error: %s\ncallback traceback: %s" % (failure.getErrorMessage(), failure.getTraceback()))

    shutdown = defer.Deferred()
    shutdown.addCallback(self.stop_callback)
    # Disabled step, kept for reference: shutdown.addCallback(self.send_request)
    shutdown.addErrback(report_failure)
    reactor.callLater(3, shutdown.callback, None)

    # run() starts the reactor and blocks until stop_callback stops it.
    server = Server(port=1234, db_creds=None, snapshot_dir='~/tmp', user_agent='', debug=False)
    server.run()