From ae54af5ff3e782575c39e9feb45badae79fc63ac Mon Sep 17 00:00:00 2001
From: Alexandr Nesterenko
Date: Mon, 25 Jul 2016 15:25:53 -0700
Subject: [PATCH] limit requests with redis + debug mode

---
 downloader.py                         | 48 ++++++++++++++++++++++++-----
 feed.py                               |  9 +++--
 frontend/frontend/settings.py.example |  2 ++
 3 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/downloader.py b/downloader.py
index 280b4e2..2426b28 100644
--- a/downloader.py
+++ b/downloader.py
@@ -1,8 +1,10 @@
 import json
+import time, sys
+from datetime import datetime
 
 from twisted.web import server, resource
 from twisted.internet import reactor, endpoints
-from twisted.web.client import HTTPClientFactory, _makeGetterFactory
+from twisted.web.client import HTTPClientFactory, _makeGetterFactory
 from twisted.web.server import NOT_DONE_YET
 
 from scrapy.http.response.text import TextResponse
@@ -17,9 +19,24 @@ import re
 
 from feed import startFeedRequest
 
-from settings import DOWNLOADER_USER_AGENT
+from settings import DOWNLOADER_USER_AGENT, FEED_REQUEST_PERIOD_LIMIT, DEBUG
 
+if FEED_REQUEST_PERIOD_LIMIT:
+    import redis
+
+def check_feed_request_time_limit(url):
+    if FEED_REQUEST_PERIOD_LIMIT:
+        r = redis.StrictRedis(host='localhost', port=6379, db=0)
+        previous_timestamp = r.get(url)
+        if previous_timestamp:
+            time_passed = int(time.time()) - int(previous_timestamp)
+            if time_passed <= FEED_REQUEST_PERIOD_LIMIT:
+                # time left to wait
+                return FEED_REQUEST_PERIOD_LIMIT - time_passed
+        r.set(url, int(time.time()))
+    return 0
+
 
 def getPageFactory(url, contextFactory=None, *args, **kwargs):
     """
     Download a web page as a string.
@@ -92,14 +109,20 @@ def downloadDone(response_str, request=None, page_factory=None, url=None):
     request.finish()
 
 def downloadError(error, request=None, page_factory=None):
-    request.write('Downloader error: ' + error.value)
+    if DEBUG:
+        request.write('Downloader error: ' + error.getErrorMessage())
+        request.write('Traceback: ' + error.getTraceback())
+    else:
+        request.write('Something went wrong')
+        sys.stderr.write(str(datetime.now()) + '\n')
+        sys.stderr.write('\n'.join(['Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]))
     request.finish()
 
 
 class Downloader(resource.Resource):
     isLeaf = True
 
-    feed_regexp = re.compile('^/feed1?/(\d+)$')
+    feed_regexp = re.compile('^/feed1?/(\d{1,10})$')
 
     def startRequest(self, request, url):
         page_factory = getPageFactory(url,
@@ -126,16 +149,23 @@ class Downloader(resource.Resource):
         '''
         Render page for frontend or RSS feed
         '''
-        if 'url' in request.args:
+        if 'url' in request.args: # page for frontend
             url = request.args['url'][0]
 
             self.startRequest(request, url)
             return NOT_DONE_YET
-        elif self.feed_regexp.match(request.uri) is not None:
+        elif self.feed_regexp.match(request.uri) is not None: # feed
             feed_id = self.feed_regexp.match(request.uri).groups()[0]
-            startFeedRequest(request, feed_id)
-            return NOT_DONE_YET
-        else:
+
+            time_left = check_feed_request_time_limit(request.uri)
+            if time_left:
+                request.setResponseCode(429)
+                request.setHeader('Retry-After', str(time_left))
+                return 'Too Many Requests'
+            else:
+                startFeedRequest(request, feed_id)
+                return NOT_DONE_YET
+        else: # neither page nor feed
             return 'Url is required'

diff --git a/feed.py b/feed.py
index 70d906d..4a75065 100644
--- a/feed.py
+++ b/feed.py
@@ -108,8 +108,13 @@ def _downloadDone(response_str, request=None, page_factory=None, feed_config=Non
     request.finish()
 
 def _downloadError(error, request=None, page_factory=None):
-    request.write('Downloader error: ' + error.getErrorMessage())
-    request.write('Traceback: ' + error.getTraceback())
+    if DEBUG:
+        request.write('Downloader error: ' + error.getErrorMessage())
+        request.write('Traceback: ' + error.getTraceback())
+    else:
+        request.write('Something went wrong')
+        sys.stderr.write(str(datetime.datetime.now()) + '\n')
+        sys.stderr.write('\n'.join(['Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]))
     request.finish()
 
 def startFeedRequest(request, feed_id):

diff --git a/frontend/frontend/settings.py.example b/frontend/frontend/settings.py.example
index f70d6fb..f9c067f 100644
--- a/frontend/frontend/settings.py.example
+++ b/frontend/frontend/settings.py.example
@@ -166,3 +166,5 @@ DOWNLOADER_PAGE_URL = '/downloader?url='
 FEED_PAGE_URL = '/feed/'
 FEED1_PAGE_URL = '/feed1/'
 DOWNLOADER_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'