v/pol
mirror of https://github.com/taroved/pol synced 2025-05-16 14:20:10 -07:00

limit requests with redis + debug mode

Alexandr Nesterenko 2016-07-25 15:25:53 -07:00
parent 1dda85f257
commit ae54af5ff3
3 changed files with 48 additions and 11 deletions

downloader.py

@@ -1,8 +1,10 @@
 import json
+import time, sys
+from datetime import datetime
 from twisted.web import server, resource
 from twisted.internet import reactor, endpoints
 from twisted.web.client import HTTPClientFactory, _makeGetterFactory
 from twisted.web.server import NOT_DONE_YET
 from scrapy.http.response.text import TextResponse
@@ -17,9 +19,24 @@ import re
 from feed import startFeedRequest
-from settings import DOWNLOADER_USER_AGENT
+from settings import DOWNLOADER_USER_AGENT, FEED_REQUEST_PERIOD_LIMIT, DEBUG
+
+if FEED_REQUEST_PERIOD_LIMIT:
+    import redis
+
+def check_feed_request_time_limit(url):
+    if FEED_REQUEST_PERIOD_LIMIT:
+        r = redis.StrictRedis(host='localhost', port=6379, db=0)
+        previous_timestamp = r.get(url)  # None if the url was never seen
+        if previous_timestamp:
+            time_passed = int(time.time()) - int(previous_timestamp)
+            if time_passed <= FEED_REQUEST_PERIOD_LIMIT:
+                # time left to wait
+                return FEED_REQUEST_PERIOD_LIMIT - time_passed
+        r.set(url, int(time.time()))
+    return 0
+
 def getPageFactory(url, contextFactory=None, *args, **kwargs):
     """
     Download a web page as a string.
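A note on check_feed_request_time_limit above: the GET-then-SET sequence is not atomic, so two concurrent requests for the same feed can both pass the check, and a fresh Redis connection is opened on every call. A minimal alternative sketch (not part of this commit; the module-level client is illustrative) that leans on redis-py's atomic SET with nx/ex:

    import time
    import redis

    from settings import FEED_REQUEST_PERIOD_LIMIT

    # One module-level client reuses redis-py's built-in connection pool
    # instead of reconnecting on every request.
    _redis = redis.StrictRedis(host='localhost', port=6379, db=0)

    def check_feed_request_time_limit(url):
        if not FEED_REQUEST_PERIOD_LIMIT:
            return 0
        # SET key value NX EX n stores the key only if it is absent and lets
        # Redis expire it after n seconds, so check and stamp are atomic.
        if _redis.set(url, int(time.time()), nx=True, ex=FEED_REQUEST_PERIOD_LIMIT):
            return 0  # first request inside the window
        ttl = _redis.ttl(url)  # seconds until the key expires
        return ttl if ttl and ttl > 0 else 0

Letting Redis expire the key also removes the manual timestamp arithmetic from the hot path.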
@@ -92,14 +109,20 @@ def downloadDone(response_str, request=None, page_factory=None, url=None):
     request.finish()

 def downloadError(error, request=None, page_factory=None):
-    request.write('Downloader error: ' + error.value)
+    if DEBUG:
+        request.write('Downloader error: ' + error.getErrorMessage())
+        request.write('Traceback: ' + error.getTraceback())
+    else:
+        request.write('Something went wrong')
+        sys.stderr.write(str(datetime.now()) + '\n')
+        sys.stderr.write('\n'.join(['Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]) + '\n')
     request.finish()

 class Downloader(resource.Resource):
     isLeaf = True

-    feed_regexp = re.compile('^/feed1?/(\d+)$')
+    feed_regexp = re.compile('^/feed1?/(\d{1,10})$')

     def startRequest(self, request, url):
         page_factory = getPageFactory(url,
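The tightened pattern above caps feed ids at ten digits, bounding the integers the server will ever parse out of a URI. A quick illustration (hypothetical ids):

    import re

    feed_regexp = re.compile(r'^/feed1?/(\d{1,10})$')
    assert feed_regexp.match('/feed/123').group(1) == '123'
    assert feed_regexp.match('/feed1/1234567890') is not None   # 10 digits: accepted
    assert feed_regexp.match('/feed/12345678901') is None       # 11 digits: rejected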
@@ -126,16 +149,23 @@ class Downloader(resource.Resource):
         '''
         Render page for frontend or RSS feed
         '''
-        if 'url' in request.args:
+        if 'url' in request.args: # page for frontend
             url = request.args['url'][0]
             self.startRequest(request, url)
             return NOT_DONE_YET
-        elif self.feed_regexp.match(request.uri) is not None:
+        elif self.feed_regexp.match(request.uri) is not None: # feed
             feed_id = self.feed_regexp.match(request.uri).groups()[0]
-            startFeedRequest(request, feed_id)
-            return NOT_DONE_YET
-        else:
+
+            time_left = check_feed_request_time_limit(request.uri)
+            if time_left:
+                request.setResponseCode(429)
+                request.setHeader('Retry-After', str(time_left))  # delay-seconds per the HTTP spec
+                return 'Too Many Requests'
+            else:
+                startFeedRequest(request, feed_id)
+                return NOT_DONE_YET
+        else: # neither page nor feed
             return 'Url is required'
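A consumer of the feed endpoint can honor the new 429 response by sleeping for the advertised delay. A minimal client-side sketch (assumes the third-party requests library and a hypothetical local URL; not part of this commit):

    import time
    import requests

    def fetch_feed(url):
        resp = requests.get(url)
        if resp.status_code == 429:
            # Retry-After carries plain delay-seconds, matching the server above
            time.sleep(int(resp.headers.get('Retry-After', '1')))
            resp = requests.get(url)
        resp.raise_for_status()
        return resp.text

    # e.g. fetch_feed('http://localhost:8080/feed/123')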

feed.py

@@ -108,8 +108,13 @@ def _downloadDone(response_str, request=None, page_factory=None, feed_config=None):
     request.finish()

 def _downloadError(error, request=None, page_factory=None):
-    request.write('Downloader error: ' + error.getErrorMessage())
-    request.write('Traceback: ' + error.getTraceback())
+    if DEBUG:
+        request.write('Downloader error: ' + error.getErrorMessage())
+        request.write('Traceback: ' + error.getTraceback())
+    else:
+        request.write('Something went wrong')
+        sys.stderr.write(str(datetime.now()) + '\n')
+        sys.stderr.write('\n'.join(['Downloader error: ' + error.getErrorMessage(), 'Traceback: ' + error.getTraceback()]) + '\n')
     request.finish()

 def startFeedRequest(request, feed_id):
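downloadError in downloader.py and _downloadError here now have identical bodies. A possible follow-up (a sketch, not part of this commit; the helper name is illustrative) extracts the shared logic into one function both modules import:

    import sys
    from datetime import datetime

    from settings import DEBUG

    def write_download_error(request, error):
        # Common body for downloader.downloadError and feed._downloadError.
        details = 'Downloader error: %s\nTraceback: %s' % (
            error.getErrorMessage(), error.getTraceback())
        if DEBUG:
            request.write(details)
        else:
            request.write('Something went wrong')
            sys.stderr.write('%s\n%s\n' % (datetime.now(), details))
        request.finish()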

settings.py

@@ -166,3 +166,5 @@ DOWNLOADER_PAGE_URL = '/downloader?url='
 FEED_PAGE_URL = '/feed/'
 FEED1_PAGE_URL = '/feed1/'
 DOWNLOADER_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
+# minimum number of seconds between requests to a single feed (0 disables the limit)
+FEED_REQUEST_PERIOD_LIMIT = 0
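With the default of 0 the Redis check is skipped entirely. A deployment that wants, say, at most one request per feed every five minutes would override the setting (illustrative values, assuming settings.py stays plain Python as shown):

    # local settings override, values are examples only
    DEBUG = False
    FEED_REQUEST_PERIOD_LIMIT = 5 * 60   # seconds between requests to one feed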