v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-18 15:20:09 -07:00
This commit is contained in:
Alexandr Nesterenko 2017-10-19 20:28:00 -04:00
parent 0385a09008
commit bdc115db2d

View File

@ -2,6 +2,7 @@ from __future__ import print_function
from datetime import datetime from datetime import datetime
from hashlib import md5 from hashlib import md5
import json import json
import pickle
import time, sys, traceback import time, sys, traceback
import re import re
from urlparse import urlparse from urlparse import urlparse
@ -180,21 +181,24 @@ class Downloader(object):
print('Response <%s> ready (%s bytes)' % (url, len(response_str))) print('Response <%s> ready (%s bytes)' % (url, len(response_str)))
response = self.buildScrapyResponse(response, response_str, url) response = self.buildScrapyResponse(response, response_str, url)
response = HttpCompressionMiddleware().process_response(Request(url), response, None) if selector_defer:
selector_defer.callback(response)
else:
self.writeResponse(request, response, feed_config)
self.run_memon()
def writeResponse(self, request, response, feed_config):
response = HttpCompressionMiddleware().process_response(Request(response.url), response, None)
response = DecompressionMiddleware().process_response(None, response, None) response = DecompressionMiddleware().process_response(None, response, None)
if selector_defer: if (isinstance(response, TextResponse)):
selector_defer.callback(response.selector) ip = request.getHeader('x-real-ip') or request.client.host
else: response_str = self.prepare_response_str(response.selector, response.headers, response.body_as_unicode(), response.url, feed_config, ip)
if (isinstance(response, TextResponse)): if feed_config:
ip = request.getHeader('x-real-ip') or request.client.host request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
response_str = self.prepare_response_str(response.selector, response.headers, response.body_as_unicode(), url, feed_config, ip)
if feed_config:
request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
request.write(response_str) request.write(response_str)
request.finish() request.finish()
self.run_memon()
def prepare_response_str(self, selector, headers, page_unicode, url, feed_config, ip=None): def prepare_response_str(self, selector, headers, page_unicode, url, feed_config, ip=None):
if feed_config: if feed_config:
@ -231,15 +235,12 @@ class Site(resource.Resource):
self.downloader = Downloader(self.feed, debug, snapshot_dir, stat_tool, memon) self.downloader = Downloader(self.feed, debug, snapshot_dir, stat_tool, memon)
def startRequest(self, request, url, feed_config = None, selector_defer=None): def startRequest(self, request, url, feed_config = None, selector_defer=None):
response_str = self.tryLocalPage(url) sresponse = self.tryLocalPage(url)
if response_str: if sresponse:
response_str = response_str.decode('utf-8') if selector_defer:
selector = Selector(text=response_str) selector_defer.callback(sresponse)
response_str = self.downloader.prepare_response_str(selector, {}, response_str, url, feed_config) else:
request.setHeader(b"Content-Type", b'text/xml; charset=utf-8') self.downloader.writeResponse(request, sresponse, feed_config)
request.write(response_str)
request.finish()
print('Request <GET %s> local' % (url,))
else: else:
agent = BrowserLikeRedirectAgent( agent = BrowserLikeRedirectAgent(
Agent(reactor, Agent(reactor,
@ -268,7 +269,7 @@ class Site(resource.Resource):
domain = urlparse(url).netloc domain = urlparse(url).netloc
try: try:
with open('/home/taroved/pages/' + m + '.' + domain) as f: with open('/home/taroved/pages/' + m + '.' + domain) as f:
return f.read() return pickle.load(f)
except IOError: except IOError:
return None return None
return None return None