mirror of
https://github.com/taroved/pol
synced 2025-05-18 15:20:09 -07:00
pickle
This commit is contained in:
parent
0385a09008
commit
bdc115db2d
@ -2,6 +2,7 @@ from __future__ import print_function
|
|||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from hashlib import md5
|
from hashlib import md5
|
||||||
import json
|
import json
|
||||||
|
import pickle
|
||||||
import time, sys, traceback
|
import time, sys, traceback
|
||||||
import re
|
import re
|
||||||
from urlparse import urlparse
|
from urlparse import urlparse
|
||||||
@ -180,21 +181,24 @@ class Downloader(object):
|
|||||||
print('Response <%s> ready (%s bytes)' % (url, len(response_str)))
|
print('Response <%s> ready (%s bytes)' % (url, len(response_str)))
|
||||||
response = self.buildScrapyResponse(response, response_str, url)
|
response = self.buildScrapyResponse(response, response_str, url)
|
||||||
|
|
||||||
response = HttpCompressionMiddleware().process_response(Request(url), response, None)
|
if selector_defer:
|
||||||
|
selector_defer.callback(response)
|
||||||
|
else:
|
||||||
|
self.writeResponse(request, response, feed_config)
|
||||||
|
self.run_memon()
|
||||||
|
|
||||||
|
def writeResponse(self, request, response, feed_config):
|
||||||
|
response = HttpCompressionMiddleware().process_response(Request(response.url), response, None)
|
||||||
response = DecompressionMiddleware().process_response(None, response, None)
|
response = DecompressionMiddleware().process_response(None, response, None)
|
||||||
|
|
||||||
if selector_defer:
|
if (isinstance(response, TextResponse)):
|
||||||
selector_defer.callback(response.selector)
|
ip = request.getHeader('x-real-ip') or request.client.host
|
||||||
else:
|
response_str = self.prepare_response_str(response.selector, response.headers, response.body_as_unicode(), response.url, feed_config, ip)
|
||||||
if (isinstance(response, TextResponse)):
|
if feed_config:
|
||||||
ip = request.getHeader('x-real-ip') or request.client.host
|
request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
|
||||||
response_str = self.prepare_response_str(response.selector, response.headers, response.body_as_unicode(), url, feed_config, ip)
|
|
||||||
if feed_config:
|
|
||||||
request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
|
|
||||||
|
|
||||||
request.write(response_str)
|
request.write(response_str)
|
||||||
request.finish()
|
request.finish()
|
||||||
self.run_memon()
|
|
||||||
|
|
||||||
def prepare_response_str(self, selector, headers, page_unicode, url, feed_config, ip=None):
|
def prepare_response_str(self, selector, headers, page_unicode, url, feed_config, ip=None):
|
||||||
if feed_config:
|
if feed_config:
|
||||||
@ -231,15 +235,12 @@ class Site(resource.Resource):
|
|||||||
self.downloader = Downloader(self.feed, debug, snapshot_dir, stat_tool, memon)
|
self.downloader = Downloader(self.feed, debug, snapshot_dir, stat_tool, memon)
|
||||||
|
|
||||||
def startRequest(self, request, url, feed_config = None, selector_defer=None):
|
def startRequest(self, request, url, feed_config = None, selector_defer=None):
|
||||||
response_str = self.tryLocalPage(url)
|
sresponse = self.tryLocalPage(url)
|
||||||
if response_str:
|
if sresponse:
|
||||||
response_str = response_str.decode('utf-8')
|
if selector_defer:
|
||||||
selector = Selector(text=response_str)
|
selector_defer.callback(sresponse)
|
||||||
response_str = self.downloader.prepare_response_str(selector, {}, response_str, url, feed_config)
|
else:
|
||||||
request.setHeader(b"Content-Type", b'text/xml; charset=utf-8')
|
self.downloader.writeResponse(request, sresponse, feed_config)
|
||||||
request.write(response_str)
|
|
||||||
request.finish()
|
|
||||||
print('Request <GET %s> local' % (url,))
|
|
||||||
else:
|
else:
|
||||||
agent = BrowserLikeRedirectAgent(
|
agent = BrowserLikeRedirectAgent(
|
||||||
Agent(reactor,
|
Agent(reactor,
|
||||||
@ -268,7 +269,7 @@ class Site(resource.Resource):
|
|||||||
domain = urlparse(url).netloc
|
domain = urlparse(url).netloc
|
||||||
try:
|
try:
|
||||||
with open('/home/taroved/pages/' + m + '.' + domain) as f:
|
with open('/home/taroved/pages/' + m + '.' + domain) as f:
|
||||||
return f.read()
|
return pickle.load(f)
|
||||||
except IOError:
|
except IOError:
|
||||||
return None
|
return None
|
||||||
return None
|
return None
|
||||||
|
Loading…
x
Reference in New Issue
Block a user