mirror of
https://github.com/taroved/pol
synced 2025-05-16 06:10:09 -07:00
compression
This commit is contained in:
parent
3cb81dbcc7
commit
f898beb47c
@ -12,9 +12,11 @@ from twisted.web.html import escape
|
||||
twisted_headers = Headers
|
||||
|
||||
from scrapy.http.response.text import TextResponse
|
||||
from scrapy.downloadermiddlewares.httpcompression import HttpCompressionMiddleware
|
||||
from scrapy.downloadermiddlewares.decompression import DecompressionMiddleware
|
||||
from scrapy.selector import Selector
|
||||
|
||||
from scrapy.http.request import Request
|
||||
from scrapy.http import Headers
|
||||
from scrapy.responsetypes import responsetypes
|
||||
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
|
||||
@ -127,6 +129,9 @@ def buildScrapyResponse(response, body, url):
|
||||
respcls = responsetypes.from_args(headers=headers, url=url)
|
||||
return respcls(url=url, status=status, headers=headers, body=body)
|
||||
|
||||
def buildScrapyRequest(url):
    """Wrap *url* in a scrapy ``Request`` object and return it."""
    request = Request(url)
    return request
|
||||
|
||||
def downloadStarted(response, request, url, feed_config):
|
||||
d = readBody(response)
|
||||
d.addCallback(downloadDone, request=request, response=response, feed_config=feed_config)
|
||||
@ -139,6 +144,7 @@ def downloadDone(response_str, request, response, feed_config):
|
||||
print 'Response <%s> ready (%s bytes)' % (url, len(response_str))
|
||||
response = buildScrapyResponse(response, response_str, url)
|
||||
|
||||
response = HttpCompressionMiddleware().process_response(Request(url), response, None)
|
||||
response = DecompressionMiddleware().process_response(None, response, None)
|
||||
|
||||
if (isinstance(response, TextResponse)):
|
||||
|
@ -6,6 +6,7 @@ django-pipeline==1.5.4
|
||||
mysqlclient==1.3.7
|
||||
w3lib==1.12.0
|
||||
feedgenerator==1.8
|
||||
brotli==0.6.0
|
||||
#sudo apt-get install nodejs npm
|
||||
#sudo npm install -g less
|
||||
#sudo ln -s /usr/bin/nodejs /usr/bin/node
|
||||
|
3
tests.py
3
tests.py
@ -30,8 +30,7 @@ def element_to_string(element, fields=None):
|
||||
def test1_get_inner_html():
    """Check that ``element_to_unicode`` yields the markup inside ``<a>``.

    The expected value contains the text and child elements of the root
    node but not the enclosing ``<a>`` tag itself.
    """
    expected = u'1<b>2</b>3<c>4</c>5'
    tree = etree.fromstring('<a>1<b>2</b>3<c>4</c>5</a>')
    inner = element_to_unicode(tree, 'utf-8')
    assert inner == expected
|
||||
|
||||
ids = [1,2,3,5,6,8,44,54,99,100,101,103,113,118,120,123,124,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,249,250,251,252,253,255,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410] # 254 timeout 344 pp gatevway timeout
|
||||
ids = [1,54,100,131,134,140,146,159,162,166,168,175,176,183,189,190,192,204,205,226,230,236,244,251,253,260,261,263,271,272,273,275,277,279,280,308,311,312,313,315,316,317,318,327,332,333,334,335,337,338,340,347,350,352,354,355,356,357,358,359,360,361,362,363,369,371,373,376,385,399,402,405,406,410,411,412,422,427,448,467,470,471,472,473,477,479,481,512,514,519,522,523,524,526,527,528,529,532,533,536,538,547,557,587,592,597,598,599,600,606,607,608,615,616,617,618,628,629,641,642,643,645,646,647,648,649,653,658,660,673,676,678,680,681,683,685,704,709,710,717,718,719,728,730,732,735,744,745,746,749,757,758,759,772,776,777,778,779,783,784,785,786,789,790,791,792,793,794,795,797,798,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,835,836,839,840,842,843,844,845,846,847,848,849,850,851,852,853,854,855,861,862,863,864,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,889,890,891,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,923,924,926,927,928,929,930,931,933,934,935,936,937,938,939,940,941,942,943,944,947,948,949,950]
|
||||
domain = "politepol.com"
|
||||
|
||||
def parse_feed0(text):
|
||||
|
Loading…
x
Reference in New Issue
Block a user