v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-16 06:10:09 -07:00

compression

This commit is contained in:
Alexandr Nesterenko 2017-08-25 20:10:18 +00:00
parent 3cb81dbcc7
commit f898beb47c
3 changed files with 8 additions and 2 deletions

View File

@ -12,9 +12,11 @@ from twisted.web.html import escape
twisted_headers = Headers
from scrapy.http.response.text import TextResponse
from scrapy.downloadermiddlewares.httpcompression import HttpCompressionMiddleware
from scrapy.downloadermiddlewares.decompression import DecompressionMiddleware
from scrapy.selector import Selector
from scrapy.http.request import Request
from scrapy.http import Headers
from scrapy.responsetypes import responsetypes
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
@ -127,6 +129,9 @@ def buildScrapyResponse(response, body, url):
respcls = responsetypes.from_args(headers=headers, url=url)
return respcls(url=url, status=status, headers=headers, body=body)
def buildScrapyRequest(url):
    """Wrap *url* in a Scrapy ``Request`` object and return it."""
    scrapy_request = Request(url)
    return scrapy_request
def downloadStarted(response, request, url, feed_config):
d = readBody(response)
d.addCallback(downloadDone, request=request, response=response, feed_config=feed_config)
@ -139,6 +144,7 @@ def downloadDone(response_str, request, response, feed_config):
print 'Response <%s> ready (%s bytes)' % (url, len(response_str))
response = buildScrapyResponse(response, response_str, url)
response = HttpCompressionMiddleware().process_response(Request(url), response, None)
response = DecompressionMiddleware().process_response(None, response, None)
if (isinstance(response, TextResponse)):

View File

@ -6,6 +6,7 @@ django-pipeline==1.5.4
mysqlclient==1.3.7
w3lib==1.12.0
feedgenerator==1.8
brotli==0.6.0
#sudo apt-get install nodejs npm
#sudo npm install -g less
#sudo ln -s /usr/bin/nodejs /usr/bin/node

View File

@ -30,8 +30,7 @@ def element_to_string(element, fields=None):
def test1_get_inner_html():
    """Check that element_to_unicode yields the root's inner markup without the outer tag."""
    markup = '<a>1<b>2</b>3<c>4</c>5</a>'
    expected_inner = u'1<b>2</b>3<c>4</c>5'
    root = etree.fromstring(markup)
    assert element_to_unicode(root, 'utf-8') == expected_inner
ids = [1,2,3,5,6,8,44,54,99,100,101,103,113,118,120,123,124,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,249,250,251,252,253,255,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410] # 254 timeout 344 pp gatevway timeout
ids = [1,54,100,131,134,140,146,159,162,166,168,175,176,183,189,190,192,204,205,226,230,236,244,251,253,260,261,263,271,272,273,275,277,279,280,308,311,312,313,315,316,317,318,327,332,333,334,335,337,338,340,347,350,352,354,355,356,357,358,359,360,361,362,363,369,371,373,376,385,399,402,405,406,410,411,412,422,427,448,467,470,471,472,473,477,479,481,512,514,519,522,523,524,526,527,528,529,532,533,536,538,547,557,587,592,597,598,599,600,606,607,608,615,616,617,618,628,629,641,642,643,645,646,647,648,649,653,658,660,673,676,678,680,681,683,685,704,709,710,717,718,719,728,730,732,735,744,745,746,749,757,758,759,772,776,777,778,779,783,784,785,786,789,790,791,792,793,794,795,797,798,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,835,836,839,840,842,843,844,845,846,847,848,849,850,851,852,853,854,855,861,862,863,864,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,889,890,891,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,923,924,926,927,928,929,930,931,933,934,935,936,937,938,939,940,941,942,943,944,947,948,949,950]
domain = "politepol.com"
def parse_feed0(text):