mirror of
https://github.com/taroved/pol
synced 2025-05-28 12:00:09 -07:00
xpathes in progress
This commit is contained in:
parent
a08a1b3675
commit
0bde3df8a9
@ -1,9 +1,10 @@
|
||||
import json
|
||||
import time, sys
|
||||
from hashlib import md5
|
||||
from datetime import datetime
|
||||
|
||||
from twisted.web import server, resource
|
||||
from twisted.internet import reactor, endpoints
|
||||
from twisted.internet import reactor, endpoints, defer
|
||||
from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError
|
||||
from twisted.web.server import NOT_DONE_YET
|
||||
from twisted.web.http_headers import Headers
|
||||
@ -22,7 +23,7 @@ import re
|
||||
|
||||
from feed import getFeedData, buildFeed
|
||||
|
||||
from settings import DOWNLOADER_USER_AGENT, FEED_REQUEST_PERIOD_LIMIT, DEBUG
|
||||
from settings import DOWNLOADER_USER_AGENT, FEED_REQUEST_PERIOD_LIMIT, DEBUG, SNAPSHOT_DIR
|
||||
|
||||
|
||||
if FEED_REQUEST_PERIOD_LIMIT:
|
||||
@ -55,6 +56,8 @@ def setBaseAndRemoveScriptsAndMore(response, url):
|
||||
|
||||
tree = response.selector.root.getroottree()
|
||||
|
||||
snapshot_time = str(time.time())
|
||||
|
||||
# set base url to html document
|
||||
head = tree.xpath("//head")
|
||||
if head:
|
||||
@ -96,10 +99,13 @@ def setBaseAndRemoveScriptsAndMore(response, url):
|
||||
# append html2json js object
|
||||
jsobj = html2json(tree.getroot())
|
||||
script = etree.Element('script', {'type': 'text/javascript'})
|
||||
script.text = 'var html2json = ' + json.dumps(jsobj) + ';'
|
||||
script.text = '\n'.join((
|
||||
'var html2json = ' + json.dumps(jsobj) + ';',
|
||||
'var snapshot_time = "' + snapshot_time + '";'
|
||||
))
|
||||
body[0].append(script)
|
||||
|
||||
return etree.tostring(tree, method='html')
|
||||
return (etree.tostring(tree, method='html'), snapshot_time)
|
||||
|
||||
def buildScrapyResponse(response, body, url):
|
||||
status = response.code
|
||||
@ -126,7 +132,15 @@ def downloadDone(response_str, request, response, feed_config):
|
||||
response_str = buildFeed(response, feed_config)
|
||||
request.setHeader(b"Content-Type", b'text/xml')
|
||||
else:
|
||||
response_str = setBaseAndRemoveScriptsAndMore(response, url)
|
||||
response_str, snapshot_time = setBaseAndRemoveScriptsAndMore(response, url)
|
||||
file_name = SNAPSHOT_DIR + '/' + snapshot_time + '_' + md5(url).hexdigest()
|
||||
# import pdb;pdb.set_trace()
|
||||
with open(file_name, 'w') as f:
|
||||
f.write(url + '\n')
|
||||
for k, v in response.headers.iteritems():
|
||||
for vv in v:
|
||||
f.write('%s: %s\n' % (k, vv))
|
||||
f.write('\n\n' + response_str)
|
||||
|
||||
request.write(response_str)
|
||||
request.finish()
|
||||
@ -137,7 +151,10 @@ def error_html(msg):
|
||||
def downloadError(error, request=None, url=None, response=None, feed_config=None):
|
||||
# read for details: https://stackoverflow.com/questions/29423986/twisted-giving-twisted-web-client-partialdownloaderror-200-ok
|
||||
if error.type is PartialDownloadError and error.value.status == '200':
|
||||
downloadDone(error.value.response, request, response, feed_config)
|
||||
d = defer.Deferred()
|
||||
reactor.callLater(0, d.callback, error.value.response) # error.value.response is response_str
|
||||
d.addCallback(downloadDone, request=request, response=response, feed_config=feed_config)
|
||||
d.addErrback(downloadError, request=request, url=url, response=response, feed_config=feed_config)
|
||||
return
|
||||
|
||||
if DEBUG:
|
||||
@ -166,7 +183,6 @@ class Downloader(resource.Resource):
|
||||
None
|
||||
)
|
||||
print 'Request <GET %s> started' % (url,)
|
||||
response_ref = []
|
||||
d.addCallback(downloadStarted, request=request, url=url, feed_config=feed_config)
|
||||
d.addErrback(downloadError, request=request, url=url)
|
||||
|
||||
|
@ -171,3 +171,5 @@ FEED1_PAGE_URL = '/feed1/'
|
||||
DOWNLOADER_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
|
||||
# limit of seconds in which user can access separate feed
|
||||
FEED_REQUEST_PERIOD_LIMIT = 0
|
||||
|
||||
# directory where downloadDone writes page snapshot files (url, response headers, processed html)
SNAPSHOT_DIR = '/tmp'
|
||||
|
@ -38,6 +38,23 @@ function check_pathes(pathes) {
|
||||
var _config = ['', {}];
|
||||
var _active = false;
|
||||
|
||||
// Show the check result for the selector called `name` in its control group,
// i.e. the grandparent of the `#ste-<name>` element.
// If `messages` has an 'error' key the group is switched to the 'error' class
// and its `.help-inline` text is set to that error; otherwise the group is
// switched to the 'info' class and the text is set to messages['count'].
function updateSelector(name, messages) {
    var group = $('#ste-' + name).parent().parent();
    var failed = 'error' in messages;

    // the two state classes are mutually exclusive: drop one, add the other
    group.removeClass(failed ? 'info' : 'error')
         .addClass(failed ? 'error' : 'info');
    group.find('.help-inline').text(failed ? messages['error'] : messages['count']);
}
|
||||
|
||||
// show status and error messages
// NOTE: not implemented yet -- the body is empty, so `data` is ignored and
// calling this function has no effect.
function updateUIMessages(data) {
}
|
||||
|
||||
function updateUI(config) {
|
||||
console.log(config);
|
||||
_config = config;
|
||||
@ -89,6 +106,7 @@ window.ET = {
|
||||
'init': init_tool,
|
||||
'check': check_pathes,
|
||||
'updateUI': updateUI,
|
||||
'getUIConfig': getUIConfig,
|
||||
'active': active
|
||||
};
|
||||
|
||||
|
@ -405,8 +405,11 @@ function onCreateButtonClick() {
|
||||
if (active) {
|
||||
//freeze UI
|
||||
loader(true);
|
||||
createFeed().then(function(feed_page_url){
|
||||
window.location.href = feed_page_url;
|
||||
createFeed().then(function(data){
|
||||
// A string result is the feed page url to navigate to; any other result is
// handed to the selector tool via ET.updateUI.
// (The original condition had an unbalanced extra ')' and did not parse.)
if (typeof data === 'string') {
    window.location.href = data; // feed_page_url
} else {
    ET.updateUI(data);
}
|
||||
}, function(error){
|
||||
//unfreez UI
|
||||
loader(false);
|
||||
@ -416,16 +419,21 @@ function onCreateButtonClick() {
|
||||
}
|
||||
|
||||
function createFeed() {
|
||||
// gather selected tag-ids
|
||||
var name_ids = {};
|
||||
selected_any = gatherSelectedTagIds(name_ids);
|
||||
if (!ET.active()) {
|
||||
// gather selected tag-ids
|
||||
var name_ids = {};
|
||||
selected_any = gatherSelectedTagIds(name_ids);
|
||||
}
|
||||
|
||||
if (selected_any)
|
||||
return new Promise(function(resolve, reject){
|
||||
$.ajax({
|
||||
type: 'POST',
|
||||
url: "/setup_create_feed",
|
||||
data: JSON.stringify({ html: iframeHtmlJson, names: name_ids, url:$('#create').data('page-url') }),
|
||||
url: EI.active() ? "/setup_create_feed_ext" :"/setup_create_feed",
|
||||
data: JSON.stringify(ET.active
|
||||
? { selectors: ET.getUIConfig(), snapshot_time: snapshot_time, url:$('#create').data('page-url') }
|
||||
: { html: iframeHtmlJson, names: name_ids, url:$('#create').data('page-url') }
|
||||
),
|
||||
contentType: "application/json; charset=utf-8",
|
||||
headers: {"X-CSRFToken": getCookie('csrftoken')},
|
||||
success: function(data){
|
||||
@ -471,7 +479,9 @@ $(document).ready(function(){
|
||||
// attach iframe elements event handlers
|
||||
$('iframe').contents().on('click', '*[tag-id]', onIframeElementClick);
|
||||
$('iframe').contents().on('mouseenter mouseleave', '*[tag-id]', onIframeElementHover);
|
||||
iframeHtmlJson = $('iframe')[0].contentWindow.html2json;
|
||||
var iframe_window = $('iframe')[0].contentWindow;
|
||||
iframeHtmlJson = iframe_window.html2json;
|
||||
snapshot_time = iframe_window.snapshot_time;
|
||||
loader(false);
|
||||
});
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user