v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-16 14:20:10 -07:00

xpath editor

This commit is contained in:
Alexandr Nesterenko 2017-08-06 16:07:59 +00:00
parent afd10fdf6d
commit cefe9e79d4
14 changed files with 194 additions and 66 deletions

View File

@ -215,7 +215,8 @@ class Downloader(resource.Resource):
else: # neither page and feed else: # neither page and feed
return 'Url is required' return 'Url is required'
port = sys.argv[1] if len(sys.argv) >= 2 else 1234
endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Downloader())) endpoints.serverFromString(reactor, "tcp:%s" % port).listen(server.Site(Downloader()))
print 'Server starting at http://localhost:1234' print 'Server starting at http://localhost:%s' % port
reactor.run() reactor.run()

14
feed.py
View File

@ -92,10 +92,11 @@ def _build_link(html, doc_url, url):
def buildFeed(response, feed_config): def buildFeed(response, feed_config):
response.selector.remove_namespaces() response.selector.remove_namespaces()
tree = response.selector.root.getroottree() selector = response.selector
tree = selector.root.getroottree()
# get data from html # get data from html
items = [] items = []
for node in tree.xpath(feed_config['xpath']): for node in selector.xpath(feed_config['xpath']):
item = {} item = {}
required_count = 0 required_count = 0
required_found = 0 required_found = 0
@ -103,13 +104,14 @@ def buildFeed(response, feed_config):
if field_name in feed_config['fields']: if field_name in feed_config['fields']:
if feed_config['required'][field_name]: if feed_config['required'][field_name]:
required_count += 1 required_count += 1
element_or_attr = node.xpath(feed_config['fields'][field_name])
if element_or_attr: extracted = node.xpath(feed_config['fields'][field_name]).extract()
item[field_name] = element_to_unicode(element_or_attr[0], response.encoding) if extracted:
item[field_name] = u''.join(extracted)
if feed_config['required'][field_name]: if feed_config['required'][field_name]:
required_found += 1 required_found += 1
if field_name == 'link': if field_name == 'link':
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item['link']) item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item[field_name])
if required_count == required_found: if required_count == required_found:
items.append(item) items.append(item)

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: PACKAGE VERSION\n" "Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2017-07-26 20:30+0300\n" "POT-Creation-Date: 2017-08-06 00:07+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n" "Language-Team: LANGUAGE <LL@li.org>\n"
@ -116,50 +116,58 @@ msgstr "and"
msgid "setup.description" msgid "setup.description"
msgstr "Description" msgstr "Description"
#: templates/frontend/setup.html:63 #: templates/frontend/setup.html:65
msgid "setup.item_xpath" msgid "setup.item_xpath"
msgstr "Post xpath" msgstr "Post xpath"
#: templates/frontend/setup.html:65 templates/frontend/setup.html.py:76 #: templates/frontend/setup.html:67 templates/frontend/setup.html.py:78
#: templates/frontend/setup.html:87 templates/frontend/setup.html.py:98 #: templates/frontend/setup.html:89 templates/frontend/setup.html.py:100
msgid "setup.skipped" msgid "setup.skipped"
msgstr "Skipped" msgstr "Skipped"
#: templates/frontend/setup.html:66 templates/frontend/setup.html.py:77 #: templates/frontend/setup.html:68 templates/frontend/setup.html.py:79
#: templates/frontend/setup.html:88 templates/frontend/setup.html.py:99 #: templates/frontend/setup.html:90 templates/frontend/setup.html.py:101
msgid "setup.Selected" msgid "setup.Selected"
msgstr "Selected %s items" msgstr "Selected %s items"
#: templates/frontend/setup.html:70 #: templates/frontend/setup.html:72
msgid "setup.title_xpath" msgid "setup.title_xpath"
msgstr "Title xpath" msgstr "Title xpath"
#: templates/frontend/setup.html:73 templates/frontend/setup.html.py:84 #: templates/frontend/setup.html:75 templates/frontend/setup.html.py:86
#: templates/frontend/setup.html:95 #: templates/frontend/setup.html:97
msgid "setup.Required" msgid "setup.Required"
msgstr "Required" msgstr "Required"
#: templates/frontend/setup.html:74 templates/frontend/setup.html.py:85 #: templates/frontend/setup.html:76 templates/frontend/setup.html.py:87
#: templates/frontend/setup.html:96 #: templates/frontend/setup.html:98
msgid "setup.Optional" msgid "setup.Optional"
msgstr "Optional" msgstr "Optional"
#: templates/frontend/setup.html:81 #: templates/frontend/setup.html:83
msgid "setup.link_xpath" msgid "setup.link_xpath"
msgstr "Link xpath" msgstr "Link xpath"
#: templates/frontend/setup.html:92 #: templates/frontend/setup.html:94
msgid "setup.description_xpath" msgid "setup.description_xpath"
msgstr "Description xpath" msgstr "Description xpath"
#: templates/frontend/setup.html:105 #: templates/frontend/setup.html:107
msgid "setup.clicker_tip" msgid "setup.clicker_tip"
msgstr "Visual constructor" msgstr "Visual constructor"
#: templates/frontend/setup.html:108 #: templates/frontend/setup.html:109
msgid "setup.confirm_loose"
msgstr "You will lose your changes. Are you sure?"
#: templates/frontend/setup.html:110
msgid "setup.extended_tip" msgid "setup.extended_tip"
msgstr "Selectors editor" msgstr "Selectors editor"
#: templates/frontend/setup.html:110 #: templates/frontend/setup.html:112
msgid "setup.check"
msgstr "Check"
#: templates/frontend/setup.html:113
msgid "setup.create" msgid "setup.create"
msgstr "Create" msgstr "Create"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr "" msgstr ""
"Project-Id-Version: PACKAGE VERSION\n" "Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n" "Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2017-07-26 20:30+0300\n" "POT-Creation-Date: 2017-08-06 00:07+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n" "Language-Team: LANGUAGE <LL@li.org>\n"
@ -117,50 +117,58 @@ msgstr "и"
msgid "setup.description" msgid "setup.description"
msgstr "Описание" msgstr "Описание"
#: templates/frontend/setup.html:63 #: templates/frontend/setup.html:65
msgid "setup.item_xpath" msgid "setup.item_xpath"
msgstr "Xpath поста" msgstr "Xpath поста"
#: templates/frontend/setup.html:65 templates/frontend/setup.html.py:76 #: templates/frontend/setup.html:67 templates/frontend/setup.html.py:78
#: templates/frontend/setup.html:87 templates/frontend/setup.html.py:98 #: templates/frontend/setup.html:89 templates/frontend/setup.html.py:100
msgid "setup.skipped" msgid "setup.skipped"
msgstr "Пропущено" msgstr "Пропущено"
#: templates/frontend/setup.html:66 templates/frontend/setup.html.py:77 #: templates/frontend/setup.html:68 templates/frontend/setup.html.py:79
#: templates/frontend/setup.html:88 templates/frontend/setup.html.py:99 #: templates/frontend/setup.html:90 templates/frontend/setup.html.py:101
msgid "setup.Selected" msgid "setup.Selected"
msgstr "Выбрано %s элементов" msgstr "Выбрано %s элементов"
#: templates/frontend/setup.html:70 #: templates/frontend/setup.html:72
msgid "setup.title_xpath" msgid "setup.title_xpath"
msgstr "Xpath названия" msgstr "Xpath названия"
#: templates/frontend/setup.html:73 templates/frontend/setup.html.py:84 #: templates/frontend/setup.html:75 templates/frontend/setup.html.py:86
#: templates/frontend/setup.html:95 #: templates/frontend/setup.html:97
msgid "setup.Required" msgid "setup.Required"
msgstr "Обязательное" msgstr "Обязательное"
#: templates/frontend/setup.html:74 templates/frontend/setup.html.py:85 #: templates/frontend/setup.html:76 templates/frontend/setup.html.py:87
#: templates/frontend/setup.html:96 #: templates/frontend/setup.html:98
msgid "setup.Optional" msgid "setup.Optional"
msgstr "Опциональное" msgstr "Опциональное"
#: templates/frontend/setup.html:81 #: templates/frontend/setup.html:83
msgid "setup.link_xpath" msgid "setup.link_xpath"
msgstr "Xpath ссылки" msgstr "Xpath ссылки"
#: templates/frontend/setup.html:92 #: templates/frontend/setup.html:94
msgid "setup.description_xpath" msgid "setup.description_xpath"
msgstr "Xpath описания" msgstr "Xpath описания"
#: templates/frontend/setup.html:105 #: templates/frontend/setup.html:107
msgid "setup.clicker_tip" msgid "setup.clicker_tip"
msgstr "Визуальный конструктор" msgstr "Визуальный конструктор"
#: templates/frontend/setup.html:108 #: templates/frontend/setup.html:109
msgid "setup.confirm_loose"
msgstr "Вы потеряете изменения. Вы уверены?"
#: templates/frontend/setup.html:110
msgid "setup.extended_tip" msgid "setup.extended_tip"
msgstr "Редактор селекторов" msgstr "Редактор селекторов"
#: templates/frontend/setup.html:110 #: templates/frontend/setup.html:112
msgid "setup.check"
msgstr "Проверить"
#: templates/frontend/setup.html:113
msgid "setup.create" msgid "setup.create"
msgstr "Создать" msgstr "Создать"

View File

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the boolean ``edited`` column to the ``feed`` model."""

    # Applied after migration 0003 of the ``frontend`` app
    # (presumably the one that added the ``required`` field -- confirm).
    dependencies = [
        ('frontend', '0003_field_required'),
    ]

    operations = [
        # New ``Feed.edited`` flag, declared with default=False.
        migrations.AddField(
            model_name='feed',
            name='edited',
            field=models.BooleanField(default=False),
        ),
    ]

View File

@ -3,6 +3,7 @@ from django.db import models
class Feed(models.Model): class Feed(models.Model):
uri = models.CharField(max_length=2000) uri = models.CharField(max_length=2000)
xpath = models.CharField(max_length=2000) xpath = models.CharField(max_length=2000)
edited = models.BooleanField(default=False)
created = models.DateTimeField(auto_now_add=True) created = models.DateTimeField(auto_now_add=True)
class Field(models.Model): class Field(models.Model):

View File

@ -19,6 +19,7 @@ def build_xpath_results(selectors, file_name):
feed_result = None feed_result = None
field_results = {} field_results = {}
success = True
post_elems = None post_elems = None
try: try:
doc = Selector(text=html) doc = Selector(text=html)
@ -32,7 +33,6 @@ def build_xpath_results(selectors, file_name):
for name, xpath in field_xpathes.iteritems(): for name, xpath in field_xpathes.iteritems():
if not (name in field_results): if not (name in field_results):
field_results[name] = {} field_results[name] = {}
# import pdb;pdb.set_trace()
xpath = xpath.strip() xpath = xpath.strip()
try: try:
extracts = elem.xpath(xpath).extract() extracts = elem.xpath(xpath).extract()
@ -43,6 +43,7 @@ def build_xpath_results(selectors, file_name):
if not extracts: if not extracts:
selected_required = False selected_required = False
except ValueError as ex: except ValueError as ex:
success = False
field_results[name]['error'] = ex.message field_results[name]['error'] = ex.message
for name, xpath in field_xpathes.iteritems(): for name, xpath in field_xpathes.iteritems():
@ -67,9 +68,11 @@ def build_xpath_results(selectors, file_name):
if not (name in field_results): if not (name in field_results):
field_results[name] = {} field_results[name] = {}
field_results[name]['error'] = ex.message field_results[name]['error'] = ex.message
success = False
except ValueError as ex: except ValueError as ex:
feed_result = {'error': ex.message} feed_result = {'error': ex.message}
success = False
return [feed_result, field_results] return [[feed_result, field_results], success]

View File

@ -68,6 +68,8 @@ function updateUIMessages(data) {
else else
updateSelector(name, {}); updateSelector(name, {});
}); });
hide_check_show_create(true);
} }
function updateUI(config) { function updateUI(config) {
@ -86,11 +88,13 @@ function showIcon(show) {
function getUIConfig() { function getUIConfig() {
var cfg = [ var cfg = [
$('#ste-parent').val(), $('#ste-parent').val().trim(),
{} {}
]; ];
['title', 'description', 'link'].forEach(function(name){ ['title', 'description', 'link'].forEach(function(name){
cfg[1][name] = $('#ste-'+ name).val(); var xpath = $('#ste-'+ name).val();
if (xpath.trim().length > 0)
cfg[1][name] = xpath;
}); });
return cfg; return cfg;
} }
@ -139,6 +143,38 @@ function show_ext(show) {
_active = show; _active = show;
} }
/**
 * Show exactly one of the two action buttons.
 *
 * @param {boolean} hide  truthy: hide "#check" and show "#create";
 *                        falsy: show "#check" and hide "#create".
 */
function hide_check_show_create(hide) {
    var checkDisplay = hide ? 'none' : 'inline-block';
    var createDisplay = hide ? 'inline-block' : 'none';

    $("#check")[0].style.display = checkDisplay;
    $("#create")[0].style.display = createDisplay;
}
/**
 * Send the selectors currently entered in the editor to the server for
 * validation.
 *
 * The request body is JSON with the selectors from getUIConfig(), the
 * global `snapshot_time` and the page url stored on the "#create" button.
 *
 * @returns {Promise} resolved with the parsed JSON response on success,
 *                    rejected with an error description when the request fails.
 */
function validateSelectors() {
    var selectors = getUIConfig();
    return new Promise(function(resolve, reject){
        $.ajax({
            type: 'POST',
            url: "/setup_validate_selectors",
            data: JSON.stringify({ selectors: selectors, snapshot_time: snapshot_time, url: $('#create').data('page-url') }),
            contentType: "application/json; charset=utf-8",
            dataType: "json",
            headers: {"X-CSRFToken": getCookie('csrftoken')},
            success: function(data){
                resolve(data);
            },
            // jQuery calls the "error" setting on failure; a "failure" key is
            // ignored, which left the promise pending forever on a bad request.
            error: function(jqXHR, textStatus, errorThrown) {
                reject(errorThrown || textStatus);
            }
        });
    });
}
$(document).ready(function(){ $(document).ready(function(){
$("#st-ext-trigger").click(function(){ $("#st-ext-trigger").click(function(){
show_ext(true); show_ext(true);
@ -154,7 +190,20 @@ $(document).ready(function(){
}); });
$("input[id^='ste-']").keyup(function(){ $("input[id^='ste-']").keyup(function(){
$("#check")[0].style.display = changed() ? 'inline-block' : 'none'; hide_check_show_create(!changed())
});
$("#check").click(function(){
loader(true);
validateSelectors().then(function(res){
ET.updateUIMessages(res.messages);
hide_check_show_create(res.success);
//unfreez UI
loader(false);
}, function(errMsg){
console.error(errMsg);
//unfreez UI
loader(false);
});
}); });
/*var cfg = read('xpathes') /*var cfg = read('xpathes')

View File

@ -268,6 +268,8 @@ function getCookie(name) {
return cookieValue; return cookieValue;
} }
window.getCookie = getCookie;
// html2json [tag_name, {attributes_dict}, [children]] // html2json [tag_name, {attributes_dict}, [children]]
var iframeHtmlJson = null; var iframeHtmlJson = null;
@ -407,9 +409,14 @@ function onCreateButtonClick() {
loader(true); loader(true);
createFeed().then(function(data){ createFeed().then(function(data){
if (ET.active()) { if (ET.active()) {
ET.updateUIMessages(JSON.parse(data)); var res = JSON.parse(data);
//unfreez UI if (res.success)
loader(false); window.location.href = res.url; // feed_page_url
else {
ET.updateUIMessages(res.messages);
//unfreez UI
loader(false);
}
} }
else else
window.location.href = data; // feed_page_url window.location.href = data; // feed_page_url
@ -425,13 +432,6 @@ function createFeed() {
var selectors = null; var selectors = null;
if (ET.active()) { if (ET.active()) {
selectors = ET.getUIConfig(); selectors = ET.getUIConfig();
selectors[0] = selectors[0].trim();
for (var name in selectors[1]) {
var xpath = selectors[1][name];
if (xpath.trim().length == 0)
delete selectors[1][name];
}
} }
else { else {
// gather selected tag-ids // gather selected tag-ids
@ -472,6 +472,7 @@ function createFeed() {
function loader(show) { function loader(show) {
document.getElementById("loader-bg").style.display = show ? "block" : "none"; document.getElementById("loader-bg").style.display = show ? "block" : "none";
} }
window.loader = loader;
$(document).ready(function(){ $(document).ready(function(){
// skip non setup page // skip non setup page

View File

@ -27,19 +27,28 @@
/* Portrait tablet to landscape and desktop */ /* Portrait tablet to landscape and desktop */
@media (min-width: 768px) and (max-width: 979px) { @media (min-width: 768px) and (max-width: 979px) {
#st-extended .form-horizontal .controls { #st-extended .form-horizontal .controls {
width: 700px width: 410px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 270px
} }
} }
/* Landscape phone to portrait tablet */ /* Landscape phone to portrait tablet */
@media (max-width: 767px) { @media (max-width: 767px) {
#st-extended .form-horizontal .controls { #st-extended .form-horizontal .controls {
width: 400px width: 340px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 200px
} }
} }
/* Landscape phones and down */ /* Landscape phones and down */
@media (max-width: 480px) { @media (max-width: 480px) {
#st-extended .form-horizontal .controls { #st-extended .form-horizontal .controls {
width: 250px width: 340px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 200px
} }
} }
</style> </style>
@ -106,10 +115,10 @@
<div style="float:right"> <div style="float:right">
<a id="st-clicker-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.clicker_tip' %}" <a id="st-clicker-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.clicker_tip' %}"
data-trigger="hover focus manual" data-animation="true" data-trigger="hover focus manual" data-animation="true"
confirm-text="You may loose your changes. Are you shure?"><img src="{% static 'frontend/images/target48.png' %}" /></a> confirm-text="{% trans 'setup.confirm_loose' %}"><img src="{% static 'frontend/images/target48.png' %}" /></a>
<a id="st-ext-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.extended_tip' %}" <a id="st-ext-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.extended_tip' %}"
data-trigger="hover focus manual" data-animation="true"><img src="{% static 'frontend/images/wrench48.png' %}" /></a> data-trigger="hover focus manual" data-animation="true"><img src="{% static 'frontend/images/wrench48.png' %}" /></a>
<button id="check" class="btn btn-large btn-primary" style="display:none">{% trans 'setup.check' %} <i class="icon-check icon-white" style="margin-top: 3px"></i></button> <button id="check" class="btn btn-large" style="display:none">{% trans 'setup.check' %} <i class="icon-check" style="margin-top: 3px"></i></button>
<button id="create" class="btn btn-large btn-primary disabled" data-page-url="{{ page_url }}" data-feed-page-url="{{ feed_page_url }}">{% trans 'setup.create' %} <i class="icon-arrow-right icon-white" style="margin-top: 3px"></i></button> <button id="create" class="btn btn-large btn-primary disabled" data-page-url="{{ page_url }}" data-feed-page-url="{{ feed_page_url }}">{% trans 'setup.create' %} <i class="icon-arrow-right icon-white" style="margin-top: 3px"></i></button>
</div> </div>

View File

@ -30,3 +30,4 @@ urlpatterns = i18n_patterns(
urlpatterns.append(url(r'^setup_get_selected_ids$', views.setup_get_selected_ids, name='setup_get_selected_ids')) urlpatterns.append(url(r'^setup_get_selected_ids$', views.setup_get_selected_ids, name='setup_get_selected_ids'))
urlpatterns.append(url(r'^setup_create_feed$', views.setup_create_feed, name='setup_create_feed')) urlpatterns.append(url(r'^setup_create_feed$', views.setup_create_feed, name='setup_create_feed'))
urlpatterns.append(url(r'^setup_create_feed_ext$', views.setup_create_feed_ext, name='setup_create_feed_ext')) urlpatterns.append(url(r'^setup_create_feed_ext$', views.setup_create_feed_ext, name='setup_create_feed_ext'))
urlpatterns.append(url(r'^setup_validate_selectors$', views.setup_validate_selectors, name='setup_validate_selectors'))

View File

@ -89,16 +89,16 @@ def setup_get_selected_ids(request):
def _get_link_xpath(title_xpath): def _get_link_xpath(title_xpath):
if title_xpath == './child::node()': if title_xpath == './child::node()':
return './ancestor-or-self::node()[name()="a"]/@href' return './ancestor-or-self::node()/@href'
else: else:
xpath = title_xpath[:len(title_xpath)-len('/child::node()')] xpath = title_xpath[:len(title_xpath)-len('/child::node()')]
return xpath +'/ancestor-or-self::node()/@href' return xpath +'/ancestor-or-self::node()/@href'
def _create_feed(url, xpathes): def _create_feed(url, xpathes, edited=False):
feed_xpath = xpathes[0] feed_xpath = xpathes[0]
item_xpathes = xpathes[1] item_xpathes = xpathes[1]
feed = Feed(uri=url, xpath=feed_xpath) feed = Feed(uri=url, xpath=feed_xpath, edited=edited)
feed.save() feed.save()
fields = Field.objects.all() fields = Field.objects.all()
@ -153,7 +153,7 @@ def _validate_selectors(selectors):
item_xpathes_out[field.name] = item_xpathes[field.name] item_xpathes_out[field.name] = item_xpathes[field.name]
return [feed_xpath, item_xpathes_out] return [feed_xpath, item_xpathes_out]
def setup_create_feed_ext(request): def setup_validate_selectors(request):
if request.method == 'POST': if request.method == 'POST':
obj = json.loads(request.body) obj = json.loads(request.body)
if 'selectors' not in obj or 'snapshot_time' not in obj: if 'selectors' not in obj or 'snapshot_time' not in obj:
@ -170,9 +170,35 @@ def setup_create_feed_ext(request):
if not validated_selectors: if not validated_selectors:
return HttpResponseBadRequest('selectors are invalid') return HttpResponseBadRequest('selectors are invalid')
results = build_xpath_results(validated_selectors, file_name) results, success = build_xpath_results(validated_selectors, file_name)
return HttpResponse(json.dumps(results)) return HttpResponse(json.dumps({'success': success, 'messages': results}))
def setup_create_feed_ext(request):
    """Create a feed from manually edited xpath selectors.

    Expects a POST whose body is a JSON object with the keys
    ``selectors``, ``snapshot_time`` and ``url``.

    Returns:
        * ``HttpResponseBadRequest`` when the body is not a JSON object, a
          required key is missing, ``snapshot_time`` does not match the
          expected pattern, or the selectors fail ``_validate_selectors``.
        * ``HttpResponse`` with JSON ``{'success': True, 'url': ...}`` when
          ``build_xpath_results`` reports success and the feed was created.
        * ``HttpResponse`` with JSON ``{'success': False, 'messages': ...}``
          otherwise, so the client can show per-selector messages.
    """
    # NOTE(review): non-POST requests fall through and return None, as in
    # the original code -- confirm whether that is intended.
    if request.method == 'POST':
        try:
            obj = json.loads(request.body)
        except ValueError:
            # Malformed JSON is a client error; answer 400 instead of
            # letting the exception escape the view.
            return HttpResponseBadRequest('request body is not valid JSON')

        if (not isinstance(obj, dict) or 'selectors' not in obj
                or 'snapshot_time' not in obj or 'url' not in obj):
            return HttpResponseBadRequest('"selectors", "snapshot_time" and "url" are required')

        selectors = obj['selectors']
        file_name = obj['snapshot_time']

        # NOTE(review): the pattern is not anchored at the end, so trailing
        # characters after the 32 hex digits are accepted -- confirm this is
        # intended before tightening it.
        if not re.match(r'^\d{10}\.\d+_[\da-f]{32}', file_name):
            return HttpResponseBadRequest('"snapshot_time" is invalid')

        validated_selectors = _validate_selectors(selectors)

        if not validated_selectors:
            return HttpResponseBadRequest('selectors are invalid')

        results, success = build_xpath_results(validated_selectors, file_name)

        if success:
            url = obj['url']
            # Third argument marks the feed as edited by hand.
            feed_id = _create_feed(url, validated_selectors, True)
            return HttpResponse(json.dumps({'success': True, 'url': reverse('preview', args=(feed_id,))}))
        else:
            return HttpResponse(json.dumps({'success': False, 'messages': results}))
def preview(request, feed_id): def preview(request, feed_id):
if request.method == 'GET': if request.method == 'GET':