v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-16 06:10:09 -07:00

xpath editor

This commit is contained in:
Alexandr Nesterenko 2017-08-06 16:07:59 +00:00
parent afd10fdf6d
commit cefe9e79d4
14 changed files with 194 additions and 66 deletions

View File

@ -215,7 +215,8 @@ class Downloader(resource.Resource):
else: # neither page nor feed
return 'Url is required'
port = sys.argv[1] if len(sys.argv) >= 2 else 1234
endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Downloader()))
print 'Server starting at http://localhost:1234'
endpoints.serverFromString(reactor, "tcp:%s" % port).listen(server.Site(Downloader()))
print 'Server starting at http://localhost:%s' % port
reactor.run()

14
feed.py
View File

@ -92,10 +92,11 @@ def _build_link(html, doc_url, url):
def buildFeed(response, feed_config):
response.selector.remove_namespaces()
tree = response.selector.root.getroottree()
selector = response.selector
tree = selector.root.getroottree()
# get data from html
items = []
for node in tree.xpath(feed_config['xpath']):
for node in selector.xpath(feed_config['xpath']):
item = {}
required_count = 0
required_found = 0
@ -103,13 +104,14 @@ def buildFeed(response, feed_config):
if field_name in feed_config['fields']:
if feed_config['required'][field_name]:
required_count += 1
element_or_attr = node.xpath(feed_config['fields'][field_name])
if element_or_attr:
item[field_name] = element_to_unicode(element_or_attr[0], response.encoding)
extracted = node.xpath(feed_config['fields'][field_name]).extract()
if extracted:
item[field_name] = u''.join(extracted)
if feed_config['required'][field_name]:
required_found += 1
if field_name == 'link':
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item['link'])
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item[field_name])
if required_count == required_found:
items.append(item)

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2017-07-26 20:30+0300\n"
"POT-Creation-Date: 2017-08-06 00:07+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -116,50 +116,58 @@ msgstr "and"
msgid "setup.description"
msgstr "Description"
#: templates/frontend/setup.html:63
#: templates/frontend/setup.html:65
msgid "setup.item_xpath"
msgstr "Post xpath"
#: templates/frontend/setup.html:65 templates/frontend/setup.html.py:76
#: templates/frontend/setup.html:87 templates/frontend/setup.html.py:98
#: templates/frontend/setup.html:67 templates/frontend/setup.html.py:78
#: templates/frontend/setup.html:89 templates/frontend/setup.html.py:100
msgid "setup.skipped"
msgstr "Skipped"
#: templates/frontend/setup.html:66 templates/frontend/setup.html.py:77
#: templates/frontend/setup.html:88 templates/frontend/setup.html.py:99
#: templates/frontend/setup.html:68 templates/frontend/setup.html.py:79
#: templates/frontend/setup.html:90 templates/frontend/setup.html.py:101
msgid "setup.Selected"
msgstr "Selected %s items"
#: templates/frontend/setup.html:70
#: templates/frontend/setup.html:72
msgid "setup.title_xpath"
msgstr "Title xpath"
#: templates/frontend/setup.html:73 templates/frontend/setup.html.py:84
#: templates/frontend/setup.html:95
#: templates/frontend/setup.html:75 templates/frontend/setup.html.py:86
#: templates/frontend/setup.html:97
msgid "setup.Required"
msgstr "Required"
#: templates/frontend/setup.html:74 templates/frontend/setup.html.py:85
#: templates/frontend/setup.html:96
#: templates/frontend/setup.html:76 templates/frontend/setup.html.py:87
#: templates/frontend/setup.html:98
msgid "setup.Optional"
msgstr "Optional"
#: templates/frontend/setup.html:81
#: templates/frontend/setup.html:83
msgid "setup.link_xpath"
msgstr "Link xpath"
#: templates/frontend/setup.html:92
#: templates/frontend/setup.html:94
msgid "setup.description_xpath"
msgstr "Description xpath"
#: templates/frontend/setup.html:105
#: templates/frontend/setup.html:107
msgid "setup.clicker_tip"
msgstr "Visual constructor"
#: templates/frontend/setup.html:108
#: templates/frontend/setup.html:109
msgid "setup.confirm_loose"
msgstr "You will lose your changes. Are you sure?"
#: templates/frontend/setup.html:110
msgid "setup.extended_tip"
msgstr "Selectors editor"
#: templates/frontend/setup.html:110
#: templates/frontend/setup.html:112
msgid "setup.check"
msgstr "Check"
#: templates/frontend/setup.html:113
msgid "setup.create"
msgstr "Create"

View File

@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2017-07-26 20:30+0300\n"
"POT-Creation-Date: 2017-08-06 00:07+0000\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language-Team: LANGUAGE <LL@li.org>\n"
@ -117,50 +117,58 @@ msgstr "и"
msgid "setup.description"
msgstr "Описание"
#: templates/frontend/setup.html:63
#: templates/frontend/setup.html:65
msgid "setup.item_xpath"
msgstr "Xpath поста"
#: templates/frontend/setup.html:65 templates/frontend/setup.html.py:76
#: templates/frontend/setup.html:87 templates/frontend/setup.html.py:98
#: templates/frontend/setup.html:67 templates/frontend/setup.html.py:78
#: templates/frontend/setup.html:89 templates/frontend/setup.html.py:100
msgid "setup.skipped"
msgstr "Пропущено"
#: templates/frontend/setup.html:66 templates/frontend/setup.html.py:77
#: templates/frontend/setup.html:88 templates/frontend/setup.html.py:99
#: templates/frontend/setup.html:68 templates/frontend/setup.html.py:79
#: templates/frontend/setup.html:90 templates/frontend/setup.html.py:101
msgid "setup.Selected"
msgstr "Выбрано %s элементов"
#: templates/frontend/setup.html:70
#: templates/frontend/setup.html:72
msgid "setup.title_xpath"
msgstr "Xpath названия"
#: templates/frontend/setup.html:73 templates/frontend/setup.html.py:84
#: templates/frontend/setup.html:95
#: templates/frontend/setup.html:75 templates/frontend/setup.html.py:86
#: templates/frontend/setup.html:97
msgid "setup.Required"
msgstr "Обязательное"
#: templates/frontend/setup.html:74 templates/frontend/setup.html.py:85
#: templates/frontend/setup.html:96
#: templates/frontend/setup.html:76 templates/frontend/setup.html.py:87
#: templates/frontend/setup.html:98
msgid "setup.Optional"
msgstr "Опциональное"
#: templates/frontend/setup.html:81
#: templates/frontend/setup.html:83
msgid "setup.link_xpath"
msgstr "Xpath ссылки"
#: templates/frontend/setup.html:92
#: templates/frontend/setup.html:94
msgid "setup.description_xpath"
msgstr "Xpath описания"
#: templates/frontend/setup.html:105
#: templates/frontend/setup.html:107
msgid "setup.clicker_tip"
msgstr "Визуальный конструктор"
#: templates/frontend/setup.html:108
#: templates/frontend/setup.html:109
msgid "setup.confirm_loose"
msgstr "Вы потеряете изменения. Вы уверены?"
#: templates/frontend/setup.html:110
msgid "setup.extended_tip"
msgstr "Редактор селекторов"
#: templates/frontend/setup.html:110
#: templates/frontend/setup.html:112
msgid "setup.check"
msgstr "Проверить"
#: templates/frontend/setup.html:113
msgid "setup.create"
msgstr "Создать"

View File

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add a boolean ``edited`` column (default ``False``) to the ``feed`` model."""

    # Applied on top of the previous ``frontend`` app migration.
    dependencies = [
        ('frontend', '0003_field_required'),
    ]

    operations = [
        # New column; the declared default is False.
        migrations.AddField(
            model_name='feed',
            name='edited',
            field=models.BooleanField(default=False),
        ),
    ]

View File

@ -3,6 +3,7 @@ from django.db import models
class Feed(models.Model):
    """Database record for one feed: a URI plus the XPath expression stored for it."""

    # Both values are stored as plain strings of up to 2000 characters.
    uri = models.CharField(max_length=2000)
    xpath = models.CharField(max_length=2000)
    # Defaults to False.
    # NOTE(review): appears to mark feeds whose selectors were entered by hand
    # (setup_create_feed_ext passes edited=True) - confirm.
    edited = models.BooleanField(default=False)
    # Set automatically when the row is first created (auto_now_add).
    created = models.DateTimeField(auto_now_add=True)
class Field(models.Model):

View File

@ -19,6 +19,7 @@ def build_xpath_results(selectors, file_name):
feed_result = None
field_results = {}
success = True
post_elems = None
try:
doc = Selector(text=html)
@ -32,7 +33,6 @@ def build_xpath_results(selectors, file_name):
for name, xpath in field_xpathes.iteritems():
if not (name in field_results):
field_results[name] = {}
# import pdb;pdb.set_trace()
xpath = xpath.strip()
try:
extracts = elem.xpath(xpath).extract()
@ -43,6 +43,7 @@ def build_xpath_results(selectors, file_name):
if not extracts:
selected_required = False
except ValueError as ex:
success = False
field_results[name]['error'] = ex.message
for name, xpath in field_xpathes.iteritems():
@ -67,9 +68,11 @@ def build_xpath_results(selectors, file_name):
if not (name in field_results):
field_results[name] = {}
field_results[name]['error'] = ex.message
success = False
except ValueError as ex:
feed_result = {'error': ex.message}
success = False
return [feed_result, field_results]
return [[feed_result, field_results], success]

View File

@ -68,6 +68,8 @@ function updateUIMessages(data) {
else
updateSelector(name, {});
});
hide_check_show_create(true);
}
function updateUI(config) {
@ -86,11 +88,13 @@ function showIcon(show) {
function getUIConfig() {
var cfg = [
$('#ste-parent').val(),
$('#ste-parent').val().trim(),
{}
];
['title', 'description', 'link'].forEach(function(name){
cfg[1][name] = $('#ste-'+ name).val();
var xpath = $('#ste-'+ name).val();
if (xpath.trim().length > 0)
cfg[1][name] = xpath;
});
return cfg;
}
@ -139,6 +143,38 @@ function show_ext(show) {
_active = show;
}
/**
 * Show exactly one of the two action buttons.
 *
 * @param {boolean} hide  truthy: hide #check and show #create;
 *                        falsy: show #check and hide #create.
 */
function hide_check_show_create(hide) {
    var checkButton = $("#check")[0],
        createButton = $("#create")[0];

    checkButton.style.display = hide ? 'none' : 'inline-block';
    createButton.style.display = hide ? 'inline-block' : 'none';
}
/**
 * Ask the server to validate the selectors currently entered in the editor.
 *
 * POSTs a JSON body with the selectors from getUIConfig(), the global
 * `snapshot_time` and the page URL (read from the #create button's
 * `data-page-url` attribute) to /setup_validate_selectors.
 *
 * @returns {Promise} resolves with the parsed JSON response; rejects with the
 *     error text reported by jQuery when the request fails.
 */
function validateSelectors() {
    var selectors = getUIConfig();
    return new Promise(function(resolve, reject){
        $.ajax({
            type: 'POST',
            url: "/setup_validate_selectors",
            data: JSON.stringify({ selectors: selectors, snapshot_time: snapshot_time, url: $('#create').data('page-url') }),
            contentType: "application/json; charset=utf-8",
            dataType: "json",
            headers: {"X-CSRFToken": getCookie('csrftoken')},
            success: function(data){
                resolve(data);
            },
            // jQuery.ajax calls `error` on failure; the former `failure` key is
            // not a recognised setting, so the promise could never reject.
            error: function(jqXHR, textStatus, errorThrown) {
                reject(errorThrown || textStatus);
            }
        });
    });
}
$(document).ready(function(){
$("#st-ext-trigger").click(function(){
show_ext(true);
@ -154,7 +190,20 @@ $(document).ready(function(){
});
$("input[id^='ste-']").keyup(function(){
$("#check")[0].style.display = changed() ? 'inline-block' : 'none';
hide_check_show_create(!changed())
});
$("#check").click(function(){
loader(true);
validateSelectors().then(function(res){
ET.updateUIMessages(res.messages);
hide_check_show_create(res.success);
//unfreeze UI
loader(false);
}, function(errMsg){
console.error(errMsg);
//unfreeze UI
loader(false);
});
});
/*var cfg = read('xpathes')

View File

@ -268,6 +268,8 @@ function getCookie(name) {
return cookieValue;
}
window.getCookie = getCookie;
// html2json [tag_name, {attributes_dict}, [children]]
var iframeHtmlJson = null;
@ -407,9 +409,14 @@ function onCreateButtonClick() {
loader(true);
createFeed().then(function(data){
if (ET.active()) {
ET.updateUIMessages(JSON.parse(data));
//unfreez UI
loader(false);
var res = JSON.parse(data);
if (res.success)
window.location.href = res.url; // feed_page_url
else {
ET.updateUIMessages(res.messages);
//unfreeze UI
loader(false);
}
}
else
window.location.href = data; // feed_page_url
@ -425,13 +432,6 @@ function createFeed() {
var selectors = null;
if (ET.active()) {
selectors = ET.getUIConfig();
selectors[0] = selectors[0].trim();
for (var name in selectors[1]) {
var xpath = selectors[1][name];
if (xpath.trim().length == 0)
delete selectors[1][name];
}
}
else {
// gather selected tag-ids
@ -472,6 +472,7 @@ function createFeed() {
function loader(show) {
document.getElementById("loader-bg").style.display = show ? "block" : "none";
}
window.loader = loader;
$(document).ready(function(){
// skip non setup page

View File

@ -27,19 +27,28 @@
/* Portrait tablet to landscape and desktop */
@media (min-width: 768px) and (max-width: 979px) {
#st-extended .form-horizontal .controls {
width: 700px
width: 410px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 270px
}
}
/* Landscape phone to portrait tablet */
@media (max-width: 767px) {
#st-extended .form-horizontal .controls {
width: 400px
width: 340px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 200px
}
}
/* Landscape phones and down */
@media (max-width: 480px) {
#st-extended .form-horizontal .controls {
width: 250px
width: 340px
}
#st-extended .form-horizontal .controls .input-xxlarge {
width: 200px
}
}
</style>
@ -106,10 +115,10 @@
<div style="float:right">
<a id="st-clicker-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.clicker_tip' %}"
data-trigger="hover focus manual" data-animation="true"
confirm-text="You may loose your changes. Are you shure?"><img src="{% static 'frontend/images/target48.png' %}" /></a>
confirm-text="{% trans 'setup.confirm_loose' %}"><img src="{% static 'frontend/images/target48.png' %}" /></a>
<a id="st-ext-trigger" href="javascript:void(0)" style="display:none" class="has-tooltip" title="{% trans 'setup.extended_tip' %}"
data-trigger="hover focus manual" data-animation="true"><img src="{% static 'frontend/images/wrench48.png' %}" /></a>
<button id="check" class="btn btn-large btn-primary" style="display:none">{% trans 'setup.check' %} <i class="icon-check icon-white" style="margin-top: 3px"></i></button>
<button id="check" class="btn btn-large" style="display:none">{% trans 'setup.check' %} <i class="icon-check" style="margin-top: 3px"></i></button>
<button id="create" class="btn btn-large btn-primary disabled" data-page-url="{{ page_url }}" data-feed-page-url="{{ feed_page_url }}">{% trans 'setup.create' %} <i class="icon-arrow-right icon-white" style="margin-top: 3px"></i></button>
</div>

View File

@ -30,3 +30,4 @@ urlpatterns = i18n_patterns(
urlpatterns.append(url(r'^setup_get_selected_ids$', views.setup_get_selected_ids, name='setup_get_selected_ids'))
urlpatterns.append(url(r'^setup_create_feed$', views.setup_create_feed, name='setup_create_feed'))
urlpatterns.append(url(r'^setup_create_feed_ext$', views.setup_create_feed_ext, name='setup_create_feed_ext'))
urlpatterns.append(url(r'^setup_validate_selectors$', views.setup_validate_selectors, name='setup_validate_selectors'))

View File

@ -89,16 +89,16 @@ def setup_get_selected_ids(request):
def _get_link_xpath(title_xpath):
if title_xpath == './child::node()':
return './ancestor-or-self::node()[name()="a"]/@href'
return './ancestor-or-self::node()/@href'
else:
xpath = title_xpath[:len(title_xpath)-len('/child::node()')]
return xpath +'/ancestor-or-self::node()/@href'
def _create_feed(url, xpathes):
def _create_feed(url, xpathes, edited=False):
feed_xpath = xpathes[0]
item_xpathes = xpathes[1]
feed = Feed(uri=url, xpath=feed_xpath)
feed = Feed(uri=url, xpath=feed_xpath, edited=edited)
feed.save()
fields = Field.objects.all()
@ -153,7 +153,7 @@ def _validate_selectors(selectors):
item_xpathes_out[field.name] = item_xpathes[field.name]
return [feed_xpath, item_xpathes_out]
def setup_create_feed_ext(request):
def setup_validate_selectors(request):
if request.method == 'POST':
obj = json.loads(request.body)
if 'selectors' not in obj or 'snapshot_time' not in obj:
@ -170,9 +170,35 @@ def setup_create_feed_ext(request):
if not validated_selectors:
return HttpResponseBadRequest('selectors are invalid')
results = build_xpath_results(validated_selectors, file_name)
results, success = build_xpath_results(validated_selectors, file_name)
return HttpResponse(json.dumps(results))
return HttpResponse(json.dumps({'success': success, 'messages': results}))
def setup_create_feed_ext(request):
    """Validate the posted selectors and create the feed when they all pass.

    Expects a POST request whose JSON body is an object with the keys
    "selectors", "snapshot_time" and "url".

    Returns an HttpResponse carrying JSON:
      * {"success": True, "url": <preview url>} when the feed was created;
      * {"success": False, "messages": <xpath results>} when a selector failed.
    Malformed input yields HTTP 400; any method other than POST yields HTTP 405.
    """
    # Imported locally so this block does not rely on the module import list.
    from django.http import HttpResponseNotAllowed

    if request.method != 'POST':
        # Falling through used to return None, which Django reports as a
        # server error instead of a client error.
        return HttpResponseNotAllowed(['POST'])

    try:
        obj = json.loads(request.body)
    except ValueError:
        # A body that is not valid JSON is a client error, not a crash.
        return HttpResponseBadRequest('request body is not valid JSON')

    if (not isinstance(obj, dict) or 'selectors' not in obj
            or 'snapshot_time' not in obj or 'url' not in obj):
        return HttpResponseBadRequest('"selectors", "snapshot_time" and "url" are required')

    selectors = obj['selectors']
    file_name = obj['snapshot_time']

    # NOTE(review): the pattern is anchored at the start only, so trailing
    # characters are accepted - confirm whether an end anchor is wanted, since
    # the value is passed on as a file name.
    if not re.match(r'^\d{10}\.\d+_[\da-f]{32}', file_name):
        return HttpResponseBadRequest('"snapshot_time" is invalid')

    validated_selectors = _validate_selectors(selectors)
    if not validated_selectors:
        return HttpResponseBadRequest('selectors are invalid')

    results, success = build_xpath_results(validated_selectors, file_name)
    if not success:
        return HttpResponse(json.dumps({'success': False, 'messages': results}))

    # The third argument marks the feed as created from edited selectors.
    feed_id = _create_feed(obj['url'], validated_selectors, True)
    return HttpResponse(json.dumps({'success': True, 'url': reverse('preview', args=(feed_id,))}))
def preview(request, feed_id):
if request.method == 'GET':