v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-27 19:40:09 -07:00

add link field

This commit is contained in:
Alexandr Nesterenko 2017-07-17 12:37:33 -07:00
parent 64e66b9f83
commit f1589f3e5e
5 changed files with 53 additions and 22 deletions

41
feed.py
View File

@ -15,7 +15,7 @@ url_hash_regexp = re.compile('(#.*)?$')
POST_TIME_DISTANCE = 15 # minutes, RSS Feed Reader skip same titles created in 10 min interval
FIELD_IDS = {'title': 1, 'description': 2, 'title_link': 3}
FIELD_IDS = {'title': 1, 'description': 2, 'link': 3}
def save_post(conn, created, feed_id, post_fields):
cur = conn.cursor()
@ -36,7 +36,7 @@ def fill_time(feed_id, items):
for item in items:
#create md5
h = md5('')
for key in ['title', 'description', 'title_link']:
for key in ['title', 'description', 'link']:
if key in item:
h.update(item[key].encode('utf-8'))
item['md5'] = h.hexdigest()
@ -70,6 +70,9 @@ def fill_time(feed_id, items):
cur_time -= datetime.timedelta(minutes=POST_TIME_DISTANCE)
def element_to_string(element):
if isinstance(element, basestring): # attribute
return element
s = [element.text] if element.text else []
for sub_element in element:
s.append(etree.tostring(sub_element))
@ -87,21 +90,21 @@ def buildFeed(response, feed_config):
items = []
for node in tree.xpath(feed_config['xpath']):
item = {}
title_link = None
for field_name in ['title', 'description']:
required_count = 0
required_found = 0
for field_name in ['title', 'description', 'link']:
if field_name in feed_config['fields']:
element = node.xpath(feed_config['fields'][field_name])
if element:
item[field_name] = element_to_string(element[0])
# get item link
if field_name == 'title':
anchor = element[0].xpath('ancestor-or-self::node()[name()="a"]')
if anchor and anchor[0].get('href'):
title_link = _build_link(response.body_as_unicode(), feed_config['uri'], anchor[0].get('href'))
if feed_config['required'][field_name]:
required_count += 1
element_or_attr = node.xpath(feed_config['fields'][field_name])
if element_or_attr:
item[field_name] = element_to_string(element_or_attr[0])
if feed_config['required'][field_name]:
required_found += 1
if field_name == 'link':
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item['link'])
if len(item) == len(feed_config['fields']): # all fields are required
if title_link:
item['title_link'] = title_link
if required_count == required_found:
items.append(item)
title = response.selector.xpath('//title/text()').extract()
@ -121,8 +124,8 @@ def buildFeed(response, feed_config):
title = item['title'] if 'title' in item else ''
desc = item['description'] if 'description' in item else ''
time = item['time']
if 'title_link' in item:
link = item['title_link']
if 'link' in item:
link = item['link']
else:
link = url_hash_regexp.sub('#' + md5((title+desc).encode('utf-8')).hexdigest(), feed_config['uri'])
feed.add_item(
@ -140,7 +143,7 @@ def getFeedData(request, feed_id):
db = get_conn()
with db:
cur = db.cursor()
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath from frontend_feed f
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath, fi.required from frontend_feed f
right join frontend_feedfield ff on ff.feed_id=f.id
left join frontend_field fi on fi.id=ff.field_id
where f.id=%s""", (feed_id,))
@ -152,7 +155,9 @@ def getFeedData(request, feed_id):
feed['uri'] = row[0]
feed['xpath'] = row[1]
feed['fields'] = {}
feed['required'] = {}
feed['fields'][row[2]] = row[3]
feed['required'][row[2]] = row[4]
if feed:
return [feed['uri'], feed]

View File

@ -1,21 +1,24 @@
[
{
"fields": {
"name": "title"
"name": "title",
"required": 1
},
"model": "frontend.field",
"pk": 1
},
{
"fields": {
"name": "description"
"name": "description",
"required": 1
},
"model": "frontend.field",
"pk": 2
},
{
"fields": {
"name": "link"
"name": "link",
"required": 0
},
"model": "frontend.field",
"pk": 3

View File

@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from django.db import migrations, models
class Migration(migrations.Migration):
    """Add the boolean ``required`` column to the ``field`` model.

    Belongs to the ``frontend`` app and is ordered after migration
    ``0002_auto_20170711_2119``.
    """

    # Must be applied after this earlier ``frontend`` migration.
    dependencies = [
        ('frontend', '0002_auto_20170711_2119'),
    ]

    operations = [
        # The field is declared with default=True; per Django's AddField
        # semantics that default is what pre-existing rows receive.
        migrations.AddField(
            model_name='field',
            name='required',
            field=models.BooleanField(default=True),
        ),
    ]

View File

@ -7,6 +7,7 @@ class Feed(models.Model):
class Field(models.Model):
    """A named field that a feed item can carry."""

    # Field name, limited to 200 characters.
    # NOTE(review): the accompanying fixture lists "title", "description"
    # and "link" -- confirm those are the only names expected.
    name = models.CharField(max_length=200)
    # Whether the field is mandatory; defaults to True.
    # NOTE(review): the feed builder appears to keep an item only when every
    # required field was found -- confirm against feed.py.
    required = models.BooleanField(default=True)
class FeedField(models.Model):
feed = models.ForeignKey(Feed, on_delete=models.CASCADE)

View File

@ -84,11 +84,14 @@ def _create_feed(url, xpathes):
feed.save()
fields = Field.objects.all()
for field in fields:
if field.name in item_xpathes:
ff = FeedField(feed=feed, field=field, xpath=item_xpathes[field.name])
ff.save()
if field.name == 'link' and 'title' in item_xpathes:
ff = FeedField(feed=feed, field=field, xpath='('+ item_xpathes['title'] +')[1]/ancestor-or-self::node()[name()="a"]/@href')
ff.save()
return feed.id