mirror of
https://github.com/taroved/pol
synced 2025-05-27 19:40:09 -07:00
add link field
This commit is contained in:
parent
64e66b9f83
commit
f1589f3e5e
41
feed.py
41
feed.py
@ -15,7 +15,7 @@ url_hash_regexp = re.compile('(#.*)?$')
|
||||
|
||||
POST_TIME_DISTANCE = 15 # minutes, RSS Feed Reader skip same titles created in 10 min interval
|
||||
|
||||
FIELD_IDS = {'title': 1, 'description': 2, 'title_link': 3}
|
||||
FIELD_IDS = {'title': 1, 'description': 2, 'link': 3}
|
||||
|
||||
def save_post(conn, created, feed_id, post_fields):
|
||||
cur = conn.cursor()
|
||||
@ -36,7 +36,7 @@ def fill_time(feed_id, items):
|
||||
for item in items:
|
||||
#create md5
|
||||
h = md5('')
|
||||
for key in ['title', 'description', 'title_link']:
|
||||
for key in ['title', 'description', 'link']:
|
||||
if key in item:
|
||||
h.update(item[key].encode('utf-8'))
|
||||
item['md5'] = h.hexdigest()
|
||||
@ -70,6 +70,9 @@ def fill_time(feed_id, items):
|
||||
cur_time -= datetime.timedelta(minutes=POST_TIME_DISTANCE)
|
||||
|
||||
def element_to_string(element):
|
||||
if isinstance(element, basestring): # attribute
|
||||
return element
|
||||
|
||||
s = [element.text] if element.text else []
|
||||
for sub_element in element:
|
||||
s.append(etree.tostring(sub_element))
|
||||
@ -87,21 +90,21 @@ def buildFeed(response, feed_config):
|
||||
items = []
|
||||
for node in tree.xpath(feed_config['xpath']):
|
||||
item = {}
|
||||
title_link = None
|
||||
for field_name in ['title', 'description']:
|
||||
required_count = 0
|
||||
required_found = 0
|
||||
for field_name in ['title', 'description', 'link']:
|
||||
if field_name in feed_config['fields']:
|
||||
element = node.xpath(feed_config['fields'][field_name])
|
||||
if element:
|
||||
item[field_name] = element_to_string(element[0])
|
||||
# get item link
|
||||
if field_name == 'title':
|
||||
anchor = element[0].xpath('ancestor-or-self::node()[name()="a"]')
|
||||
if anchor and anchor[0].get('href'):
|
||||
title_link = _build_link(response.body_as_unicode(), feed_config['uri'], anchor[0].get('href'))
|
||||
if feed_config['required'][field_name]:
|
||||
required_count += 1
|
||||
element_or_attr = node.xpath(feed_config['fields'][field_name])
|
||||
if element_or_attr:
|
||||
item[field_name] = element_to_string(element_or_attr[0])
|
||||
if feed_config['required'][field_name]:
|
||||
required_found += 1
|
||||
if field_name == 'link':
|
||||
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item['link'])
|
||||
|
||||
if len(item) == len(feed_config['fields']): # all fields are required
|
||||
if title_link:
|
||||
item['title_link'] = title_link
|
||||
if required_count == required_found:
|
||||
items.append(item)
|
||||
|
||||
title = response.selector.xpath('//title/text()').extract()
|
||||
@ -121,8 +124,8 @@ def buildFeed(response, feed_config):
|
||||
title = item['title'] if 'title' in item else ''
|
||||
desc = item['description'] if 'description' in item else ''
|
||||
time = item['time']
|
||||
if 'title_link' in item:
|
||||
link = item['title_link']
|
||||
if 'link' in item:
|
||||
link = item['link']
|
||||
else:
|
||||
link = url_hash_regexp.sub('#' + md5((title+desc).encode('utf-8')).hexdigest(), feed_config['uri'])
|
||||
feed.add_item(
|
||||
@ -140,7 +143,7 @@ def getFeedData(request, feed_id):
|
||||
db = get_conn()
|
||||
with db:
|
||||
cur = db.cursor()
|
||||
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath from frontend_feed f
|
||||
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath, fi.required from frontend_feed f
|
||||
right join frontend_feedfield ff on ff.feed_id=f.id
|
||||
left join frontend_field fi on fi.id=ff.field_id
|
||||
where f.id=%s""", (feed_id,))
|
||||
@ -152,7 +155,9 @@ def getFeedData(request, feed_id):
|
||||
feed['uri'] = row[0]
|
||||
feed['xpath'] = row[1]
|
||||
feed['fields'] = {}
|
||||
feed['required'] = {}
|
||||
feed['fields'][row[2]] = row[3]
|
||||
feed['required'][row[2]] = row[4]
|
||||
|
||||
if feed:
|
||||
return [feed['uri'], feed]
|
||||
|
@ -1,21 +1,24 @@
|
||||
[
|
||||
{
|
||||
"fields": {
|
||||
"name": "title"
|
||||
"name": "title",
|
||||
"required": 1
|
||||
},
|
||||
"model": "frontend.field",
|
||||
"pk": 1
|
||||
},
|
||||
{
|
||||
"fields": {
|
||||
"name": "description"
|
||||
"name": "description",
|
||||
"required": 1
|
||||
},
|
||||
"model": "frontend.field",
|
||||
"pk": 2
|
||||
},
|
||||
{
|
||||
"fields": {
|
||||
"name": "link"
|
||||
"name": "link",
|
||||
"required": 0
|
||||
},
|
||||
"model": "frontend.field",
|
||||
"pk": 3
|
||||
|
19
frontend/frontend/migrations/0003_field_required.py
Normal file
19
frontend/frontend/migrations/0003_field_required.py
Normal file
@ -0,0 +1,19 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Schema migration 0003 for the ``frontend`` app.

    Adds a boolean ``required`` column (default ``True``) to the ``field``
    model. It must run after ``0002_auto_20170711_2119``.
    """

    # Previous migration of the same app that this one builds on.
    dependencies = [('frontend', '0002_auto_20170711_2119')]

    # Single schema change: the new ``required`` flag on ``field``.
    operations = [
        migrations.AddField(
            model_name='field',
            name='required',
            field=models.BooleanField(default=True),
        ),
    ]
|
@ -7,6 +7,7 @@ class Feed(models.Model):
|
||||
|
||||
class Field(models.Model):
    """A kind of value that can be extracted for a feed item.

    The fixture in this commit defines three rows: ``title``,
    ``description`` and ``link``.
    """

    # Field identifier, e.g. 'title'; at most 200 characters.
    name = models.CharField(max_length=200)

    # Whether the field is mandatory; new rows default to True.
    # NOTE(review): feed.py appears to read this flag (as fi.required) to
    # decide whether an item lacking the field is still emitted - confirm.
    required = models.BooleanField(default=True)
|
||||
|
||||
class FeedField(models.Model):
|
||||
feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
|
||||
|
@ -84,11 +84,14 @@ def _create_feed(url, xpathes):
|
||||
feed.save()
|
||||
|
||||
fields = Field.objects.all()
|
||||
|
||||
|
||||
for field in fields:
|
||||
if field.name in item_xpathes:
|
||||
ff = FeedField(feed=feed, field=field, xpath=item_xpathes[field.name])
|
||||
ff.save()
|
||||
if field.name == 'link' and 'title' in item_xpathes:
|
||||
ff = FeedField(feed=feed, field=field, xpath='('+ item_xpathes['title'] +')[1]/ancestor-or-self::node()[name()="a"]/@href')
|
||||
ff.save()
|
||||
|
||||
return feed.id
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user