mirror of
https://github.com/taroved/pol
synced 2025-05-29 12:30:09 -07:00
add link field
This commit is contained in:
parent
64e66b9f83
commit
f1589f3e5e
41
feed.py
41
feed.py
@ -15,7 +15,7 @@ url_hash_regexp = re.compile('(#.*)?$')
|
|||||||
|
|
||||||
POST_TIME_DISTANCE = 15 # minutes; RSS feed readers skip posts with the same title created within a 10-minute interval
|
POST_TIME_DISTANCE = 15 # minutes; RSS feed readers skip posts with the same title created within a 10-minute interval
|
||||||
|
|
||||||
FIELD_IDS = {'title': 1, 'description': 2, 'title_link': 3}
|
FIELD_IDS = {'title': 1, 'description': 2, 'link': 3}
|
||||||
|
|
||||||
def save_post(conn, created, feed_id, post_fields):
|
def save_post(conn, created, feed_id, post_fields):
|
||||||
cur = conn.cursor()
|
cur = conn.cursor()
|
||||||
@ -36,7 +36,7 @@ def fill_time(feed_id, items):
|
|||||||
for item in items:
|
for item in items:
|
||||||
#create md5
|
#create md5
|
||||||
h = md5('')
|
h = md5('')
|
||||||
for key in ['title', 'description', 'title_link']:
|
for key in ['title', 'description', 'link']:
|
||||||
if key in item:
|
if key in item:
|
||||||
h.update(item[key].encode('utf-8'))
|
h.update(item[key].encode('utf-8'))
|
||||||
item['md5'] = h.hexdigest()
|
item['md5'] = h.hexdigest()
|
||||||
@ -70,6 +70,9 @@ def fill_time(feed_id, items):
|
|||||||
cur_time -= datetime.timedelta(minutes=POST_TIME_DISTANCE)
|
cur_time -= datetime.timedelta(minutes=POST_TIME_DISTANCE)
|
||||||
|
|
||||||
def element_to_string(element):
|
def element_to_string(element):
|
||||||
|
if isinstance(element, basestring): # attribute
|
||||||
|
return element
|
||||||
|
|
||||||
s = [element.text] if element.text else []
|
s = [element.text] if element.text else []
|
||||||
for sub_element in element:
|
for sub_element in element:
|
||||||
s.append(etree.tostring(sub_element))
|
s.append(etree.tostring(sub_element))
|
||||||
@ -87,21 +90,21 @@ def buildFeed(response, feed_config):
|
|||||||
items = []
|
items = []
|
||||||
for node in tree.xpath(feed_config['xpath']):
|
for node in tree.xpath(feed_config['xpath']):
|
||||||
item = {}
|
item = {}
|
||||||
title_link = None
|
required_count = 0
|
||||||
for field_name in ['title', 'description']:
|
required_found = 0
|
||||||
|
for field_name in ['title', 'description', 'link']:
|
||||||
if field_name in feed_config['fields']:
|
if field_name in feed_config['fields']:
|
||||||
element = node.xpath(feed_config['fields'][field_name])
|
if feed_config['required'][field_name]:
|
||||||
if element:
|
required_count += 1
|
||||||
item[field_name] = element_to_string(element[0])
|
element_or_attr = node.xpath(feed_config['fields'][field_name])
|
||||||
# get item link
|
if element_or_attr:
|
||||||
if field_name == 'title':
|
item[field_name] = element_to_string(element_or_attr[0])
|
||||||
anchor = element[0].xpath('ancestor-or-self::node()[name()="a"]')
|
if feed_config['required'][field_name]:
|
||||||
if anchor and anchor[0].get('href'):
|
required_found += 1
|
||||||
title_link = _build_link(response.body_as_unicode(), feed_config['uri'], anchor[0].get('href'))
|
if field_name == 'link':
|
||||||
|
item['link'] = _build_link(response.body_as_unicode(), feed_config['uri'], item['link'])
|
||||||
|
|
||||||
if len(item) == len(feed_config['fields']): # all fields are required
|
if required_count == required_found:
|
||||||
if title_link:
|
|
||||||
item['title_link'] = title_link
|
|
||||||
items.append(item)
|
items.append(item)
|
||||||
|
|
||||||
title = response.selector.xpath('//title/text()').extract()
|
title = response.selector.xpath('//title/text()').extract()
|
||||||
@ -121,8 +124,8 @@ def buildFeed(response, feed_config):
|
|||||||
title = item['title'] if 'title' in item else ''
|
title = item['title'] if 'title' in item else ''
|
||||||
desc = item['description'] if 'description' in item else ''
|
desc = item['description'] if 'description' in item else ''
|
||||||
time = item['time']
|
time = item['time']
|
||||||
if 'title_link' in item:
|
if 'link' in item:
|
||||||
link = item['title_link']
|
link = item['link']
|
||||||
else:
|
else:
|
||||||
link = url_hash_regexp.sub('#' + md5((title+desc).encode('utf-8')).hexdigest(), feed_config['uri'])
|
link = url_hash_regexp.sub('#' + md5((title+desc).encode('utf-8')).hexdigest(), feed_config['uri'])
|
||||||
feed.add_item(
|
feed.add_item(
|
||||||
@ -140,7 +143,7 @@ def getFeedData(request, feed_id):
|
|||||||
db = get_conn()
|
db = get_conn()
|
||||||
with db:
|
with db:
|
||||||
cur = db.cursor()
|
cur = db.cursor()
|
||||||
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath from frontend_feed f
|
cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath, fi.required from frontend_feed f
|
||||||
right join frontend_feedfield ff on ff.feed_id=f.id
|
right join frontend_feedfield ff on ff.feed_id=f.id
|
||||||
left join frontend_field fi on fi.id=ff.field_id
|
left join frontend_field fi on fi.id=ff.field_id
|
||||||
where f.id=%s""", (feed_id,))
|
where f.id=%s""", (feed_id,))
|
||||||
@ -152,7 +155,9 @@ def getFeedData(request, feed_id):
|
|||||||
feed['uri'] = row[0]
|
feed['uri'] = row[0]
|
||||||
feed['xpath'] = row[1]
|
feed['xpath'] = row[1]
|
||||||
feed['fields'] = {}
|
feed['fields'] = {}
|
||||||
|
feed['required'] = {}
|
||||||
feed['fields'][row[2]] = row[3]
|
feed['fields'][row[2]] = row[3]
|
||||||
|
feed['required'][row[2]] = row[4]
|
||||||
|
|
||||||
if feed:
|
if feed:
|
||||||
return [feed['uri'], feed]
|
return [feed['uri'], feed]
|
||||||
|
@ -1,21 +1,24 @@
|
|||||||
[
|
[
|
||||||
{
|
{
|
||||||
"fields": {
|
"fields": {
|
||||||
"name": "title"
|
"name": "title",
|
||||||
|
"required": 1
|
||||||
},
|
},
|
||||||
"model": "frontend.field",
|
"model": "frontend.field",
|
||||||
"pk": 1
|
"pk": 1
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"fields": {
|
"fields": {
|
||||||
"name": "description"
|
"name": "description",
|
||||||
|
"required": 1
|
||||||
},
|
},
|
||||||
"model": "frontend.field",
|
"model": "frontend.field",
|
||||||
"pk": 2
|
"pk": 2
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"fields": {
|
"fields": {
|
||||||
"name": "link"
|
"name": "link",
|
||||||
|
"required": 0
|
||||||
},
|
},
|
||||||
"model": "frontend.field",
|
"model": "frontend.field",
|
||||||
"pk": 3
|
"pk": 3
|
||||||
|
19
frontend/frontend/migrations/0003_field_required.py
Normal file
19
frontend/frontend/migrations/0003_field_required.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from django.db import migrations, models
|
||||||
|
|
||||||
|
|
||||||
|
class Migration(migrations.Migration):
    """Add a boolean ``required`` field (default ``True``) to the ``field`` model."""

    # Depends on the preceding migration of the ``frontend`` app.
    dependencies = [('frontend', '0002_auto_20170711_2119')]

    # Single schema operation: the new ``required`` column.
    operations = [
        migrations.AddField(
            'field',
            'required',
            field=models.BooleanField(default=True),
        ),
    ]
|
@ -7,6 +7,7 @@ class Feed(models.Model):
|
|||||||
|
|
||||||
class Field(models.Model):
|
class Field(models.Model):
|
||||||
name = models.CharField(max_length=200)
|
name = models.CharField(max_length=200)
|
||||||
|
required = models.BooleanField(default=True)
|
||||||
|
|
||||||
class FeedField(models.Model):
|
class FeedField(models.Model):
|
||||||
feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
|
feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
|
||||||
|
@ -84,11 +84,14 @@ def _create_feed(url, xpathes):
|
|||||||
feed.save()
|
feed.save()
|
||||||
|
|
||||||
fields = Field.objects.all()
|
fields = Field.objects.all()
|
||||||
|
|
||||||
for field in fields:
|
for field in fields:
|
||||||
if field.name in item_xpathes:
|
if field.name in item_xpathes:
|
||||||
ff = FeedField(feed=feed, field=field, xpath=item_xpathes[field.name])
|
ff = FeedField(feed=feed, field=field, xpath=item_xpathes[field.name])
|
||||||
ff.save()
|
ff.save()
|
||||||
|
if field.name == 'link' and 'title' in item_xpathes:
|
||||||
|
ff = FeedField(feed=feed, field=field, xpath='('+ item_xpathes['title'] +')[1]/ancestor-or-self::node()[name()="a"]/@href')
|
||||||
|
ff.save()
|
||||||
|
|
||||||
return feed.id
|
return feed.id
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user