From 51dcd46df6a6e56aaa1d1efc6bc47f6b56e3a70b Mon Sep 17 00:00:00 2001 From: Alexandr Nesterenko Date: Wed, 3 May 2017 11:48:20 -0700 Subject: [PATCH] md5 hash suffix to rss links --- feed.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/feed.py b/feed.py index 4a75065..eb42663 100644 --- a/feed.py +++ b/feed.py @@ -14,6 +14,8 @@ import w3lib.url import w3lib.html from lxml import etree +import re +from hashlib import md5 from feedgenerator import Rss201rev2Feed, Enclosure import datetime @@ -21,6 +23,7 @@ import datetime import MySQLdb from settings import DATABASES, DOWNLOADER_USER_AGENT +url_hash_regexp = re.compile('(#.*)?$') def _getPageFactory(url, contextFactory=None, *args, **kwargs): """ @@ -84,12 +87,17 @@ def _buildFeed(response, feed_config): "Url: " + feed_config['uri'], language="en", ) - for item in items: + title = item['title'] if 'title' in item else '' + desc = item['description'] if 'description' in item else '' + if item['title_link']: + link = item['title_link'] + else: + link = url_hash_regexp.sub('#' + md5((title+desc).encode('utf-8')).hexdigest(), feed_config['uri']) feed.add_item( - title=item['title'] if 'title' in item else '', - link = item['title_link'] if 'title_link' in item else feed_config['uri'], - description=item['description'] if 'description' in item else '', + title = title, + link = link, + description = desc, #enclosure=Enclosure(fields[4], "32000", "image/jpeg") if 4 in fields else None, #"Image" pubdate=datetime.datetime.now() )