Mirror of https://github.com/taroved/pol (synced 2025-05-21 08:30:24 -07:00)

Commit ce8432fb55 (parent 54e6dd0462): feed generator is working
@@ -13,7 +13,9 @@ from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
 
 from lxml import etree
+import re
 
+from feed import startFeedRequest
 
 def getPageFactory(url, contextFactory=None, *args, **kwargs):
     """
@@ -87,14 +89,15 @@ def downloadDone(response_str, request=None, page_factory=None, url=None):
     request.finish()
 
 def downloadError(error, request=None, page_factory=None):
-    #import pdb; pdb.set_trace()
     request.write('Downloader error: ' + error.value)
     request.finish()
 
 
-class Counter(resource.Resource):
+class Downloader(resource.Resource):
     isLeaf = True
 
+    feed_regexp = re.compile('^/feed/(\d+)$')
 
     def startRequest(self, request, url):
         page_factory = getPageFactory(url,
             headers={
@@ -118,18 +121,21 @@ class Counter(resource.Resource):
 
     def render_GET(self, request):
         '''
-        Render page for frontend
+        Render page for frontend or RSS feed
        '''
        if 'url' in request.args:
            url = request.args['url'][0]
 
            self.startRequest(request, url)
            return NOT_DONE_YET
+       elif self.feed_regexp.match(request.uri) is not None:
+           feed_id = self.feed_regexp.match(request.uri).groups()[0]
+           startFeedRequest(request, feed_id)
+           return NOT_DONE_YET
        else:
            return 'Url is required'
 
 
-endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Counter()))
+endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Downloader()))
 print 'Server starting at http://localhost:1234'
 reactor.run()
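As an aside on the new routing (a minimal sketch, not part of the commit; the '/feed/42' path and the print are made-up illustrations), the Downloader resource now dispatches on request.uri like this:

    import re

    feed_regexp = re.compile('^/feed/(\d+)$')

    # '/feed/42' matches and the captured group is the feed id;
    # '/feed/abc' does not match, so render_GET falls through to
    # the 'Url is required' branch.
    match = feed_regexp.match('/feed/42')
    if match is not None:
        print(match.groups()[0])  # 42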
feed.py (new file, 134 lines)
@@ -0,0 +1,134 @@
+from twisted.web import server, resource
+from twisted.internet import reactor, endpoints
+from twisted.web.client import HTTPClientFactory, _makeGetterFactory
+from twisted.web.server import NOT_DONE_YET
+
+from scrapy.http.response.text import TextResponse
+from scrapy.downloadermiddlewares.decompression import DecompressionMiddleware
+from scrapy.selector import Selector
+
+from scrapy.http import Headers
+from scrapy.responsetypes import responsetypes
+
+from lxml import etree
+
+from feedgenerator import Rss201rev2Feed, Enclosure
+import datetime
+
+import MySQLdb
+from settings import DATABASES
+
+
+def _getPageFactory(url, contextFactory=None, *args, **kwargs):
+    """
+    Download a web page as a string.
+
+    Return a deferred, which will callback with a page (as a string)
+    or errback with a description of the error.
+
+    See L{HTTPClientFactory} to see what extra arguments can be passed.
+    """
+    return _makeGetterFactory(
+        url,
+        HTTPClientFactory,
+        contextFactory=contextFactory,
+        *args, **kwargs)
+
+def _buildScrapyResponse(page_factory, body):
+    status = int(page_factory.status)
+    headers = Headers(page_factory.response_headers)
+    respcls = responsetypes.from_args(headers=headers, url=page_factory.url)
+    return respcls(url=page_factory.url, status=status, headers=headers, body=body)
+
+def element_to_string(element):
+    s = [element.text] if element.text else []
+    for sub_element in element:
+        s.append(etree.tostring(sub_element))
+    if element.tail:  # tail is None when no text follows the element
+        s.append(element.tail)
+    return ''.join(s)
+
+def _buildFeed(response, feed_config):
+    tree = response.selector._root.getroottree()
+
+    # get data from html
+    items = []
+    for node in tree.xpath(feed_config['xpath']):
+        item = {}
+        for field_name in ['title', 'description']:
+            if field_name in feed_config['fields']:
+                element = node.xpath(feed_config['fields'][field_name])
+                if element:
+                    item[field_name] = element_to_string(element[0])
+        items.append(item)
+
+    # build feed
+    feed = Rss201rev2Feed(
+        title='Polite Pol: ' + feed_config['uri'],
+        link=feed_config['uri'],
+        description="Generated by PolitePol.com.\n" +
+                    "Url: " + feed_config['uri'],
+        language="en",
+    )
+
+    for item in items:
+        feed.add_item(
+            title=item['title'] if 'title' in item else '',
+            link=feed_config['uri'],
+            description=item['description'] if 'description' in item else '',
+            #enclosure=Enclosure(fields[4], "32000", "image/jpeg") if 4 in fields else None, #"Image"
+            pubdate=datetime.datetime.now()
+        )
+    return feed.writeString('utf-8')
+
+def _downloadDone(response_str, request=None, page_factory=None, feed_config=None):
+    response = _buildScrapyResponse(page_factory, response_str)
+
+    response = DecompressionMiddleware().process_response(None, response, None)
+
+    if isinstance(response, TextResponse):
+        response_str = _buildFeed(response, feed_config)
+
+    request.setHeader(b"Content-Type", b'text/xml')
+    request.write(response_str)
+    request.finish()
+
+def _downloadError(error, request=None, page_factory=None):
+    request.write('Downloader error: ' + error.value)
+    request.finish()
+
+def startFeedRequest(request, feed_id):
+    # get url and xpathes of the feed from the database
+    creds = DATABASES['default']
+    db = MySQLdb.connect(host=creds['HOST'], port=int(creds['PORT']), user=creds['USER'],
+                         passwd=creds['PASSWORD'], db=creds['NAME'])
+    feed = {}
+    with db:
+        cur = db.cursor()
+        cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath from frontend_feed f
+                       right join frontend_feedfield ff on ff.feed_id=f.id
+                       left join frontend_field fi on fi.id=ff.field_id
+                       where f.id=%s""", (feed_id,))
+        rows = cur.fetchall()
+
+        for row in rows:
+            if not feed:
+                feed['uri'] = row[0]
+                feed['xpath'] = row[1]
+                feed['fields'] = {}
+            feed['fields'][row[2]] = row[3]
+
+    if feed:
+        page_factory = _getPageFactory(feed['uri'],
+            headers={
+                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+                'Accept-Encoding': 'gzip, deflate, sdch',
+                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
+            },
+            redirectLimit=5,
+            timeout=10
+        )
+        d = page_factory.deferred
+        d.addCallback(_downloadDone, request=request, page_factory=page_factory, feed_config=feed)
+        d.addErrback(_downloadError, request=request, page_factory=page_factory)
+    else:
+        request.write('Feed generator error: config of feed is empty')
+        request.finish()
+    return
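For reference, the feedgenerator API that _buildFeed drives, as a minimal standalone sketch (the title, link, and item values are invented examples, not data from the commit):

    import datetime
    from feedgenerator import Rss201rev2Feed

    feed = Rss201rev2Feed(
        title='Polite Pol: http://example.com/news',  # invented example values
        link='http://example.com/news',
        description='Generated by PolitePol.com.\nUrl: http://example.com/news',
        language='en',
    )
    feed.add_item(
        title='First headline',
        link='http://example.com/news',
        description='item body selected by the configured xpath',
        pubdate=datetime.datetime.now(),
    )
    print(feed.writeString('utf-8'))  # serialized RSS 2.0 XML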
@@ -38,7 +38,9 @@ INSTALLED_APPS = (
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'django.contrib.sites',
     'pipeline',
+    'frontend',
 )
 
 MIDDLEWARE_CLASSES = (
@@ -153,9 +155,11 @@ PIPELINE_JS = {
         'frontend/assets/js/bootstrap.js',
         'frontend/assets/js/bootstrap_and_overrides.js',
         'frontend/assets/js/setup-tool.js',
+        'frontend/assets/js/jquery.jfeed.js',
     ),
     'output_filename': 'frontend/js/app.js',
   }
 }
 
 DOWNLOADER_PAGE_URL = 'http://politepol.com/downloader?url='
+FEED_PAGE_URL = 'http://politepol.com/feed/'
@@ -324,7 +324,6 @@ function requestSelection() {
                reject(errMsg);
            }
        });
-       console.log(JSON.stringify(htmlJson));
    });
    else {
        return new Promise(function(resolve, reject){
@@ -418,8 +417,7 @@ function onCreateButtonClick() {
    if (active)
        //todo: freeze UI
        createFeed().then(function(feed_page_url){
-           alert(feed_page_url);
-           //window.location.href = feed_page_url;
+           window.location.href = feed_page_url;
        }, function(error){
            //todo: unfreez UI
            console.log('Server error: '+ error);
@@ -440,7 +438,6 @@ function createFeed() {
            url: "/setup_create_feed",
            data: JSON.stringify({ html: htmlJson, names: name_ids, url:$('#create').data('page-url') }),
            contentType: "application/json; charset=utf-8",
-           dataType: "json",
            headers: {"X-CSRFToken": getCookie('csrftoken')},
            success: function(data){
                resolve(data)
@@ -449,7 +446,6 @@ function createFeed() {
                reject(errMsg);
            }
        });
-       console.log(JSON.stringify(htmlJson));
    });
    else {
        return new Promise(function(resolve, reject){
frontend/frontend/templates/frontend/preview.html (new file, 73 lines)
@@ -0,0 +1,73 @@
+{% extends "base.html" %}
+{% load staticfiles %}
+{% load i18n %}
+
+{% block content %}
+<div class="page-header">
+  <h1>Feed is ready</h1>
+</div>
+<div class="media">
+  <a class="pull-left" href="{{ feed_url }}">
+    <img src="{% static 'frontend/images/rss-640.png' %}" width="64" height="64" class="media-object" />
+  </a>
+
+  <div class="media-body">
+    <h4 class="media-heading">subscribe.your_feed <a href="?" target="_blank">subscribe.rss_help_link</a>:</h4>
+
+    <h3>
+      <a href="{{ feed_url }}" target="_blank">{{ feed_url }}</a>
+    </h3>
+  </div>
+
+  <div id="preview" class="well" style="margin-bottom: 60px">
+    subscribe.loading
+  </div>
+  <script type="text/javascript">
+    function tryGetFeed() {
+      $.getFeed({
+        url: '{{ feed_url }}',
+        success: function (feed) {
+          if ('items' in feed) {
+            $('#preview').empty()
+              .append($('<h3 style="margin-top: 0;"></h3>').append("<%= I18n.t('subscribe.preview') %>: "));
+            /*.append($('<div></div>').append($('<a/>').attr('href', feed.link).attr('target', '_blank').text(feed.title)))*/
+
+            var html = '';
+
+            for (var i = 0; i < feed.items.length; i++) {
+              var item = feed.items[i];
+
+              $('#preview').append('<h4>'
+                + '<a href="'
+                + item.link
+                + '">'
+                + item.title
+                + '</a>'
+                + '</h4>');
+
+              $('#preview').append('<div>'
+                + item.description
+                + '</div>');
+            }
+          }
+          else {
+            $('#preview').text($('#preview').text().trim() + ' .');
+            setTimeout(tryGetFeed, 2000);
+          }
+        },
+        failure: function () {
+          $('#preview').text($('#preview').text().trim() + ' .');
+          setTimeout(tryGetFeed, 2000);
+        },
+        error: function () {
+          $('#preview').text($('#preview').text().trim() + ' .');
+          setTimeout(tryGetFeed, 2000);
+        }
+      });
+    }
+    tryGetFeed();
+  </script>
+</div>
+{% endblock %}
@@ -22,6 +22,7 @@ from . import views
 urlpatterns = i18n_patterns(
     url(r'^$', views.index, name='index'),
     url(r'^setup$', views.setup, name='setup'),
+    url(r'^preview/([0-9]+)$', views.preview, name='preview'),
     url(r'^admin/', include(admin.site.urls)),
 )
 
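Worth noting: the group captured by ([0-9]+) reaches views.preview as a string, which is why FEED_PAGE_URL + feed_id concatenates cleanly in the view below. A minimal sketch of the matching (the 'preview/7' path is a made-up example; the language prefix added by i18n_patterns is ignored here):

    import re

    # Django passes the captured group to the view as a string.
    m = re.match(r'^preview/([0-9]+)$', 'preview/7')
    feed_id = m.group(1)
    print(feed_id)  # '7' (a str, not an int)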
@@ -9,7 +9,7 @@ from django.core.exceptions import ValidationError
 from django.core.urlresolvers import reverse
 
 from .forms import IndexForm
-from .settings import DOWNLOADER_PAGE_URL
+from .settings import DOWNLOADER_PAGE_URL, FEED_PAGE_URL
 
 from .setup_tool import get_selection_tag_ids, build_xpathes_for_items
 from .models import Feed, Field, FeedField
@@ -37,8 +37,7 @@ def setup(request):
         return render(request, 'frontend/setup.html',
             {
                 'external_page_url': external_page_url,
-                'page_url': request.GET['url'],
-                'feed_page_url': reverse('setup_create_feed') # todo: replace with feedpage
+                'page_url': request.GET['url']
             })
 
     return HttpResponseBadRequest('Url is required')
@@ -75,7 +74,6 @@ def _create_feed(url, xpathes):
     feed_xpath = xpathes[0]
     item_xpathes = xpathes[1]
 
-    #import pdb; pdb.set_trace()
     feed = Feed(uri=url, xpath=feed_xpath)
     feed.save()
 
@@ -102,4 +100,15 @@ def setup_create_feed(request):
     xpathes = build_xpathes_for_items(item_names, html_json)
     feed_id = _create_feed(url, xpathes)
 
-    return HttpResponse(feed_id)
+    return HttpResponse(reverse('preview', args=(feed_id,)))
+
+
+def preview(request, feed_id):
+    if request.method == 'GET':
+        return render(request, 'frontend/preview.html',
+            {
+                'feed_url': FEED_PAGE_URL + feed_id,
+            })
+
+    return HttpResponseBadRequest('Only GET method supported')
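Putting the pieces together, a hedged sketch of the new round trip (values are illustrative): setup_create_feed now answers with the preview URL instead of the bare feed id, and preview.html polls the Twisted server at FEED_PAGE_URL + feed_id via jquery.jfeed:

    FEED_PAGE_URL = 'http://politepol.com/feed/'  # from the settings change above

    feed_id = '7'                         # invented id returned by _create_feed
    preview_path = '/preview/' + feed_id  # roughly what reverse('preview', args=(feed_id,)) yields
    feed_url = FEED_PAGE_URL + feed_id    # rendered into preview.html for polling
    print(preview_path + ' -> ' + feed_url)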
@@ -5,6 +5,7 @@ Scrapy==1.0.3
 django-pipeline==1.5.4
 mysqlclient==1.3.7
 w3lib==1.12.0
+feedgenerator==1.8
 #sudo apt-get install nodejs npm
 #npm install -g less
 #ln -s /usr/bin/nodejs /usr/bin/node
settings.py (new symbolic link)
@@ -0,0 +1 @@
+./frontend/frontend/settings.py