mirror of https://github.com/taroved/pol
synced 2025-05-21 00:20:22 -07:00

Commit ce8432fb55 (parent 54e6dd0462): feed generator is working
@@ -13,7 +13,9 @@ from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
 
 from lxml import etree
 import re
 
+from feed import startFeedRequest
+
 def getPageFactory(url, contextFactory=None, *args, **kwargs):
     """
@@ -87,14 +89,15 @@ def downloadDone(response_str, request=None, page_factory=None, url=None):
     request.finish()
 
 def downloadError(error, request=None, page_factory=None):
     #import pdb; pdb.set_trace()
     request.write('Downloader error: ' + str(error.value))
     request.finish()
 
 
-class Counter(resource.Resource):
+class Downloader(resource.Resource):
     isLeaf = True
 
+    feed_regexp = re.compile(r'^/feed/(\d+)$')
+
     def startRequest(self, request, url):
         page_factory = getPageFactory(url,
             headers={
@@ -118,18 +121,21 @@ class Counter(resource.Resource):
 
     def render_GET(self, request):
         '''
-        Render page for frontend
+        Render page for frontend or RSS feed
         '''
         if 'url' in request.args:
             url = request.args['url'][0]
 
             self.startRequest(request, url)
             return NOT_DONE_YET
+        elif self.feed_regexp.match(request.uri) is not None:
+            feed_id = self.feed_regexp.match(request.uri).groups()[0]
+            startFeedRequest(request, feed_id)
+            return NOT_DONE_YET
         else:
             return 'Url is required'
 
 
 
-endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Counter()))
+endpoints.serverFromString(reactor, "tcp:1234").listen(server.Site(Downloader()))
 print 'Server starting at http://localhost:1234'
 reactor.run()
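
Note: the new /feed/<id> branch above hinges on feed_regexp. A quick sanity check of the pattern (hypothetical snippet, not part of the commit):

    # Only paths of the form /feed/<digits> are routed to startFeedRequest;
    # everything else falls through to the downloader/error branches.
    import re

    feed_regexp = re.compile(r'^/feed/(\d+)$')

    assert feed_regexp.match('/feed/42').groups()[0] == '42'
    assert feed_regexp.match('/downloader?url=x') is None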

feed.py (new file, 134 lines)
@@ -0,0 +1,134 @@
from twisted.web import server, resource
from twisted.internet import reactor, endpoints
from twisted.web.client import HTTPClientFactory, _makeGetterFactory
from twisted.web.server import NOT_DONE_YET

from scrapy.http.response.text import TextResponse
from scrapy.downloadermiddlewares.decompression import DecompressionMiddleware
from scrapy.selector import Selector

from scrapy.http import Headers
from scrapy.responsetypes import responsetypes

from lxml import etree

from feedgenerator import Rss201rev2Feed, Enclosure
import datetime

import MySQLdb
from settings import DATABASES

def _getPageFactory(url, contextFactory=None, *args, **kwargs):
|
||||
"""
|
||||
Download a web page as a string.
|
||||
Download a page. Return a deferred, which will callback with a
|
||||
page (as a string) or errback with a description of the error.
|
||||
See L{HTTPClientFactory} to see what extra arguments can be passed.
|
||||
"""
|
||||
return _makeGetterFactory(
|
||||
url,
|
||||
HTTPClientFactory,
|
||||
contextFactory=contextFactory,
|
||||
*args, **kwargs)
|
||||

def _buildScrapyResponse(page_factory, body):
    status = int(page_factory.status)
    headers = Headers(page_factory.response_headers)
    respcls = responsetypes.from_args(headers=headers, url=page_factory.url)
    return respcls(url=page_factory.url, status=status, headers=headers, body=body)

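For context, responsetypes maps response headers to a response class, so an HTML page yields an HtmlResponse (a TextResponse subclass), which is what the _downloadDone check below relies on. A small illustration (assuming Scrapy 1.0.x, as pinned in requirements.txt):

    # Hypothetical example: text/html headers resolve to HtmlResponse.
    from scrapy.responsetypes import responsetypes
    from scrapy.http import Headers

    headers = Headers({'Content-Type': ['text/html; charset=utf-8']})
    cls = responsetypes.from_args(headers=headers, url='http://example.com/')
    print cls  # expected: <class 'scrapy.http.response.html.HtmlResponse'>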
def element_to_string(element):
    s = [element.text] if element.text else []
    for sub_element in element:
        s.append(etree.tostring(sub_element))
    if element.tail:  # tail may be None; joining None would raise TypeError
        s.append(element.tail)
    return ''.join(s)
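A hypothetical usage example of element_to_string, serializing an element's inner markup (text, children, and their tails):

    # With element_to_string from feed.py in scope:
    from lxml import etree

    node = etree.fromstring('<div>Hello <b>world</b>!</div>')
    print element_to_string(node)  # -> Hello <b>world</b>!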

def _buildFeed(response, feed_config):
    tree = response.selector._root.getroottree()

    # get data from html
    items = []
    for node in tree.xpath(feed_config['xpath']):
        item = {}
        for field_name in ['title', 'description']:
            if field_name in feed_config['fields']:
                element = node.xpath(feed_config['fields'][field_name])
                if element:
                    item[field_name] = element_to_string(element[0])
        items.append(item)

    #import pdb; pdb.set_trace()
    # build feed
    feed = Rss201rev2Feed(
        title='Polite Pol: ' + feed_config['uri'],
        link=feed_config['uri'],
        description="Generated by PolitePol.com.\n" + \
            "Url: " + feed_config['uri'],
        language="en",
    )

    for item in items:
        feed.add_item(
            title=item['title'] if 'title' in item else '',
            link=feed_config['uri'],
            description=item['description'] if 'description' in item else '',
            #enclosure=Enclosure(fields[4], "32000", "image/jpeg") if 4 in fields else None, #"Image"
            pubdate=datetime.datetime.now()
        )
    return feed.writeString('utf-8')
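For illustration, a hypothetical feed_config of the shape that startFeedRequest (further down) assembles from the frontend_feed/frontend_feedfield tables:

    feed_config = {
        'uri': 'http://example.com/news',
        'xpath': '//div[@class="post"]',      # selects one node per feed item
        'fields': {
            'title': './/h2/a',               # evaluated relative to each item node
            'description': './/div[@class="excerpt"]',
        },
    }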

def _downloadDone(response_str, request=None, page_factory=None, feed_config=None):
    response = _buildScrapyResponse(page_factory, response_str)

    response = DecompressionMiddleware().process_response(None, response, None)

    if isinstance(response, TextResponse):
        response_str = _buildFeed(response, feed_config)

    request.setHeader(b"Content-Type", b'text/xml')
    request.write(response_str)
    request.finish()

def _downloadError(error, request=None, page_factory=None):
    # str() guards against concatenating an exception object to a str
    request.write('Downloader error: ' + str(error.value))
    request.finish()

def startFeedRequest(request, feed_id):
    # get url, xpathes
    creds = DATABASES['default']
    db = MySQLdb.connect(host=creds['HOST'], port=int(creds['PORT']), user=creds['USER'], passwd=creds['PASSWORD'], db=creds['NAME'])
    feed = {}
    with db:
        cur = db.cursor()
        cur.execute("""select f.uri, f.xpath, fi.name, ff.xpath from frontend_feed f
                       right join frontend_feedfield ff on ff.feed_id=f.id
                       left join frontend_field fi on fi.id=ff.field_id
                       where f.id=%s""", (feed_id,))
        rows = cur.fetchall()

        for row in rows:
            if not feed:
                feed['uri'] = row[0]
                feed['xpath'] = row[1]
                feed['fields'] = {}
            feed['fields'][row[2]] = row[3]

    if feed:
        page_factory = _getPageFactory(feed['uri'],
            headers={
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Encoding': 'gzip, deflate, sdch',
                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36'
            },
            redirectLimit=5,
            timeout=10
        )
        d = page_factory.deferred
        d.addCallback(_downloadDone, request=request, page_factory=page_factory, feed_config=feed)
        d.addErrback(_downloadError, request=request, page_factory=page_factory)
    else:
        request.write('Feed generator error: config of feed is empty')  # was Request.write, a NameError
        request.finish()
    return
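With the Twisted server above running, a generated feed can be fetched over plain HTTP. A hypothetical smoke test (Python 2, matching the codebase; assumes a feed with id 1 exists):

    # Fetch feed id 1 from the local server and print the RSS payload.
    import urllib2

    print urllib2.urlopen('http://localhost:1234/feed/1').read()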
@ -38,7 +38,9 @@ INSTALLED_APPS = (
|
||||
'django.contrib.sessions',
|
||||
'django.contrib.messages',
|
||||
'django.contrib.staticfiles',
|
||||
'django.contrib.sites',
|
||||
'pipeline',
|
||||
'frontend',
|
||||
)
|
||||
|
||||
MIDDLEWARE_CLASSES = (
|
||||
@ -153,9 +155,11 @@ PIPELINE_JS = {
|
||||
'frontend/assets/js/bootstrap.js',
|
||||
'frontend/assets/js/bootstrap_and_overrides.js',
|
||||
'frontend/assets/js/setup-tool.js',
|
||||
'frontend/assets/js/jquery.jfeed.js',
|
||||
),
|
||||
'output_filename': 'frontend/js/app.js',
|
||||
}
|
||||
}
|
||||
|
||||
DOWNLOADER_PAGE_URL = 'http://politepol.com/downloader?url='
|
||||
FEED_PAGE_URL = 'http://politepol.com/feed/'
|
||||
|
@@ -324,7 +324,6 @@ function requestSelection() {
                 reject(errMsg);
             }
         });
-        console.log(JSON.stringify(htmlJson));
     });
 else {
     return new Promise(function(resolve, reject){
@@ -418,8 +417,7 @@ function onCreateButtonClick() {
     if (active)
         //todo: freeze UI
         createFeed().then(function(feed_page_url){
-            alert(feed_page_url);
-            //window.location.href = feed_page_url;
+            window.location.href = feed_page_url;
         }, function(error){
             //todo: unfreeze UI
             console.log('Server error: ' + error);
@@ -440,7 +438,6 @@ function createFeed() {
         url: "/setup_create_feed",
         data: JSON.stringify({ html: htmlJson, names: name_ids, url: $('#create').data('page-url') }),
         contentType: "application/json; charset=utf-8",
         dataType: "json",
         headers: {"X-CSRFToken": getCookie('csrftoken')},
         success: function(data){
             resolve(data)
@@ -449,7 +446,6 @@ function createFeed() {
             reject(errMsg);
         }
     });
-    console.log(JSON.stringify(htmlJson));
 });
 else {
     return new Promise(function(resolve, reject){

frontend/frontend/templates/frontend/preview.html (new file, 73 lines)
@@ -0,0 +1,73 @@
{% extends "base.html" %}
{% load staticfiles %}
{% load i18n %}

{% block content %}
<div class="page-header">
  <h1>Feed is ready</h1>
</div>
<div class="media">
  <a class="pull-left" href="{{ feed_url }}">
    <img src="{% static 'frontend/images/rss-640.png' %}" width="64" height="64" class="media-object" />
  </a>

  <div class="media-body">
    <h4 class="media-heading">subscribe.your_feed <a href="?" target="_blank">subscribe.rss_help_link</a>
      :</h4>

    <h3>
      <a href="{{ feed_url }}" target="_blank">{{ feed_url }}</a>
    </h3>
  </div>

  <div id="preview" class="well" style="margin-bottom: 60px">
    subscribe.loading
  </div>
  <script type="text/javascript">
    function tryGetFeed() {
      $.getFeed({
        url: '{{ feed_url }}',
        success: function (feed) {
          if ('items' in feed) {
            $('#preview').empty()
              .append($('<h3 style="margin-top: 0;"></h3>').append("<%= I18n.t('subscribe.preview') %>: "));
            /*.append($('<div></div>').append($('<a/>').attr('href', feed.link).attr('target', '_blank').text(feed.title)))*/

            var html = '';

            for (var i = 0; i < feed.items.length; i++) {

              var item = feed.items[i];

              $('#preview').append('<h4>'
                + '<a href="'
                + item.link
                + '">'
                + item.title
                + '</a>'
                + '</h4>');

              $('#preview').append('<div>'
                + item.description
                + '</div>');
            }
          }
          else {
            $('#preview').text($('#preview').text().trim() + ' .');
            setTimeout(tryGetFeed, 2000);
          }
        },
        failure: function () {
          $('#preview').text($('#preview').text().trim() + ' .');
          setTimeout(tryGetFeed, 2000);
        },
        error: function () {
          $('#preview').text($('#preview').text().trim() + ' .');
          setTimeout(tryGetFeed, 2000);
        }
      });
    }
    tryGetFeed();
  </script>
</div>
{% endblock %}
@@ -22,6 +22,7 @@ from . import views
 urlpatterns = i18n_patterns(
     url(r'^$', views.index, name='index'),
     url(r'^setup$', views.setup, name='setup'),
+    url(r'^preview/([0-9]+)$', views.preview, name='preview'),
     url(r'^admin/', include(admin.site.urls)),
 )
 
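Note: the new preview route accepts only numeric ids, and reverse('preview', args=(feed_id,)) builds the matching path (used in views.py below). A quick hypothetical check of the pattern itself:

    # The captured group is what Django passes to views.preview as feed_id.
    import re

    pattern = re.compile(r'^preview/([0-9]+)$')
    assert pattern.match('preview/7').group(1) == '7'
    assert pattern.match('preview/abc') is None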
@@ -9,7 +9,7 @@ from django.core.exceptions import ValidationError
 from django.core.urlresolvers import reverse
 
 from .forms import IndexForm
-from .settings import DOWNLOADER_PAGE_URL
+from .settings import DOWNLOADER_PAGE_URL, FEED_PAGE_URL
 
 from .setup_tool import get_selection_tag_ids, build_xpathes_for_items
 from .models import Feed, Field, FeedField
@@ -37,8 +37,7 @@ def setup(request):
     return render(request, 'frontend/setup.html',
         {
             'external_page_url': external_page_url,
-            'page_url': request.GET['url'],
-            'feed_page_url': reverse('setup_create_feed') # todo: replace with feedpage
+            'page_url': request.GET['url']
         })
 
     return HttpResponseBadRequest('Url is required')
@@ -75,7 +74,6 @@ def _create_feed(url, xpathes):
     feed_xpath = xpathes[0]
     item_xpathes = xpathes[1]
 
     #import pdb; pdb.set_trace()
     feed = Feed(uri=url, xpath=feed_xpath)
     feed.save()
 
@@ -102,4 +100,15 @@ def setup_create_feed(request):
     xpathes = build_xpathes_for_items(item_names, html_json)
     feed_id = _create_feed(url, xpathes)
 
-    return HttpResponse(feed_id)
+    return HttpResponse(reverse('preview', args=(feed_id,)))
+
+def preview(request, feed_id):
+    #import pdb; pdb.set_trace()
+
+    if request.method == 'GET':
+        return render(request, 'frontend/preview.html',
+            {
+                'feed_url': FEED_PAGE_URL + feed_id,
+            })
+
+    return HttpResponseBadRequest('Only GET method supported')

@@ -5,6 +5,7 @@ Scrapy==1.0.3
 django-pipeline==1.5.4
 mysqlclient==1.3.7
 w3lib==1.12.0
+feedgenerator==1.8
 #sudo apt-get install nodejs npm
 #npm install -g less
 #ln -s /usr/bin/nodejs /usr/bin/node

settings.py (new symbolic link)
@@ -0,0 +1 @@
+./frontend/frontend/settings.py