v/pol
1
0
mirror of https://github.com/taroved/pol synced 2025-05-28 03:50:08 -07:00

fix tests

This commit is contained in:
Alexandr Nesterenko 2017-07-20 12:47:07 +03:00
parent 27de6c7363
commit 615215ff16

View File

@ -1,24 +1,30 @@
# coding=utf-8
from lxml import etree
import sys
import requests
from feed import element_to_unicode
def element_to_string(element):
    """Return the inner content of ``element`` as a single string.

    A value that is already a string (e.g. an attribute value) is returned
    unchanged.  Otherwise the result is the element's leading text, when it
    is non-empty, followed by ``etree.tostring`` of every direct child.
    """
    if isinstance(element, basestring):  # attribute
        return element
    parts = []
    if element.text:
        parts.append(element.text)
    parts.extend(etree.tostring(child) for child in element)
    return ''.join(parts)
def test1_get_inner_html():
    """Check that ``element_to_unicode`` returns an element's inner markup."""
    root = etree.fromstring('<a>1<b>2</b>3<c>4</c>5</a>')
    # Leading text, child elements and the text following each child must
    # all be present, in document order, and compare equal to a unicode
    # literal.
    assert element_to_unicode(root, 'utf-8') == u'1<b>2</b>3<c>4</c>5'
# Numeric ids iterated by the crawl/diff helpers in this script.
ids = [
    1, 2, 3, 5, 6, 54, 100, 101, 113, 118, 123, 124,
    131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
    141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
    151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
    161, 162, 163, 166, 167,
]

# Host name used by this script (presumably the site serving the feeds;
# confirm against the crawl code).
domain = "politepol.com"
def parse_feed(text):
    """Parse the RSS document ``text`` and return ``[title, items]``.

    ``title`` is the text of the first ``title`` child of the first
    ``/rss/channel`` element, encoded as UTF-8.  ``items`` is the list of
    ``item`` elements found under that same channel.

    Raises ``IndexError`` when the document has no ``/rss/channel`` element
    or the channel has no ``title`` child.
    """
    channel = etree.fromstring(text).xpath('/rss/channel')[0]
    title = channel.xpath('title')[0].text
    items = channel.xpath('item')
    # Only one return is kept: the earlier un-encoded ``return [title, items]``
    # made this encoded variant unreachable.
    return [title.encode('utf-8'), items]
def crawl(extention):
number = 0
@ -66,27 +72,27 @@ def diff(ext1, ext2):
print "Different post count: %s vs %s" % (len(items1), len(item2))
f.write("<< Different posts count: %s.%s:%s vs %s.%s:%s >>\n" % (id, ext1, len(items1), id, ext2, len(item2)))
for post in items1:
posts1[element_to_unicode(post)] = True
posts1[element_to_string(post)] = True
for post in items2:
posts2[element_to_unicode(post)] = True
posts2[element_to_string(post)] = True
for post in items1:
if not (element_to_unicode(post) in post2):
if not (element_to_string(post) in posts2):
posts_diff += 1
f.write("<<<<<<<<<<<<<<< Different posts (%s) >>>>>>>>>>>>>>>\n" % posts_diff)
f.write(">>>>>>>>>>>>>>> %s.%s <<<<<<<<<<<<<\n" % (id, ext1))
f.write(element_to_unicode(post) + "\n")
f.write(element_to_string(post) + "\n")
f.write(">>>>>>>>>>>>>>> %s.%s <<<<<<<<<<<<<\n" % (id, ext2))
f.write("*** Not found ***\n")
for post in items2:
if not (element_to_unicode(post) in post1):
if not (element_to_string(post) in posts1):
posts_diff += 1
f.write("<<<<<<<<<<<<<<< Different posts (%s) >>>>>>>>>>>>>>>\n" % posts_diff)
f.write(">>>>>>>>>>>>>>> %s.%s <<<<<<<<<<<<<\n" % (id, ext1))
f.write("*** Not found ***\n")
f.write(">>>>>>>>>>>>>>> %s.%s <<<<<<<<<<<<<\n" % (id, ext2))
f.write(element_to_unicode(post) + "\n")
f.write(element_to_string(post) + "\n")
print "Content of files %s.%s and %s.%s is different. Diff: %s.diff" % (id, ext1, id, ext2, id)
if posts_diff > 0:
print "Different feeds: %s" % posts_diff