diff --git a/downloader.py b/downloader.py
index 6dbae40..a398ff8 100644
--- a/downloader.py
+++ b/downloader.py
@@ -4,7 +4,7 @@ import time, sys
 from hashlib import md5
 from datetime import datetime
-from twisted.logger import globalLogBeginner, formatEventAsClassicLogText
+from twisted.logger import globalLogBeginner, formatEventAsClassicLogText, Logger
 from twisted.web import server, resource
 from twisted.internet import reactor, endpoints, defer
 from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool
@@ -52,6 +52,7 @@ def print_log(event):
 
 globalLogBeginner.beginLoggingTo([print_log], discardBuffer=True, redirectStandardIO=False) # required: discardBuffer gets rid of the LimitedHistoryLogObserver, redirectStandardIO=True would loop the print action
+log = Logger()
 
 if FEED_REQUEST_PERIOD_LIMIT:
     import redis
@@ -189,9 +190,9 @@ def downloadDone(response_str, request, response, feed_config):
 
 from pympler import tracker
 import gc
-#sum = None
+
 tr = tracker.SummaryTracker()
-MON_PERIOD_SECONDS = 5#3 * 60 * 60 # 3 hours
+MON_PERIOD_SECONDS = 1 * 60 * 60 # 1 hour
 mon_time = None
 def mon(none):
     global mon_time
@@ -201,7 +202,8 @@ def mon(none):
         #pool.closeCachedConnections()
         gc.collect()
         global tr
-        tr.print_diff()
+        for line in tr.format_diff():
+            log.info(line)
         mon_time = tm
 
 def run_pgc():
diff --git a/feed.py b/feed.py
index 7f7d585..32d04b7 100644
--- a/feed.py
+++ b/feed.py
@@ -11,8 +11,11 @@ import datetime
 import MySQLdb
 from contextlib import closing
 from settings import DATABASES, DOWNLOADER_USER_AGENT
+from twisted.logger import Logger
 
+log = Logger()
+
 url_hash_regexp = re.compile('(#.*)?$')
 
 POST_TIME_DISTANCE = 15 # minutes, RSS Feed Reader skips same titles created within a 10 min interval
@@ -23,15 +26,13 @@ def save_post(conn, created, feed_id, post_fields):
     with conn as cur:
         cur.execute("""insert into frontend_post (md5sum, created, feed_id)
                                 values (%s, %s, %s)""", (post_fields['md5'], created, feed_id))
-        print(cur._last_executed)
         post_id = conn.insert_id()
         for key in ['title', 'description', 'title_link']:
             if key in post_fields:
-                #import pdb;pdb.set_trace()
                 cur.execute("""insert into frontend_postfield (field_id, post_id, `text`)
                                         values (%s, %s, %s)""", (FIELD_IDS[key], post_id, post_fields[key].encode('utf-8')))
-        print(cur._last_executed)
+        log.info('Post saved id:{id!r}', id=post_id)
 
 def fill_time(feed_id, items):
     if not items:
@@ -55,7 +57,7 @@ def fill_time(feed_id, items):
                 where p.md5sum in (%s)
                 and p.feed_id=%s""" % (quoted_hashes, feed_id,))
         rows = cur.fetchall()
-        print(cur._last_executed)
+        log.debug('Selected {count!r} posts', count=len(rows))
         for row in rows:
             md5hash = row[0]
             created = row[1]
@@ -99,7 +101,6 @@ def buildFeed(response, feed_config):
     tree = selector.root.getroottree()
     # get data from html
     items = []
-    #import pdb;pdb.set_trace()
     for node in selector.xpath(feed_config['xpath']):
         item = {}
         required_count = 0
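
Note on the downloader.py monitor hunks: tracker.format_diff() returns the same lines print_diff() would write to stdout, so each one can be routed through the new Logger instead. Below is a self-contained sketch of that pattern under stated assumptions: the stdout observer is illustrative, and LoopingCall stands in for the app's own mon()/run_pgc() scheduling, which this diff does not show.

    import sys
    from pympler import tracker
    from twisted.internet import reactor, task
    from twisted.logger import Logger, globalLogBeginner, textFileLogObserver

    log = Logger()
    tr = tracker.SummaryTracker()

    def report_memory_diff():
        # Pass each line as a field rather than as the format string itself,
        # so brace characters in the text cannot be mistaken for placeholders.
        for line in tr.format_diff():
            log.info('{line}', line=line)

    globalLogBeginner.beginLoggingTo([textFileLogObserver(sys.stdout)],
                                     discardBuffer=True, redirectStandardIO=False)

    # Report hourly, mirroring MON_PERIOD_SECONDS above.
    task.LoopingCall(report_memory_diff).start(60 * 60, now=False)
    reactor.run()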
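
Similarly for feed.py: twisted.logger formats messages lazily from PEP-3101-style fields, so the message template stays constant while the data rides along on the event. A minimal sketch of the call shape used by save_post() and fill_time(); the observer setup and the sample post_id value are illustrative only, not part of this patch:

    import sys
    from twisted.logger import Logger, globalLogBeginner, textFileLogObserver

    log = Logger()

    # discardBuffer drops the default in-memory LimitedHistoryLogObserver;
    # redirectStandardIO=False leaves print() untouched.
    globalLogBeginner.beginLoggingTo([textFileLogObserver(sys.stdout)],
                                     discardBuffer=True, redirectStandardIO=False)

    post_id = 42  # illustrative; save_post() gets the real value from conn.insert_id()
    log.info('Post saved id:{id!r}', id=post_id)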