mirror of
https://github.com/taroved/pol
synced 2025-05-16 06:10:09 -07:00
new mon
This commit is contained in:
parent
35c382553c
commit
6e5cb836cd
@ -4,7 +4,7 @@ import time, sys
|
||||
from hashlib import md5
|
||||
from datetime import datetime
|
||||
|
||||
from twisted.logger import globalLogBeginner, formatEventAsClassicLogText
|
||||
from twisted.logger import globalLogBeginner, formatEventAsClassicLogText, Logger
|
||||
from twisted.web import server, resource
|
||||
from twisted.internet import reactor, endpoints, defer
|
||||
from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool
|
||||
@ -52,6 +52,7 @@ def print_log(event):
|
||||
|
||||
globalLogBeginner.beginLoggingTo([print_log], discardBuffer=True, redirectStandardIO=False) # required, discardBuffer gets rid of the LimitedHistoryLogObserver, redirectStandardIO will loop print action
|
||||
|
||||
log = Logger()
|
||||
|
||||
if FEED_REQUEST_PERIOD_LIMIT:
|
||||
import redis
|
||||
@ -189,9 +190,9 @@ def downloadDone(response_str, request, response, feed_config):
|
||||
|
||||
from pympler import tracker
|
||||
import gc
|
||||
#sum = None
|
||||
|
||||
tr = tracker.SummaryTracker()
|
||||
MON_PERIOD_SECONDS = 5#3 * 60 * 60 # 3 hours
|
||||
MON_PERIOD_SECONDS = 1 * 60 * 60 # 1 hour
|
||||
mon_time = None
|
||||
def mon(none):
|
||||
global mon_time
|
||||
@ -201,7 +202,8 @@ def mon(none):
|
||||
#pool.closeCachedConnections()
|
||||
gc.collect()
|
||||
global tr
|
||||
tr.print_diff()
|
||||
for line in tr.format_diff():
|
||||
log.info(line)
|
||||
mon_time = tm
|
||||
|
||||
def run_pgc():
|
||||
|
11
feed.py
11
feed.py
@ -11,8 +11,11 @@ import datetime
|
||||
import MySQLdb
|
||||
from contextlib import closing
|
||||
from settings import DATABASES, DOWNLOADER_USER_AGENT
|
||||
from twisted.logger import Logger
|
||||
|
||||
|
||||
log = Logger()
|
||||
|
||||
url_hash_regexp = re.compile('(#.*)?$')
|
||||
|
||||
POST_TIME_DISTANCE = 15 # minutes, RSS Feed Reader skips posts with the same title created within a 10 min interval
|
||||
@ -23,15 +26,14 @@ def save_post(conn, created, feed_id, post_fields):
|
||||
with conn as cur:
|
||||
cur.execute("""insert into frontend_post (md5sum, created, feed_id)
|
||||
values (%s, %s, %s)""", (post_fields['md5'], created, feed_id))
|
||||
print(cur._last_executed)
|
||||
post_id = cur._last_executed
|
||||
|
||||
post_id = conn.insert_id()
|
||||
for key in ['title', 'description', 'title_link']:
|
||||
if key in post_fields:
|
||||
#import pdb;pdb.set_trace()
|
||||
cur.execute("""insert into frontend_postfield (field_id, post_id, `text`)
|
||||
values (%s, %s, %s)""", (FIELD_IDS[key], post_id, post_fields[key].encode('utf-8')))
|
||||
print(cur._last_executed)
|
||||
log.info('Post saved id:{id!r}', id=post_id)
|
||||
|
||||
def fill_time(feed_id, items):
|
||||
if not items:
|
||||
@ -55,7 +57,7 @@ def fill_time(feed_id, items):
|
||||
where p.md5sum in (%s)
|
||||
and p.feed_id=%s""" % (quoted_hashes, feed_id,))
|
||||
rows = cur.fetchall()
|
||||
print(cur._last_executed)
|
||||
log.debug('Selected {count!r} posts', count=len(rows))
|
||||
for row in rows:
|
||||
md5hash = row[0]
|
||||
created = row[1]
|
||||
@ -99,7 +101,6 @@ def buildFeed(response, feed_config):
|
||||
tree = selector.root.getroottree()
|
||||
# get data from html
|
||||
items = []
|
||||
#import pdb;pdb.set_trace()
|
||||
for node in selector.xpath(feed_config['xpath']):
|
||||
item = {}
|
||||
required_count = 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user