Mirror of https://github.com/taroved/pol (synced 2025-05-16 14:20:10 -07:00)

Commit 6e5cb836cd ("new mon"), parent 35c382553c
@@ -4,7 +4,7 @@ import time, sys
 from hashlib import md5
 from datetime import datetime
 
-from twisted.logger import globalLogBeginner, formatEventAsClassicLogText
+from twisted.logger import globalLogBeginner, formatEventAsClassicLogText, Logger
 from twisted.web import server, resource
 from twisted.internet import reactor, endpoints, defer
 from twisted.web.client import Agent, BrowserLikeRedirectAgent, readBody, PartialDownloadError, HTTPConnectionPool
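The only change in this hunk is the added Logger import; the rest is context. Logger is twisted's structured logger: an event is emitted as a format string plus keyword fields, and observers decide how to render or route it. A minimal sketch of the pattern (standalone illustration, not code from this repo):

    from twisted.logger import Logger

    log = Logger()  # namespace is inferred from the defining module

    # the fields travel with the event; observers render the string
    log.info("fetched {url} status {code}", url="http://example.com", code=200)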
@@ -52,6 +52,7 @@ def print_log(event):
 
 globalLogBeginner.beginLoggingTo([print_log], discardBuffer=True, redirectStandardIO=False) # requred, discardBuffer gets rid of the LimitedHistoryLogObserver, redirectStandardIO will loop print action
 
+log = Logger()
 
 if FEED_REQUEST_PERIOD_LIMIT:
     import redis
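Context notes for this hunk: discardBuffer=True drops the LimitedHistoryLogObserver that buffers early events, and redirectStandardIO=False keeps print() output from being fed back into the log (the inline comment's "loop print action"), which matters because print_log itself prints. The redis import is only taken when FEED_REQUEST_PERIOD_LIMIT is set, which suggests a per-feed fetch rate limit. One conventional redis implementation of such a limit is a fixed-window counter; a sketch under that assumption (the key naming and the one-fetch-per-window policy are hypothetical, not taken from this repo):

    import redis

    FEED_REQUEST_PERIOD_LIMIT = 60  # hypothetical: window length in seconds

    r = redis.StrictRedis(host="localhost", port=6379, db=0)

    def feed_request_allowed(feed_id):
        key = "feed_requests:%s" % feed_id
        count = r.incr(key)            # atomic increment; creates the key at 1
        if count == 1:
            r.expire(key, FEED_REQUEST_PERIOD_LIMIT)  # window dies with the key
        return count == 1              # hypothetical: one fetch per window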
@@ -189,9 +190,9 @@ def downloadDone(response_str, request, response, feed_config):
 
 from pympler import tracker
 import gc
-#sum = None
+
 tr = tracker.SummaryTracker()
-MON_PERIOD_SECONDS = 5#3 * 60 * 60 # 3 hours
+MON_PERIOD_SECONDS = 1 * 60 * 60 # 1 hours
 mon_time = None
 def mon(none):
     global mon_time
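SummaryTracker snapshots every object the garbage collector can see and diffs consecutive snapshots, so each report lists what was allocated or freed since the previous call. The difference that matters for this commit: print_diff() writes the table straight to stdout, while format_diff() yields the same table as individual lines, which is what lets the next hunk route them through the logger. A standalone sketch:

    from pympler import tracker

    tr = tracker.SummaryTracker()

    baseline = tr.diff()        # first diff is noisy; establishes a baseline
    junk = list(range(100000))  # allocate something measurable

    # each line is one row of the summary: object type, count delta, size delta
    for line in tr.format_diff():
        print(line)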
@@ -201,7 +202,8 @@ def mon(none):
         #pool.closeCachedConnections()
         gc.collect()
         global tr
-        tr.print_diff()
+        for line in tr.format_diff():
+            log.info(line)
         mon_time = tm
 
 def run_pgc():
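Since mon() keeps its own mon_time guard, it is evidently called more often than it reports and self-throttles to one diff per MON_PERIOD_SECONDS; the call site is outside this hunk. For reference, the same periodic behavior is usually spelled with twisted's LoopingCall, which removes the guard variable entirely. A sketch, not necessarily how this file wires it up:

    import gc

    from pympler import tracker
    from twisted.internet import task
    from twisted.logger import Logger

    log = Logger()
    tr = tracker.SummaryTracker()

    def report_memory():
        gc.collect()                   # collect cycles before snapshotting
        for line in tr.format_diff():  # same pattern as the hunk above
            log.info(line)

    task.LoopingCall(report_memory).start(60 * 60, now=False)  # hourly
    # the reactor (already running in this server) drives the schedule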
feed.py (11 changed lines)
@@ -11,8 +11,11 @@ import datetime
 import MySQLdb
 from contextlib import closing
 from settings import DATABASES, DOWNLOADER_USER_AGENT
+from twisted.logger import Logger
 
 
+log = Logger()
+
 url_hash_regexp = re.compile('(#.*)?$')
 
 POST_TIME_DISTANCE = 15 # minutes, RSS Feed Reader skip same titles created in 10 min interval
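feed.py now gets its own module-level Logger. Called with no arguments, Logger derives its namespace from the module it is instantiated in, so events from feed.py stay distinguishable from the server module's events at the observer. Illustrative only:

    from twisted.logger import Logger

    log = Logger()  # namespace becomes this module's name, e.g. "feed"
    log.info("building feed {feed_id}", feed_id=7)
    # observers can filter or route on event["log_namespace"]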
@@ -23,15 +26,14 @@ def save_post(conn, created, feed_id, post_fields):
     with conn as cur:
         cur.execute("""insert into frontend_post (md5sum, created, feed_id)
                        values (%s, %s, %s)""", (post_fields['md5'], created, feed_id))
-        print(cur._last_executed)
+        post_id = cur._last_executed
 
         post_id = conn.insert_id()
         for key in ['title', 'description', 'title_link']:
             if key in post_fields:
-                #import pdb;pdb.set_trace()
                 cur.execute("""insert into frontend_postfield (field_id, post_id, `text`)
                                values (%s, %s, %s)""", (FIELD_IDS[key], post_id, post_fields[key].encode('utf-8')))
-        print(cur._last_executed)
+        log.info('Post saved id:{id!r}', id=post_id)
 
 def fill_time(feed_id, items):
     if not items:
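Two details in this hunk are worth spelling out. First, _last_executed is a private MySQLdb cursor attribute holding the raw text of the last query, not an id, so the new post_id = cur._last_executed assignment is effectively dead: it is overwritten by conn.insert_id() two lines later, and only that value reaches the final log.info. Second, the standard DB-API spelling of "id generated by my last insert" is cursor.lastrowid, equivalent to MySQLdb's connection-level insert_id(). A sketch following the repo's own with conn as cur idiom (connection parameters and row values are placeholders):

    import MySQLdb
    from datetime import datetime

    conn = MySQLdb.connect(host="localhost", user="pol",
                           passwd="secret", db="pol")  # hypothetical credentials
    with conn as cur:  # old MySQLdb: yields a cursor, commits on success
        cur.execute("insert into frontend_post (md5sum, created, feed_id) "
                    "values (%s, %s, %s)", ("0" * 32, datetime.utcnow(), 1))
        post_id = cur.lastrowid  # portable equivalent of conn.insert_id()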
@@ -55,7 +57,7 @@ def fill_time(feed_id, items):
                        where p.md5sum in (%s)
                        and p.feed_id=%s""" % (quoted_hashes, feed_id,))
         rows = cur.fetchall()
-        print(cur._last_executed)
+        log.debug('Selected {count!r} posts', count=len(rows))
         for row in rows:
             md5hash = row[0]
             created = row[1]
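Context worth noting: the md5sum in (%s) clause above is filled by %-interpolating quoted_hashes, a pre-quoted string, rather than by parameter binding. The usual bound form generates one %s placeholder per value so the driver does all quoting. A sketch of that pattern, assuming an open cursor and the md5sum/created columns this function reads (a rewrite of the query above, not code from the repo):

    def select_created(cur, feed_id, hashes):
        # one placeholder per hash; the driver escapes every value
        placeholders = ", ".join(["%s"] * len(hashes))
        sql = ("select p.md5sum, p.created from frontend_post p "
               "where p.md5sum in (%s) and p.feed_id = %%s" % placeholders)
        cur.execute(sql, hashes + [feed_id])
        return cur.fetchall()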
@@ -99,7 +101,6 @@ def buildFeed(response, feed_config):
     tree = selector.root.getroottree()
     # get data from html
     items = []
-    #import pdb;pdb.set_trace()
     for node in selector.xpath(feed_config['xpath']):
         item = {}
         required_count = 0
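buildFeed iterates the nodes matched by the feed's configured row XPath and builds one item dict per node, with required_count tracking how many required fields actually matched. The selector.root.getroottree() call implies a parsel/Scrapy-style Selector wrapping an lxml tree. A reduced sketch of that loop (field names and XPaths are hypothetical):

    from parsel import Selector

    html = "<ul><li><a href='/1'>First</a></li><li><a href='/2'>Second</a></li></ul>"
    selector = Selector(text=html)

    items = []
    for node in selector.xpath("//li"):   # stands in for feed_config['xpath']
        item = {
            "title": node.xpath("./a/text()").get(),
            "title_link": node.xpath("./a/@href").get(),
        }
        if item["title"]:                 # stands in for the required-field count
            items.append(item)
    print(items)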