From acde8d3d8dad5b3b0c306f293f02e595ff63ac6b Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 15 Sep 2017 15:46:54 +0000
Subject: [PATCH] gc

---
Notes (ignored by git am; not part of the commit message):

  * Adds GC_PERIOD_SECONDS (3 hours) and periodical_garbage_collect(),
    which calls gc.collect() and prints its return value when at least
    GC_PERIOD_SECONDS have elapsed since the timestamp stored on the
    function attribute periodical_garbage_collect.time.
  * downloadDone() calls periodical_garbage_collect() after
    request.finish(), so the check runs once per completed download.
  * NOTE(review): line breaks and indentation in this patch were
    reconstructed from a whitespace-collapsed copy. Verify the context
    lines against downloader.py before applying.
  * NOTE(review): the From: address and the string literal in
    error_html() appear truncated in the copy this was restored from;
    both are reproduced exactly as found.

 downloader.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/downloader.py b/downloader.py
index a62a49a..fd2b0d9 100644
--- a/downloader.py
+++ b/downloader.py
@@ -2,6 +2,7 @@ import json
 import time, sys
 from hashlib import md5
 from datetime import datetime
+import gc
 
 from twisted.web import server, resource
 from twisted.internet import reactor, endpoints, defer
@@ -45,6 +46,16 @@ def check_feed_request_time_limit(url):
     r.set(url, int(time.time()))
     return 0
 
+GC_PERIOD_SECONDS = 3 * 60 * 60 # 3 hours
+
+def periodical_garbage_collect():
+    tm = int(time.time())
+    if tm - periodical_garbage_collect.time >= GC_PERIOD_SECONDS:
+        print('GC: the number of unreachable objects: %s' % gc.collect())
+        periodical_garbage_collect.time = tm
+
+periodical_garbage_collect.time = int(time.time())
+
 agent = BrowserLikeRedirectAgent(
     Agent(reactor,
         contextFactory=ScrapyClientContextFactory(), # skip certificate verification
@@ -156,6 +167,8 @@ def downloadDone(response_str, request, response, feed_config):
 
     request.write(response_str)
     request.finish()
+
+    periodical_garbage_collect()
 
 def error_html(msg):
     return "%s\n")