ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
extensions.py
(1954B)
1 import logging
2 import pprint
3
4 from twisted.internet.task import LoopingCall
5 from scrapy import signals
6
7 logger = logging.getLogger(__name__)
8
9
class _LoopingExtension:
    """Base for extensions that run a periodic task while a spider is open.

    Subclasses call :meth:`setup_looping_task` once; the wrapped task is
    started when the spider opens and stopped when it closes.
    """

    def setup_looping_task(self, task, crawler, interval):
        """Wrap *task* in a ``LoopingCall`` and subscribe to spider signals.

        *interval* is stored and handed to ``LoopingCall.start`` when the
        ``spider_opened`` signal fires.
        """
        self._interval = interval
        self._task = LoopingCall(task)
        handlers = (
            (self.spider_opened, signals.spider_opened),
            (self.spider_closed, signals.spider_closed),
        )
        for handler, sig in handlers:
            crawler.signals.connect(handler, signal=sig)

    def spider_opened(self):
        """Start the looping task without an immediate first call."""
        period = self._interval
        self._task.start(period, now=False)

    def spider_closed(self):
        """Stop the looping task if it is still running."""
        if not self._task.running:
            return
        self._task.stop()
23
24
class MonitorDownloadsExtension(_LoopingExtension):
    """
    Enable this extension to periodically log a number of active downloads.

    The logging period is read from the ``MONITOR_DOWNLOADS_INTERVAL``
    setting (default 10.0). Setting it to 0 disables the extension.
    """

    def __init__(self, crawler, interval):
        """Keep a reference to *crawler* and schedule :meth:`monitor`.

        *interval* is the period passed on to ``setup_looping_task``.
        """
        self.crawler = crawler
        self.setup_looping_task(self.monitor, crawler, interval)

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from the crawler's settings.

        Raises:
            NotConfigured: if ``MONITOR_DOWNLOADS_INTERVAL`` is 0, so that
                Scrapy skips the extension instead of running the task in
                a zero-delay loop.
        """
        # Local import: NotConfigured is not among the module-level imports.
        from scrapy.exceptions import NotConfigured

        interval = crawler.settings.getfloat("MONITOR_DOWNLOADS_INTERVAL", 10.0)
        if not interval:
            raise NotConfigured
        return cls(crawler, interval)

    def monitor(self):
        """Log the number of entries in the downloader's ``active`` set."""
        active_downloads = len(self.crawler.engine.downloader.active)
        # Lazy %-style args: the message is only formatted if it is emitted.
        logger.info("Active downloads: %s", active_downloads)
43
44
class DumpStatsExtension(_LoopingExtension):
    """
    Enable this extension to log Scrapy stats periodically, not only
    at the end of the crawl.

    The logging period is read from the ``DUMP_STATS_INTERVAL`` setting
    (default 60.0). Setting it to 0 disables the extension.
    """

    def __init__(self, crawler, interval):
        """Keep the crawler's stats collector and schedule :meth:`print_stats`.

        *interval* is the period passed on to ``setup_looping_task``.
        """
        self.stats = crawler.stats
        self.setup_looping_task(self.print_stats, crawler, interval)

    def print_stats(self):
        """Log a pretty-printed snapshot of the current stats."""
        stats = self.stats.get_stats()
        # Lazy %-style arg keeps the emitted text identical to concatenation.
        logger.info("Scrapy stats:\n%s", pprint.pformat(stats))

    @classmethod
    def from_crawler(cls, crawler):
        """Build the extension from the crawler's settings.

        Raises:
            NotConfigured: if ``DUMP_STATS_INTERVAL`` is 0, so that Scrapy
                skips the extension instead of running the task in a
                zero-delay loop.
        """
        # Local import: NotConfigured is not among the module-level imports.
        from scrapy.exceptions import NotConfigured

        interval = crawler.settings.getfloat("DUMP_STATS_INTERVAL", 60.0)
        if not interval:
            raise NotConfigured
        return cls(crawler, interval)