ml-finance-python

Python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

extensions.py

(1954B)


      1 import logging
      2 import pprint
      3 
      4 from twisted.internet.task import LoopingCall
      5 from scrapy import signals
      6 
      7 logger = logging.getLogger(__name__)
      8 
      9 
     10 class _LoopingExtension:
     11     def setup_looping_task(self, task, crawler, interval):
     12         self._interval = interval
     13         self._task = LoopingCall(task)
     14         crawler.signals.connect(self.spider_opened, signal=signals.spider_opened)
     15         crawler.signals.connect(self.spider_closed, signal=signals.spider_closed)
     16 
     17     def spider_opened(self):
     18         self._task.start(self._interval, now=False)
     19 
     20     def spider_closed(self):
     21         if self._task.running:
     22             self._task.stop()
     23 
     24 
     25 class MonitorDownloadsExtension(_LoopingExtension):
     26     """
     27     Enable this extension to periodically log a number of active downloads.
     28     """
     29 
     30     def __init__(self, crawler, interval):
     31         self.crawler = crawler
     32         self.setup_looping_task(self.monitor, crawler, interval)
     33 
     34     @classmethod
     35     def from_crawler(cls, crawler):
     36         # fixme: 0 should mean NotConfigured
     37         interval = crawler.settings.getfloat("MONITOR_DOWNLOADS_INTERVAL", 10.0)
     38         return cls(crawler, interval)
     39 
     40     def monitor(self):
     41         active_downloads = len(self.crawler.engine.downloader.active)
     42         logger.info("Active downloads: {}".format(active_downloads))
     43 
     44 
     45 class DumpStatsExtension(_LoopingExtension):
     46     """
     47     Enable this extension to log Scrapy stats periodically, not only
     48     at the end of the crawl.
     49     """
     50 
     51     def __init__(self, crawler, interval):
     52         self.stats = crawler.stats
     53         self.setup_looping_task(self.print_stats, crawler, interval)
     54 
     55     def print_stats(self):
     56         stats = self.stats.get_stats()
     57         logger.info("Scrapy stats:\n" + pprint.pformat(stats))
     58 
     59     @classmethod
     60     def from_crawler(cls, crawler):
     61         interval = crawler.settings.getfloat("DUMP_STATS_INTERVAL", 60.0)
     62         # fixme: 0 should mean NotConfigured
     63         return cls(crawler, interval)