ml-finance-python

Python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
settings.py

(3254B)
      1 # -*- coding: utf-8 -*-
      2 
      3 BOT_NAME = 'opentable'  # name of this Scrapy project / bot
      4 
      5 SPIDER_MODULES = ['opentable.spiders']  # modules Scrapy searches for spider classes
      6 NEWSPIDER_MODULE = 'opentable.spiders'  # module where `scrapy genspider` creates new spiders
      7 
      8 SPLASH_URL = 'http://localhost:8050/'  # Splash endpoint for the scrapy_splash middlewares below; assumes a local instance on port 8050
      9 
     10 DOWNLOADER_MIDDLEWARES = {  # downloader middlewares for this project: import path -> order (None disables an entry)
     11     'scrapy_splash.SplashCookiesMiddleware'                                 : 723,  # 723/725: orders given in the scrapy-splash README
     12     'scrapy_splash.SplashMiddleware'                                        : 725,
     13     'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,  # built-in middleware re-registered at 810, as the scrapy-splash README instructs
     14     # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware'            : None,  # optional: uncomment to disable Scrapy's built-in User-Agent middleware (module is `downloadermiddlewares`, plural)
     15     'random_useragent.RandomUserAgentMiddleware'                            : 400  # third-party middleware; presumably draws User-Agents from USER_AGENT_LIST at the end of this file
     16 }
     17 
     18 SPIDER_MIDDLEWARES = {  # spider middlewares for this project: import path -> order
     19     'opentable.middlewares.OpentableSpiderMiddleware': 543,  # project-local middleware (defined in opentable/middlewares.py, not shown here)
     20     'scrapy_splash.SplashDeduplicateArgsMiddleware'  : 100,  # scrapy-splash helper; 100 is the order given in its README
     21 }
     22 
     23 DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'  # duplicate-request filter supplied by scrapy-splash
     24 
     25 HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'  # cache storage from scrapy-splash; only used if HTTPCACHE_ENABLED is turned on (it is commented out further down)
     26 
     27 # Obey robots.txt rules: URLs a site's robots.txt disallows are not fetched
     28 ROBOTSTXT_OBEY = True
     29 
     30 # Configure maximum concurrent requests performed by Scrapy (default: 16)
     31 # CONCURRENT_REQUESTS = 32
     32 
     33 # Configure a delay for requests for the same website (default: 0)
     34 # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
     35 # See also autothrottle settings and docs
     36 # DOWNLOAD_DELAY = 3
     37 # The download delay setting will honor only one of:
     38 # CONCURRENT_REQUESTS_PER_DOMAIN = 16
     39 # CONCURRENT_REQUESTS_PER_IP = 16
     40 
     41 # Disable cookies (enabled by default)
     42 # COOKIES_ENABLED = False
     43 
     44 # Disable Telnet Console (enabled by default)
     45 # TELNETCONSOLE_ENABLED = False
     46 
     47 # Override the default request headers:
     48 # DEFAULT_REQUEST_HEADERS = {
     49 #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
     50 #   'Accept-Language': 'en',
     51 # }
     52 
     53 CONCURRENT_REQUESTS = 1  # at most one request in flight at a time (Scrapy's default is 16, see the note near the top of this file)
     54 # Enable or disable extensions
     55 # See https://doc.scrapy.org/en/latest/topics/extensions.html
     56 # EXTENSIONS = {
     57 #    'scrapy.extensions.telnet.TelnetConsole': None,
     58 # }
     59 
     60 # Configure item pipelines
     61 # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
     62 # ITEM_PIPELINES = {
     63 #    'opentable.pipelines.OpentablePipeline': 300,
     64 # }
     65 
     66 # Enable and configure the AutoThrottle extension (disabled by default)
     67 # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
     68 # AUTOTHROTTLE_ENABLED = True
     69 # The initial download delay
     70 # AUTOTHROTTLE_START_DELAY = 1
     71 # The maximum download delay to be set in case of high latencies
     72 # AUTOTHROTTLE_MAX_DELAY = 10
     73 # The average number of requests Scrapy should be sending in parallel to
     74 # each remote server
     75 # AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
     76 # Enable showing throttling stats for every response received.
     77 AUTOTHROTTLE_DEBUG = True  # NOTE(review): AUTOTHROTTLE_ENABLED is still commented out above, so this flag likely has no effect - confirm intent
     78 
     79 # Enable and configure HTTP caching (disabled by default)
     80 # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
     81 # HTTPCACHE_ENABLED = True
     82 # HTTPCACHE_EXPIRATION_SECS = 0
     83 # HTTPCACHE_DIR = 'httpcache'
     84 # HTTPCACHE_IGNORE_HTTP_CODES = []
     85 # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
     86 
     87 # EXTENSIONS = {
     88 #     'opentable.extensions.MonitorDownloadsExtension': 100,
     89 #     'opentable.extensions.DumpStatsExtension'       : 101,
     90 #     'scrapy.extensions.logstats.LogStats'           : 500,
     91 # }
     92 
     93 LOG_LEVEL = 'DEBUG'  # most verbose Scrapy log level
     94 LOG_FILE = 'spider.log'  # log output is written to this file; relative path, so it lands in the current working directory
     95 
     96 USER_AGENT_LIST = "opentable/user_agents.txt"  # file of candidate User-Agent strings, presumably read by random_useragent.RandomUserAgentMiddleware; NOTE(review): relative path, depends on where scrapy is launched