ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
settings.py
(3254B)
1 # -*- coding: utf-8 -*-
2
# Name Scrapy uses to identify this project/bot.
BOT_NAME = 'opentable'

# Packages searched for spider classes, and the package that
# `scrapy genspider` writes new spiders into.
SPIDER_MODULES = ['opentable.spiders']
NEWSPIDER_MODULE = 'opentable.spiders'

# Base URL of the Splash rendering service used by scrapy_splash.
# Points at a local instance on port 8050.
SPLASH_URL = 'http://localhost:8050/'
9
# Downloader middlewares, keyed by import path. The integer is the
# middleware's order value; a value of None disables a middleware.
DOWNLOADER_MIDDLEWARES = {
    'scrapy_splash.SplashCookiesMiddleware': 723,
    'scrapy_splash.SplashMiddleware': 725,
    'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
    # Scrapy's built-in user-agent middleware is left enabled; uncomment the
    # next line to switch it off.
    # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
    'random_useragent.RandomUserAgentMiddleware': 400,
}
17
# Spider middlewares, keyed by import path; the integer is the order value.
# Enables the project's own spider middleware alongside scrapy_splash's
# argument-deduplication middleware.
SPIDER_MIDDLEWARES = {
    'opentable.middlewares.OpentableSpiderMiddleware': 543,
    'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
}
22
# Use scrapy_splash's duplicate filter in place of Scrapy's default one.
DUPEFILTER_CLASS = 'scrapy_splash.SplashAwareDupeFilter'

# Cache storage backend supplied by scrapy_splash.
# NOTE(review): HTTPCACHE_ENABLED is commented out further down in this file,
# so this presumably has no effect until HTTP caching is turned on - confirm.
HTTPCACHE_STORAGE = 'scrapy_splash.SplashAwareFSCacheStorage'

# Obey robots.txt rules
ROBOTSTXT_OBEY = True
29
30 # Configure maximum concurrent requests performed by Scrapy (default: 16)
31 # CONCURRENT_REQUESTS = 32
32
33 # Configure a delay for requests for the same website (default: 0)
34 # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay
35 # See also autothrottle settings and docs
36 # DOWNLOAD_DELAY = 3
37 # The download delay setting will honor only one of:
38 # CONCURRENT_REQUESTS_PER_DOMAIN = 16
39 # CONCURRENT_REQUESTS_PER_IP = 16
40
41 # Disable cookies (enabled by default)
42 # COOKIES_ENABLED = False
43
44 # Disable Telnet Console (enabled by default)
45 # TELNETCONSOLE_ENABLED = False
46
47 # Override the default request headers:
48 # DEFAULT_REQUEST_HEADERS = {
49 # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
50 # 'Accept-Language': 'en',
51 # }
52
# Live override: allow only one concurrent request (Scrapy's default is 16).
CONCURRENT_REQUESTS = 1
54 # Enable or disable extensions
55 # See https://doc.scrapy.org/en/latest/topics/extensions.html
56 # EXTENSIONS = {
57 # 'scrapy.extensions.telnet.TelnetConsole': None,
58 # }
59
60 # Configure item pipelines
61 # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html
62 # ITEM_PIPELINES = {
63 # 'opentable.pipelines.OpentablePipeline': 300,
64 # }
65
66 # Enable and configure the AutoThrottle extension (disabled by default)
67 # See https://doc.scrapy.org/en/latest/topics/autothrottle.html
68 # AUTOTHROTTLE_ENABLED = True
69 # The initial download delay
70 # AUTOTHROTTLE_START_DELAY = 1
71 # The maximum download delay to be set in case of high latencies
72 # AUTOTHROTTLE_MAX_DELAY = 10
73 # The average number of requests Scrapy should be sending in parallel to
74 # each remote server
75 # AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received.
# NOTE(review): AUTOTHROTTLE_ENABLED is still commented out in this file, so
# this flag is presumably inert until the AutoThrottle extension itself is
# enabled - confirm whether that is intended.
AUTOTHROTTLE_DEBUG = True
78
79 # Enable and configure HTTP caching (disabled by default)
80 # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
81 # HTTPCACHE_ENABLED = True
82 # HTTPCACHE_EXPIRATION_SECS = 0
83 # HTTPCACHE_DIR = 'httpcache'
84 # HTTPCACHE_IGNORE_HTTP_CODES = []
85 # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
86
87 # EXTENSIONS = {
88 # 'opentable.extensions.MonitorDownloadsExtension': 100,
89 # 'opentable.extensions.DumpStatsExtension' : 101,
90 # 'scrapy.extensions.logstats.LogStats' : 500,
91 # }
92
# Log at the most verbose level and send the output to a file.
LOG_LEVEL = 'DEBUG'
LOG_FILE = 'spider.log'

# File of user-agent strings for random_useragent.RandomUserAgentMiddleware.
# NOTE(review): the path is relative, so it presumably resolves against the
# directory the crawl is launched from - confirm.
USER_AGENT_LIST = "opentable/user_agents.txt"