ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
table_spider.py
(1360B)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 __author__ = 'Stefan Jansen'
4
5 from opentable.items import OpentableItem
6 from scrapy import Spider
7 from scrapy_splash import SplashRequest
8
9
class OpenTableSpider(Spider):
    """Spider that scrapes restaurant rows from an OpenTable listings page.

    Each start URL is requested through Splash (``render.html`` endpoint)
    rather than fetched directly, and every ``div.rest-row-info`` element in
    the rendered response is turned into one ``OpentableItem``.
    """

    name = 'opentable'
    start_urls = ['https://www.opentable.com/new-york-restaurant-listings']

    def start_requests(self):
        """Yield one ``SplashRequest`` per entry in ``start_urls``.

        The request targets Splash's ``render.html`` endpoint with
        ``wait=1`` and routes the response to :meth:`parse`.
        """
        for url in self.start_urls:
            yield SplashRequest(
                url=url,
                callback=self.parse,
                endpoint='render.html',
                args={'wait': 1},
            )

    def parse(self, response):
        """Yield one ``OpentableItem`` per restaurant row in ``response``.

        Fields populated on each item:

        * ``name``, ``cuisine``, ``location`` -- lists of the matching text
          nodes (``extract()`` returns a list, possibly empty).
        * ``bookings`` -- list of all digit runs in ``div.booking`` text.
        * ``rating`` -- first digit run in the ``style`` attribute of
          ``div.all-stars`` (presumably a width percentage -- TODO confirm),
          or ``None`` when absent.
        * ``reviews`` -- first digit run in the review-count text, or
          ``None`` when absent.
        * ``price`` -- number of ``$`` matches in the pricing icons' text.
        """
        for resto in response.css('div.rest-row-info'):
            # Build a fresh item for every row: re-using a single instance
            # would make every yielded reference point at the same object,
            # so later rows would overwrite the data of earlier ones.
            item = OpentableItem()
            item['name'] = resto.css('span.rest-row-name-text::text').extract()
            item['bookings'] = resto.css('div.booking::text').re(r'\d+')
            item['rating'] = resto.css(
                'div.all-stars::attr(style)').re_first(r'\d+')
            item['reviews'] = resto.css(
                'span.star-rating-text--review-text::text').re_first(r'\d+')
            item['price'] = len(
                resto.css('div.rest-row-pricing > i::text').re(r'\$'))
            item['cuisine'] = resto.css(
                'span.rest-row-meta--cuisine::text').extract()
            item['location'] = resto.css(
                'span.rest-row-meta--location::text').extract()
            yield item