ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

table_spider.py

(1360B)


      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 
      5 from opentable.items import OpentableItem
      6 from scrapy import Spider
      7 from scrapy_splash import SplashRequest
      8 
      9 
     10 class OpenTableSpider(Spider):
     11     name = 'opentable'
     12     start_urls = ['https://www.opentable.com/new-york-restaurant-listings']
     13 
     14     def start_requests(self):
     15         for url in self.start_urls:
     16             yield SplashRequest(url=url,
     17                                 callback=self.parse,
     18                                 endpoint='render.html',
     19                                 args={'wait': 1},
     20                                 )
     21 
     22     def parse(self, response):
     23         item = OpentableItem()
     24         for resto in response.css('div.rest-row-info'):
     25             item['name'] = resto.css('span.rest-row-name-text::text').extract()
     26             item['bookings'] = resto.css('div.booking::text').re(r'\d+')
     27             item['rating'] = resto.css('div.all-stars::attr(style)').re_first('\d+')
     28             item['reviews'] = resto.css('span.star-rating-text--review-text::text').re_first(r'\d+')
     29             item['price'] = len(resto.css('div.rest-row-pricing > i::text').re('\$'))
     30             item['cuisine'] = resto.css('span.rest-row-meta--cuisine::text').extract()
     31             item['location'] = resto.css('span.rest-row-meta--location::text').extract()
     32             yield item