ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

opentable_selenium.py

(1652B)


      1 # coding: utf-8
      2 
      3 
      4 import re
      5 from time import sleep
      6 import pandas as pd
      7 from bs4 import BeautifulSoup
      8 from selenium import webdriver
      9 
     10 
     11 def parse_html(html):
     12     data, item = pd.DataFrame(), {}
     13     soup = BeautifulSoup(html, 'lxml')
     14     for i, resto in enumerate(soup.find_all('div', class_='rest-row-info')):
     15         item['name'] = resto.find('span', class_='rest-row-name-text').text
     16 
     17         booking = resto.find('div', class_='booking')
     18         item['bookings'] = re.search('\d+', booking.text).group() if booking else 'NA'
     19 
     20         rating = resto.select('div.all-stars.filled')
     21         item['rating'] = int(re.search('\d+', rating[0].get('style')).group()) if rating else 'NA'
     22 
     23         reviews = resto.find('span', class_='star-rating-text--review-text')
     24         item['reviews'] = int(re.search('\d+', reviews.text).group()) if reviews else 'NA'
     25 
     26         item['price'] = int(resto.find('div', class_='rest-row-pricing').find('i').text.count('$'))
     27         item['cuisine'] = resto.find('span', class_='rest-row-meta--cuisine').text
     28         item['location'] = resto.find('span', class_='rest-row-meta--location').text
     29         data[i] = pd.Series(item)
     30     return data.T
     31 
     32 
     33 restaurants = pd.DataFrame()
     34 driver = webdriver.Firefox()
     35 url = "https://www.opentable.com/new-york-restaurant-listings"
     36 driver.get(url)
     37 while True:
     38     sleep(1)
     39     new_data = parse_html(driver.page_source)
     40     if new_data.empty:
     41         break
     42     restaurants = pd.concat([restaurants, new_data], ignore_index=True)
     43     print(len(restaurants))
     44     driver.find_element_by_link_text('Next').click()
     45 
     46 driver.close()
     47 restaurants.to_csv('results.csv', index=False)
     48 print(restaurants)