ml-finance-python
Python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
opentable_selenium.py
(1652B)
1 # coding: utf-8
2
3
4 import re
5 from time import sleep
6 import pandas as pd
7 from bs4 import BeautifulSoup
8 from selenium import webdriver
9
10
# Placeholder stored when a field cannot be found on a restaurant card.
_MISSING = 'NA'

# Column order of the frame returned by parse_html().
_COLUMNS = ['name', 'bookings', 'rating', 'reviews', 'price', 'cuisine', 'location']

# Compiled once; raw string so that ``\d`` is not an invalid str escape.
_DIGITS = re.compile(r'\d+')


def _first_digits(text):
    """Return the first run of digits in *text* as a str, or None if there is none."""
    match = _DIGITS.search(text or '')
    return match.group() if match else None


def _text_or_missing(node):
    """Return ``node.text``, or the 'NA' placeholder when *node* is None."""
    return node.text if node is not None else _MISSING


def parse_html(html):
    """Parse a restaurant listings page into a DataFrame.

    Every ``<div class="rest-row-info">`` element in *html* becomes one row.

    Args:
        html: Markup of the listings page as a string.

    Returns:
        A ``pandas.DataFrame`` (object dtype, default integer index) with the
        columns ``name``, ``bookings``, ``rating``, ``reviews``, ``price``,
        ``cuisine`` and ``location``.  The frame has no rows when the page
        contains no restaurant elements.  Any field whose element (or number)
        is absent holds the string ``'NA'``.

        * ``bookings`` - first run of digits in the ``div.booking`` text,
          kept as a string.
        * ``rating`` - first integer found in the ``style`` attribute of the
          first ``div.all-stars.filled`` element.
        * ``reviews`` - first integer in the review-count text.
        * ``price`` - number of ``$`` characters in the pricing ``<i>`` tag.
    """
    soup = BeautifulSoup(html, 'lxml')
    records = []
    for resto in soup.find_all('div', class_='rest-row-info'):
        booking = resto.find('div', class_='booking')
        bookings = _first_digits(booking.text) if booking is not None else None

        stars = resto.select('div.all-stars.filled')
        # .get('style') may be None; _first_digits tolerates that.
        rating = _first_digits(stars[0].get('style')) if stars else None

        review_tag = resto.find('span', class_='star-rating-text--review-text')
        reviews = _first_digits(review_tag.text) if review_tag is not None else None

        pricing = resto.find('div', class_='rest-row-pricing')
        price_tag = pricing.find('i') if pricing is not None else None

        records.append({
            'name': _text_or_missing(resto.find('span', class_='rest-row-name-text')),
            'bookings': bookings if bookings is not None else _MISSING,
            'rating': int(rating) if rating is not None else _MISSING,
            'reviews': int(reviews) if reviews is not None else _MISSING,
            'price': price_tag.text.count('$') if price_tag is not None else _MISSING,
            'cuisine': _text_or_missing(resto.find('span', class_='rest-row-meta--cuisine')),
            'location': _text_or_missing(resto.find('span', class_='rest-row-meta--location')),
        })

    # Build the frame in one step instead of inserting a column per restaurant
    # and transposing; dtype=object because columns may mix numbers with 'NA'.
    return pd.DataFrame(records, columns=_COLUMNS, dtype=object)
31
32
# Script body: walk the paginated listings in Firefox, parse every page with
# parse_html(), and store the combined table in results.csv.
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

url = "https://www.opentable.com/new-york-restaurant-listings"
pages = []   # one DataFrame per scraped page
total = 0    # running number of restaurants collected so far

driver = webdriver.Firefox()
try:
    driver.get(url)
    while True:
        sleep(1)  # give the page a moment to render before reading its source
        new_data = parse_html(driver.page_source)
        if new_data.empty:
            break
        pages.append(new_data)
        total += len(new_data)
        print(total)

        # find_element(By..., ...) works on old and new Selenium releases;
        # the find_element_by_* helpers were removed in Selenium 4.3.
        try:
            next_link = driver.find_element(By.LINK_TEXT, 'Next')
        except NoSuchElementException:
            # No "Next" link: last page reached, keep what was collected.
            break
        next_link.click()
finally:
    # quit() ends the whole browser session (close() only closes the window),
    # and running it in finally avoids leaking the browser on errors.
    driver.quit()

# Concatenate once at the end rather than re-copying the table on every page.
restaurants = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()
restaurants.to_csv('results.csv', index=False)
print(restaurants)