ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
scrape_test.py
(2106B)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 __author__ = 'Stefan Jansen'
4 from bs4 import BeautifulSoup
5 from pathlib import Path
6 from selenium import webdriver
7 from selenium.webdriver.support import expected_conditions
8 from selenium.webdriver.common.by import By
9 from selenium.webdriver.support.ui import WebDriverWait
10 from time import sleep
11 import pickle
12 import requests
13 from lxml import html
14
15 from os import environ
16
# Seeking Alpha credentials are read from the environment; a missing
# variable raises KeyError before any browser is started.
EMAIL = environ['SEEKING_ALPHA_USER']
PASS = environ['SEEKING_ALPHA_PWD']

# Log in to Seeking Alpha through a Chrome WebDriver session and pickle the
# session's cookies to SA_cookies.pkl in the current working directory.
url = 'http://seekingalpha.com/account/login'
driver = webdriver.Chrome()
try:
    driver.get(url)

    # find_element(By.<kind>, ...) is used instead of the find_element_by_*
    # helpers, which Selenium 4 deprecated and later removed.
    driver.find_element(By.ID, 'sign-in').click()

    # Fixed pause; presumably gives the login form time to render after the
    # click -- TODO confirm and replace with an explicit wait.
    sleep(1)

    # Fill in the credential fields. Each field is best-effort: a failure is
    # printed and the script carries on, as in the original flow.
    for field_id, value in (
        ('authentication_login_email', EMAIL),
        ('authentication_login_password', PASS),
    ):
        try:
            driver.find_element(By.ID, field_id).send_keys(value)
        except Exception as e:
            print(e)

    # Submit the form; also best-effort.
    try:
        driver.find_element(
            By.XPATH, "//input[@value='Sign in' and @class='c']"
        ).click()
    except Exception as e:
        print(e)

    # Fixed pause before reading cookies; presumably lets the login request
    # finish. An explicit WebDriverWait on the page title had been tried
    # here and was left disabled.
    sleep(10)
    cookies = driver.get_cookies()

    # The context manager guarantees the pickle file is flushed and closed.
    with open('SA_cookies.pkl', 'wb') as cookie_file:
        pickle.dump(cookies, cookie_file)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when one of the steps above raises.
    driver.quit()
54
55 # exit()
56 # WebDriverWait(driver, 10).until(expected_conditions.title_contains("home"))
57
58
# Second login path: post the login form directly with a requests session
# instead of driving a browser.
sessionRequests = requests.Session()

# NOTE(review): the original referenced an undefined `loginUrl`. This value
# assumes the form posts to the same account/login path the browser opens
# (over HTTPS so credentials are not sent in clear text) -- confirm against
# the site's actual form action.
loginUrl = 'https://seekingalpha.com/account/login'

# This is the form data that the page sends when logging in.
# The original read credentials from an undefined `keys` dict; the
# environment-provided EMAIL / PASS constants are used instead.
# NOTE(review): requests is expected to omit the None-valued fields from the
# encoded body -- confirm if the server requires them to be present.
loginData = {
    'slugs[]': None,
    'rt': None,
    'user[url_source]': None,
    'user[location_source]': 'orthodox_login',
    'user[email]': EMAIL,
    'user[password]': PASS,
}

# Browser-like headers sent with the login request.
loginHeaders = {
    "Referer": "http://seekingalpha.com/",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/50.0.2661.102 Safari/537.36"
    ),
}

# Authenticate. The timeout keeps the script from hanging indefinitely if
# the server never answers (requests has no default timeout).
r = sessionRequests.post(
    loginUrl,
    data=loginData,
    headers=loginHeaders,
    timeout=30,
)