ml-finance-python

Python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

scrape_test.py

(2106B)


      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 from bs4 import BeautifulSoup
      5 from pathlib import Path
      6 from selenium import webdriver
      7 from selenium.webdriver.support import expected_conditions
      8 from selenium.webdriver.common.by import By
      9 from selenium.webdriver.support.ui import WebDriverWait
     10 from time import sleep
     11 import pickle
     12 import requests
     13 from lxml import html
     14 
     15 from os import environ
     16 
     17 EMAIL = environ['SEEKING_ALPHA_USER']
     18 PASS = environ['SEEKING_ALPHA_PWD']
     19 
     20 driver = webdriver.Chrome()
     21 url = 'http://seekingalpha.com/account/login'
     22 # url = 'https://seekingalpha.com/'
     23 driver.get(url)
     24 
     25 driver.find_element_by_id("sign-in").click()
     26 # box = 'alphabox-modal-window'
     27 
     28 sleep(1)
     29 
     30 try:
     31     email = driver.find_element_by_id("authentication_login_email")
     32     email.send_keys(EMAIL)
     33 except Exception as e:
     34     print(e)
     35 
     36 try:
     37     password = driver.find_element_by_id('authentication_login_password')
     38     password.send_keys(PASS)
     39 except Exception as e:
     40     print(e)
     41 
     42 try:
     43     driver.find_element_by_xpath("//input[@value='Sign in' and @class='c']").click()
     44     # WebDriverWait(driver, 10).until(expected_conditions.title_contains("home"))
     45 
     46 except Exception as e:
     47     print(e)
     48 # html = driver.page_source
     49 sleep(10)
     50 cookies = driver.get_cookies()
     51 
     52 pickle.dump(cookies, open('SA_cookies.pkl', 'wb'))
     53 driver.close()
     54 
     55 # exit()
     56 # WebDriverWait(driver, 10).until(expected_conditions.title_contains("home"))
     57 
     58 
     59 sessionRequests = requests.Session()
     60 
     61 
     62 # This is the form data that the page sends when logging in
     63 loginData = {
     64     'slugs[]'              : None,
     65     'rt'                   : None,
     66     'user[url_source]'     : None,
     67     'user[location_source]': 'orthodox_login',
     68     'user[email]'          : keys['username'],
     69     'user[password]'       : keys['password'],
     70 
     71 }
     72 # Authenticate
     73 r = sessionRequests.post(loginUrl, data=loginData, headers={"Referer"   : "http://seekingalpha.com/",
     74                                                             "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"})