ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

01_datareader.ipynb

(88628B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "# Remote data access using pandas"
      8    ]
      9   },
     10   {
     11    "cell_type": "markdown",
     12    "metadata": {},
     13    "source": [
     14     "The pandas library enables access to data displayed on websites using the `read_html()` function and access to the API endpoints of various data providers through the related `pandas-datareader` library."
     15    ]
     16   },
     17   {
     18    "cell_type": "code",
     19    "execution_count": 1,
     20    "metadata": {
     21     "ExecuteTime": {
     22      "end_time": "2018-12-25T19:14:54.455404Z",
     23      "start_time": "2018-12-25T19:14:54.447513Z"
     24     }
     25    },
     26    "outputs": [],
     27    "source": [
     28     "import os\n",
     29     "import pandas_datareader.data as web\n",
     30     "from datetime import datetime\n",
     31     "from pprint import pprint"
     32    ]
     33   },
     34   {
     35    "cell_type": "markdown",
     36    "metadata": {},
     37    "source": [
     38     "## Download html table with SP500 constituents"
     39    ]
     40   },
     41   {
     42    "cell_type": "markdown",
     43    "metadata": {},
     44    "source": [
     45     "The download of the content of one or more html tables works as follows, for instance for the constituents of the S&P500 index from Wikipedia"
     46    ]
     47   },
     48   {
     49    "cell_type": "code",
     50    "execution_count": 2,
     51    "metadata": {
     52     "ExecuteTime": {
     53      "end_time": "2018-12-25T19:14:55.680031Z",
     54      "start_time": "2018-12-25T19:14:54.460128Z"
     55     }
     56    },
     57    "outputs": [],
     58    "source": [
     59     "sp_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'\n",
     60     "sp500_constituents = pd.read_html(sp_url, header=0)[0]"
     61    ]
     62   },
     63   {
     64    "cell_type": "code",
     65    "execution_count": 3,
     66    "metadata": {
     67     "ExecuteTime": {
     68      "end_time": "2018-12-25T19:14:55.685673Z",
     69      "start_time": "2018-12-25T19:14:55.681237Z"
     70     },
     71     "scrolled": true
     72    },
     73    "outputs": [
     74     {
     75      "name": "stdout",
     76      "output_type": "stream",
     77      "text": [
     78       "<class 'pandas.core.frame.DataFrame'>\n",
     79       "RangeIndex: 505 entries, 0 to 504\n",
     80       "Data columns (total 9 columns):\n",
     81       "Security                 505 non-null object\n",
     82       "Symbol                   505 non-null object\n",
     83       "SEC filings              505 non-null object\n",
     84       "GICS Sector              505 non-null object\n",
     85       "GICS Sub Industry        505 non-null object\n",
     86       "Headquarters Location    505 non-null object\n",
     87       "Date first added         402 non-null object\n",
     88       "CIK                      505 non-null int64\n",
     89       "Founded                  172 non-null object\n",
     90       "dtypes: int64(1), object(8)\n",
     91       "memory usage: 35.6+ KB\n"
     92      ]
     93     }
     94    ],
     95    "source": [
     96     "sp500_constituents.info()"
     97    ]
     98   },
     99   {
    100    "cell_type": "code",
    101    "execution_count": 4,
    102    "metadata": {
    103     "ExecuteTime": {
    104      "end_time": "2018-12-25T19:14:55.702503Z",
    105      "start_time": "2018-12-25T19:14:55.686865Z"
    106     }
    107    },
    108    "outputs": [
    109     {
    110      "data": {
    111       "text/html": [
    112        "<div>\n",
    113        "<style scoped>\n",
    114        "    .dataframe tbody tr th:only-of-type {\n",
    115        "        vertical-align: middle;\n",
    116        "    }\n",
    117        "\n",
    118        "    .dataframe tbody tr th {\n",
    119        "        vertical-align: top;\n",
    120        "    }\n",
    121        "\n",
    122        "    .dataframe thead th {\n",
    123        "        text-align: right;\n",
    124        "    }\n",
    125        "</style>\n",
    126        "<table border=\"1\" class=\"dataframe\">\n",
    127        "  <thead>\n",
    128        "    <tr style=\"text-align: right;\">\n",
    129        "      <th></th>\n",
    130        "      <th>Security</th>\n",
    131        "      <th>Symbol</th>\n",
    132        "      <th>SEC filings</th>\n",
    133        "      <th>GICS Sector</th>\n",
    134        "      <th>GICS Sub Industry</th>\n",
    135        "      <th>Headquarters Location</th>\n",
    136        "      <th>Date first added</th>\n",
    137        "      <th>CIK</th>\n",
    138        "      <th>Founded</th>\n",
    139        "    </tr>\n",
    140        "  </thead>\n",
    141        "  <tbody>\n",
    142        "    <tr>\n",
    143        "      <th>0</th>\n",
    144        "      <td>3M Company</td>\n",
    145        "      <td>MMM</td>\n",
    146        "      <td>reports</td>\n",
    147        "      <td>Industrials</td>\n",
    148        "      <td>Industrial Conglomerates</td>\n",
    149        "      <td>St. Paul, Minnesota</td>\n",
    150        "      <td>NaN</td>\n",
    151        "      <td>66740</td>\n",
    152        "      <td>1902</td>\n",
    153        "    </tr>\n",
    154        "    <tr>\n",
    155        "      <th>1</th>\n",
    156        "      <td>Abbott Laboratories</td>\n",
    157        "      <td>ABT</td>\n",
    158        "      <td>reports</td>\n",
    159        "      <td>Health Care</td>\n",
    160        "      <td>Health Care Equipment</td>\n",
    161        "      <td>North Chicago, Illinois</td>\n",
    162        "      <td>1964-03-31</td>\n",
    163        "      <td>1800</td>\n",
    164        "      <td>1888</td>\n",
    165        "    </tr>\n",
    166        "    <tr>\n",
    167        "      <th>2</th>\n",
    168        "      <td>AbbVie Inc.</td>\n",
    169        "      <td>ABBV</td>\n",
    170        "      <td>reports</td>\n",
    171        "      <td>Health Care</td>\n",
    172        "      <td>Pharmaceuticals</td>\n",
    173        "      <td>North Chicago, Illinois</td>\n",
    174        "      <td>2012-12-31</td>\n",
    175        "      <td>1551152</td>\n",
    176        "      <td>2013 (1888)</td>\n",
    177        "    </tr>\n",
    178        "    <tr>\n",
    179        "      <th>3</th>\n",
    180        "      <td>ABIOMED Inc</td>\n",
    181        "      <td>ABMD</td>\n",
    182        "      <td>reports</td>\n",
    183        "      <td>Health Care</td>\n",
    184        "      <td>Health Care Equipment</td>\n",
    185        "      <td>Danvers, Massachusetts</td>\n",
    186        "      <td>2018-05-31</td>\n",
    187        "      <td>815094</td>\n",
    188        "      <td>1981</td>\n",
    189        "    </tr>\n",
    190        "    <tr>\n",
    191        "      <th>4</th>\n",
    192        "      <td>Accenture plc</td>\n",
    193        "      <td>ACN</td>\n",
    194        "      <td>reports</td>\n",
    195        "      <td>Information Technology</td>\n",
    196        "      <td>IT Consulting &amp; Other Services</td>\n",
    197        "      <td>Dublin, Ireland</td>\n",
    198        "      <td>2011-07-06</td>\n",
    199        "      <td>1467373</td>\n",
    200        "      <td>1989</td>\n",
    201        "    </tr>\n",
    202        "  </tbody>\n",
    203        "</table>\n",
    204        "</div>"
    205       ],
    206       "text/plain": [
    207        "              Security Symbol SEC filings             GICS Sector  \\\n",
    208        "0           3M Company    MMM     reports             Industrials   \n",
    209        "1  Abbott Laboratories    ABT     reports             Health Care   \n",
    210        "2          AbbVie Inc.   ABBV     reports             Health Care   \n",
    211        "3          ABIOMED Inc   ABMD     reports             Health Care   \n",
    212        "4        Accenture plc    ACN     reports  Information Technology   \n",
    213        "\n",
    214        "                GICS Sub Industry    Headquarters Location Date first added  \\\n",
    215        "0        Industrial Conglomerates      St. Paul, Minnesota              NaN   \n",
    216        "1           Health Care Equipment  North Chicago, Illinois       1964-03-31   \n",
    217        "2                 Pharmaceuticals  North Chicago, Illinois       2012-12-31   \n",
    218        "3           Health Care Equipment   Danvers, Massachusetts       2018-05-31   \n",
    219        "4  IT Consulting & Other Services          Dublin, Ireland       2011-07-06   \n",
    220        "\n",
    221        "       CIK      Founded  \n",
    222        "0    66740         1902  \n",
    223        "1     1800         1888  \n",
    224        "2  1551152  2013 (1888)  \n",
    225        "3   815094         1981  \n",
    226        "4  1467373         1989  "
    227       ]
    228      },
    229      "execution_count": 4,
    230      "metadata": {},
    231      "output_type": "execute_result"
    232     }
    233    ],
    234    "source": [
    235     "sp500_constituents.head()"
    236    ]
    237   },
    238   {
    239    "cell_type": "markdown",
    240    "metadata": {},
    241    "source": [
    242     "## pandas-datareader for Market Data"
    243    ]
    244   },
    245   {
    246    "cell_type": "markdown",
    247    "metadata": {},
    248    "source": [
    249     "`pandas` used to facilitate access to data providers' APIs directly, but this functionality has moved to the related pandas-datareader library. The stability of the APIs varies with provider policies, and as of June 2o18 at version 0.7, the following sources are available"
    250    ]
    251   },
    252   {
    253    "cell_type": "markdown",
    254    "metadata": {},
    255    "source": [
    256     "See [documentation](https://pandas-datareader.readthedocs.io/en/latest/); functionality frequently changes as underlying provider APIs evolve."
    257    ]
    258   },
    259   {
    260    "cell_type": "markdown",
    261    "metadata": {},
    262    "source": [
    263     "### Yahoo Finance"
    264    ]
    265   },
    266   {
    267    "cell_type": "code",
    268    "execution_count": 5,
    269    "metadata": {
    270     "ExecuteTime": {
    271      "end_time": "2018-12-25T19:14:56.812409Z",
    272      "start_time": "2018-12-25T19:14:55.703363Z"
    273     }
    274    },
    275    "outputs": [
    276     {
    277      "name": "stdout",
    278      "output_type": "stream",
    279      "text": [
    280       "<class 'pandas.core.frame.DataFrame'>\n",
    281       "DatetimeIndex: 856 entries, 2014-01-02 to 2017-05-25\n",
    282       "Data columns (total 6 columns):\n",
    283       "High         856 non-null float64\n",
    284       "Low          856 non-null float64\n",
    285       "Open         856 non-null float64\n",
    286       "Close        856 non-null float64\n",
    287       "Volume       856 non-null int64\n",
    288       "Adj Close    856 non-null float64\n",
    289       "dtypes: float64(5), int64(1)\n",
    290       "memory usage: 46.8 KB\n"
    291      ]
    292     }
    293    ],
    294    "source": [
    295     "start = '2014'\n",
    296     "end = datetime(2017, 5, 24)\n",
    297     "\n",
    298     "yahoo= web.DataReader('FB', 'yahoo', start=start, end=end)\n",
    299     "yahoo.info()"
    300    ]
    301   },
    302   {
    303    "cell_type": "markdown",
    304    "metadata": {},
    305    "source": [
    306     "### IEX"
    307    ]
    308   },
    309   {
    310    "cell_type": "markdown",
    311    "metadata": {},
    312    "source": [
    313     "IEX is an alternative exchange started in response to the HFT controversy and portrayed in Michael Lewis' controversial Flash Boys. It aims to slow down the speed of trading to create a more level playing field and has been growing rapidly since launch in 2016 while still small with a market share of around 2.5% in June 2018."
    314    ]
    315   },
    316   {
    317    "cell_type": "code",
    318    "execution_count": 6,
    319    "metadata": {
    320     "ExecuteTime": {
    321      "end_time": "2018-12-25T19:14:57.093439Z",
    322      "start_time": "2018-12-25T19:14:56.813719Z"
    323     }
    324    },
    325    "outputs": [
    326     {
    327      "name": "stdout",
    328      "output_type": "stream",
    329      "text": [
    330       "<class 'pandas.core.frame.DataFrame'>\n",
    331       "Index: 1054 entries, 2015-02-09 to 2019-04-16\n",
    332       "Data columns (total 5 columns):\n",
    333       "open      1054 non-null float64\n",
    334       "high      1054 non-null float64\n",
    335       "low       1054 non-null float64\n",
    336       "close     1054 non-null float64\n",
    337       "volume    1054 non-null int64\n",
    338       "dtypes: float64(4), int64(1)\n",
    339       "memory usage: 49.4+ KB\n"
    340      ]
    341     }
    342    ],
    343    "source": [
    344     "start = datetime(2015, 2, 9)\n",
    345     "# end = datetime(2017, 5, 24)\n",
    346     "\n",
    347     "iex = web.DataReader('FB', 'iex', start)\n",
    348     "iex.info()"
    349    ]
    350   },
    351   {
    352    "cell_type": "code",
    353    "execution_count": 7,
    354    "metadata": {
    355     "ExecuteTime": {
    356      "end_time": "2018-12-25T19:14:57.108760Z",
    357      "start_time": "2018-12-25T19:14:57.095941Z"
    358     }
    359    },
    360    "outputs": [
    361     {
    362      "data": {
    363       "text/html": [
    364        "<div>\n",
    365        "<style scoped>\n",
    366        "    .dataframe tbody tr th:only-of-type {\n",
    367        "        vertical-align: middle;\n",
    368        "    }\n",
    369        "\n",
    370        "    .dataframe tbody tr th {\n",
    371        "        vertical-align: top;\n",
    372        "    }\n",
    373        "\n",
    374        "    .dataframe thead th {\n",
    375        "        text-align: right;\n",
    376        "    }\n",
    377        "</style>\n",
    378        "<table border=\"1\" class=\"dataframe\">\n",
    379        "  <thead>\n",
    380        "    <tr style=\"text-align: right;\">\n",
    381        "      <th></th>\n",
    382        "      <th>open</th>\n",
    383        "      <th>high</th>\n",
    384        "      <th>low</th>\n",
    385        "      <th>close</th>\n",
    386        "      <th>volume</th>\n",
    387        "    </tr>\n",
    388        "    <tr>\n",
    389        "      <th>date</th>\n",
    390        "      <th></th>\n",
    391        "      <th></th>\n",
    392        "      <th></th>\n",
    393        "      <th></th>\n",
    394        "      <th></th>\n",
    395        "    </tr>\n",
    396        "  </thead>\n",
    397        "  <tbody>\n",
    398        "    <tr>\n",
    399        "      <th>2019-04-10</th>\n",
    400        "      <td>178.18</td>\n",
    401        "      <td>178.79</td>\n",
    402        "      <td>176.54</td>\n",
    403        "      <td>177.82</td>\n",
    404        "      <td>11701479</td>\n",
    405        "    </tr>\n",
    406        "    <tr>\n",
    407        "      <th>2019-04-11</th>\n",
    408        "      <td>178.24</td>\n",
    409        "      <td>178.40</td>\n",
    410        "      <td>177.00</td>\n",
    411        "      <td>177.51</td>\n",
    412        "      <td>8070967</td>\n",
    413        "    </tr>\n",
    414        "    <tr>\n",
    415        "      <th>2019-04-12</th>\n",
    416        "      <td>178.00</td>\n",
    417        "      <td>179.63</td>\n",
    418        "      <td>177.95</td>\n",
    419        "      <td>179.10</td>\n",
    420        "      <td>12329812</td>\n",
    421        "    </tr>\n",
    422        "    <tr>\n",
    423        "      <th>2019-04-15</th>\n",
    424        "      <td>178.50</td>\n",
    425        "      <td>180.50</td>\n",
    426        "      <td>176.87</td>\n",
    427        "      <td>179.65</td>\n",
    428        "      <td>10834762</td>\n",
    429        "    </tr>\n",
    430        "    <tr>\n",
    431        "      <th>2019-04-16</th>\n",
    432        "      <td>179.00</td>\n",
    433        "      <td>180.17</td>\n",
    434        "      <td>178.30</td>\n",
    435        "      <td>178.87</td>\n",
    436        "      <td>11215193</td>\n",
    437        "    </tr>\n",
    438        "  </tbody>\n",
    439        "</table>\n",
    440        "</div>"
    441       ],
    442       "text/plain": [
    443        "              open    high     low   close    volume\n",
    444        "date                                                \n",
    445        "2019-04-10  178.18  178.79  176.54  177.82  11701479\n",
    446        "2019-04-11  178.24  178.40  177.00  177.51   8070967\n",
    447        "2019-04-12  178.00  179.63  177.95  179.10  12329812\n",
    448        "2019-04-15  178.50  180.50  176.87  179.65  10834762\n",
    449        "2019-04-16  179.00  180.17  178.30  178.87  11215193"
    450       ]
    451      },
    452      "execution_count": 7,
    453      "metadata": {},
    454      "output_type": "execute_result"
    455     }
    456    ],
    457    "source": [
    458     "iex.tail()"
    459    ]
    460   },
    461   {
    462    "cell_type": "markdown",
    463    "metadata": {},
    464    "source": [
    465     "#### Book Data\n",
    466     "\n",
    467     "In addition to historical EOD price and volume data, IEX provides real-time depth of book quotations that offer an aggregated size of orders by price and side. This service also includes last trade price and size information.\n",
    468     "\n",
    469     "DEEP is used to receive real-time depth of book quotations direct from IEX. The depth of book quotations received via DEEP provide an aggregated size of resting displayed orders at a price and side, and do not indicate the size or number of individual orders at any price level. Non-displayed orders and non-displayed portions of reserve orders are not represented in DEEP.\n",
    470     "\n",
    471     "DEEP also provides last trade price and size information. Trades resulting from either displayed or non-displayed orders matching on IEX will be reported. Routed executions will not be reported."
    472    ]
    473   },
    474   {
    475    "cell_type": "markdown",
    476    "metadata": {},
    477    "source": [
    478     "Only works on trading days."
    479    ]
    480   },
    481   {
    482    "cell_type": "code",
    483    "execution_count": 8,
    484    "metadata": {
    485     "ExecuteTime": {
    486      "end_time": "2018-12-25T19:14:57.278095Z",
    487      "start_time": "2018-12-25T19:14:57.110743Z"
    488     }
    489    },
    490    "outputs": [],
    491    "source": [
    492     "book = web.get_iex_book('AAPL')"
    493    ]
    494   },
    495   {
    496    "cell_type": "code",
    497    "execution_count": 9,
    498    "metadata": {
    499     "ExecuteTime": {
    500      "end_time": "2018-12-25T19:14:57.302122Z",
    501      "start_time": "2018-12-25T19:14:57.289530Z"
    502     }
    503    },
    504    "outputs": [
    505     {
    506      "data": {
    507       "text/plain": [
    508        "['symbol',\n",
    509        " 'marketPercent',\n",
    510        " 'volume',\n",
    511        " 'lastSalePrice',\n",
    512        " 'lastSaleSize',\n",
    513        " 'lastSaleTime',\n",
    514        " 'lastUpdated',\n",
    515        " 'bids',\n",
    516        " 'asks',\n",
    517        " 'systemEvent',\n",
    518        " 'tradingStatus',\n",
    519        " 'opHaltStatus',\n",
    520        " 'ssrStatus',\n",
    521        " 'securityEvent',\n",
    522        " 'trades',\n",
    523        " 'tradeBreaks']"
    524       ]
    525      },
    526      "execution_count": 9,
    527      "metadata": {},
    528      "output_type": "execute_result"
    529     }
    530    ],
    531    "source": [
    532     "list(book.keys())"
    533    ]
    534   },
    535   {
    536    "cell_type": "code",
    537    "execution_count": 10,
    538    "metadata": {
    539     "ExecuteTime": {
    540      "end_time": "2018-12-25T19:14:57.318139Z",
    541      "start_time": "2018-12-25T19:14:57.307227Z"
    542     }
    543    },
    544    "outputs": [
    545     {
    546      "data": {
    547       "text/html": [
    548        "<div>\n",
    549        "<style scoped>\n",
    550        "    .dataframe tbody tr th:only-of-type {\n",
    551        "        vertical-align: middle;\n",
    552        "    }\n",
    553        "\n",
    554        "    .dataframe tbody tr th {\n",
    555        "        vertical-align: top;\n",
    556        "    }\n",
    557        "\n",
    558        "    .dataframe thead th {\n",
    559        "        text-align: right;\n",
    560        "    }\n",
    561        "</style>\n",
    562        "<table border=\"1\" class=\"dataframe\">\n",
    563        "  <thead>\n",
    564        "    <tr style=\"text-align: right;\">\n",
    565        "      <th></th>\n",
    566        "      <th>side</th>\n",
    567        "    </tr>\n",
    568        "  </thead>\n",
    569        "  <tbody>\n",
    570        "  </tbody>\n",
    571        "</table>\n",
    572        "</div>"
    573       ],
    574       "text/plain": [
    575        "Empty DataFrame\n",
    576        "Columns: [side]\n",
    577        "Index: []"
    578       ]
    579      },
    580      "execution_count": 10,
    581      "metadata": {},
    582      "output_type": "execute_result"
    583     }
    584    ],
    585    "source": [
    586     "orders = pd.concat([pd.DataFrame(book[side]).assign(side=side) for side in ['bids', 'asks']])\n",
    587     "orders.head()"
    588    ]
    589   },
    590   {
    591    "cell_type": "code",
    592    "execution_count": 11,
    593    "metadata": {
    594     "ExecuteTime": {
    595      "end_time": "2018-12-25T19:14:57.333155Z",
    596      "start_time": "2018-12-25T19:14:57.319823Z"
    597     }
    598    },
    599    "outputs": [
    600     {
    601      "name": "stdout",
    602      "output_type": "stream",
    603      "text": [
    604       "\n",
    605       "symbol\n",
    606       "AAPL\n",
    607       "\n",
    608       "marketPercent\n",
    609       "0.03324\n",
    610       "\n",
    611       "volume\n",
    612       "977659\n",
    613       "\n",
    614       "lastSalePrice\n",
    615       "203.19\n",
    616       "\n",
    617       "lastSaleSize\n",
    618       "3\n",
    619       "\n",
    620       "lastSaleTime\n",
    621       "1555531318248\n",
    622       "\n",
    623       "lastUpdated\n",
    624       "1555532174025\n",
    625       "\n",
    626       "bids\n",
    627       "Empty DataFrame\n",
    628       "Columns: []\n",
    629       "Index: []\n",
    630       "\n",
    631       "asks\n",
    632       "Empty DataFrame\n",
    633       "Columns: []\n",
    634       "Index: []\n",
    635       "\n",
    636       "systemEvent\n",
    637       "{'systemEvent': 'C', 'timestamp': 1555535400001}\n",
    638       "\n",
    639       "tradingStatus\n",
    640       "{'status': 'T', 'reason': '    ', 'timestamp': 1555500532036}\n",
    641       "\n",
    642       "opHaltStatus\n",
    643       "{'isHalted': False, 'timestamp': 1555500532036}\n",
    644       "\n",
    645       "ssrStatus\n",
    646       "{'isSSR': False, 'detail': ' ', 'timestamp': 1555500532036}\n",
    647       "\n",
    648       "securityEvent\n",
    649       "{'securityEvent': 'MarketClose', 'timestamp': 1555531200000}\n",
    650       "\n",
    651       "trades\n",
    652       "    isISO  isOddLot  isOutsideRegularHours  isSinglePriceCross  \\\n",
    653       "0    True      True                   True               False   \n",
    654       "1   False     False                  False               False   \n",
    655       "2    True     False                  False               False   \n",
    656       "3   False     False                  False               False   \n",
    657       "4    True     False                  False               False   \n",
    658       "5   False     False                  False               False   \n",
    659       "6   False     False                  False               False   \n",
    660       "7    True     False                  False               False   \n",
    661       "8    True     False                  False               False   \n",
    662       "9    True     False                  False               False   \n",
    663       "10  False      True                  False               False   \n",
    664       "11  False     False                  False               False   \n",
    665       "12   True      True                  False               False   \n",
    666       "13  False     False                  False               False   \n",
    667       "14  False     False                  False               False   \n",
    668       "15  False     False                  False               False   \n",
    669       "16   True      True                  False               False   \n",
    670       "17   True     False                  False               False   \n",
    671       "18   True      True                  False               False   \n",
    672       "19  False     False                  False               False   \n",
    673       "\n",
    674       "    isTradeThroughExempt    price  size      timestamp    tradeId  \n",
    675       "0                  False  203.190     3  1555531318248  891604355  \n",
    676       "1                  False  203.200   100  1555531197248  890272160  \n",
    677       "2                  False  203.140   100  1555531195857  889977488  \n",
    678       "3                  False  203.155   100  1555531195465  889859713  \n",
    679       "4                  False  203.140   100  1555531195292  889808657  \n",
    680       "5                  False  203.110   100  1555531194852  889619571  \n",
    681       "6                  False  203.100   100  1555531194822  889606541  \n",
    682       "7                  False  203.180   100  1555531194794  889585630  \n",
    683       "8                   True  203.180   100  1555531194794  889585556  \n",
    684       "9                  False  203.180   100  1555531191476  889002606  \n",
    685       "10                 False  203.190    25  1555531190110  888694678  \n",
    686       "11                 False  203.140   100  1555531187948  888262843  \n",
    687       "12                 False  203.160    64  1555531187684  888228377  \n",
    688       "13                 False  203.160   100  1555531187682  888227956  \n",
    689       "14                 False  203.160   100  1555531187681  888227469  \n",
    690       "15                 False  203.160   100  1555531187680  888227241  \n",
    691       "16                 False  203.160    36  1555531187680  888227079  \n",
    692       "17                 False  203.160   100  1555531187680  888227061  \n",
    693       "18                 False  203.170     9  1555531185364  887813837  \n",
    694       "19                 False  203.150   200  1555531184053  887531882  \n",
    695       "\n",
    696       "tradeBreaks\n",
    697       "Empty DataFrame\n",
    698       "Columns: []\n",
    699       "Index: []\n"
    700      ]
    701     }
    702    ],
    703    "source": [
    704     "for key in book.keys():\n",
    705     "    try:\n",
    706     "        print(f'\\n{key}')\n",
    707     "        print(pd.DataFrame(book[key]))\n",
    708     "    except:\n",
    709     "        print(book[key])"
    710    ]
    711   },
    712   {
    713    "cell_type": "code",
    714    "execution_count": 12,
    715    "metadata": {
    716     "ExecuteTime": {
    717      "end_time": "2018-12-25T19:14:57.420316Z",
    718      "start_time": "2018-12-25T19:14:57.334612Z"
    719     }
    720    },
    721    "outputs": [
    722     {
    723      "data": {
    724       "text/html": [
    725        "<div>\n",
    726        "<style scoped>\n",
    727        "    .dataframe tbody tr th:only-of-type {\n",
    728        "        vertical-align: middle;\n",
    729        "    }\n",
    730        "\n",
    731        "    .dataframe tbody tr th {\n",
    732        "        vertical-align: top;\n",
    733        "    }\n",
    734        "\n",
    735        "    .dataframe thead th {\n",
    736        "        text-align: right;\n",
    737        "    }\n",
    738        "</style>\n",
    739        "<table border=\"1\" class=\"dataframe\">\n",
    740        "  <thead>\n",
    741        "    <tr style=\"text-align: right;\">\n",
    742        "      <th></th>\n",
    743        "      <th>isISO</th>\n",
    744        "      <th>isOddLot</th>\n",
    745        "      <th>isOutsideRegularHours</th>\n",
    746        "      <th>isSinglePriceCross</th>\n",
    747        "      <th>isTradeThroughExempt</th>\n",
    748        "      <th>price</th>\n",
    749        "      <th>size</th>\n",
    750        "      <th>timestamp</th>\n",
    751        "      <th>tradeId</th>\n",
    752        "    </tr>\n",
    753        "  </thead>\n",
    754        "  <tbody>\n",
    755        "    <tr>\n",
    756        "      <th>0</th>\n",
    757        "      <td>True</td>\n",
    758        "      <td>True</td>\n",
    759        "      <td>True</td>\n",
    760        "      <td>False</td>\n",
    761        "      <td>False</td>\n",
    762        "      <td>203.190</td>\n",
    763        "      <td>3</td>\n",
    764        "      <td>1555531318248</td>\n",
    765        "      <td>891604355</td>\n",
    766        "    </tr>\n",
    767        "    <tr>\n",
    768        "      <th>1</th>\n",
    769        "      <td>False</td>\n",
    770        "      <td>False</td>\n",
    771        "      <td>False</td>\n",
    772        "      <td>False</td>\n",
    773        "      <td>False</td>\n",
    774        "      <td>203.200</td>\n",
    775        "      <td>100</td>\n",
    776        "      <td>1555531197248</td>\n",
    777        "      <td>890272160</td>\n",
    778        "    </tr>\n",
    779        "    <tr>\n",
    780        "      <th>2</th>\n",
    781        "      <td>True</td>\n",
    782        "      <td>False</td>\n",
    783        "      <td>False</td>\n",
    784        "      <td>False</td>\n",
    785        "      <td>False</td>\n",
    786        "      <td>203.140</td>\n",
    787        "      <td>100</td>\n",
    788        "      <td>1555531195857</td>\n",
    789        "      <td>889977488</td>\n",
    790        "    </tr>\n",
    791        "    <tr>\n",
    792        "      <th>3</th>\n",
    793        "      <td>False</td>\n",
    794        "      <td>False</td>\n",
    795        "      <td>False</td>\n",
    796        "      <td>False</td>\n",
    797        "      <td>False</td>\n",
    798        "      <td>203.155</td>\n",
    799        "      <td>100</td>\n",
    800        "      <td>1555531195465</td>\n",
    801        "      <td>889859713</td>\n",
    802        "    </tr>\n",
    803        "    <tr>\n",
    804        "      <th>4</th>\n",
    805        "      <td>True</td>\n",
    806        "      <td>False</td>\n",
    807        "      <td>False</td>\n",
    808        "      <td>False</td>\n",
    809        "      <td>False</td>\n",
    810        "      <td>203.140</td>\n",
    811        "      <td>100</td>\n",
    812        "      <td>1555531195292</td>\n",
    813        "      <td>889808657</td>\n",
    814        "    </tr>\n",
    815        "  </tbody>\n",
    816        "</table>\n",
    817        "</div>"
    818       ],
    819       "text/plain": [
    820        "   isISO  isOddLot  isOutsideRegularHours  isSinglePriceCross  \\\n",
    821        "0   True      True                   True               False   \n",
    822        "1  False     False                  False               False   \n",
    823        "2   True     False                  False               False   \n",
    824        "3  False     False                  False               False   \n",
    825        "4   True     False                  False               False   \n",
    826        "\n",
    827        "   isTradeThroughExempt    price  size      timestamp    tradeId  \n",
    828        "0                 False  203.190     3  1555531318248  891604355  \n",
    829        "1                 False  203.200   100  1555531197248  890272160  \n",
    830        "2                 False  203.140   100  1555531195857  889977488  \n",
    831        "3                 False  203.155   100  1555531195465  889859713  \n",
    832        "4                 False  203.140   100  1555531195292  889808657  "
    833       ]
    834      },
    835      "execution_count": 12,
    836      "metadata": {},
    837      "output_type": "execute_result"
    838     }
    839    ],
    840    "source": [
    841     "pd.DataFrame(book['trades']).head()"
    842    ]
    843   },
    844   {
    845    "cell_type": "markdown",
    846    "metadata": {},
    847    "source": [
    848     "### Quandl"
    849    ]
    850   },
    851   {
    852    "cell_type": "code",
    853    "execution_count": 13,
    854    "metadata": {
    855     "ExecuteTime": {
    856      "end_time": "2018-12-25T19:15:07.500237Z",
    857      "start_time": "2018-12-25T19:15:06.978720Z"
    858     }
    859    },
    860    "outputs": [
    861     {
    862      "name": "stdout",
    863      "output_type": "stream",
    864      "text": [
    865       "<class 'pandas.core.frame.DataFrame'>\n",
    866       "DatetimeIndex: 813 entries, 2018-03-27 to 2015-01-02\n",
    867       "Data columns (total 12 columns):\n",
    868       "Open          813 non-null float64\n",
    869       "High          813 non-null float64\n",
    870       "Low           813 non-null float64\n",
    871       "Close         813 non-null float64\n",
    872       "Volume        813 non-null float64\n",
    873       "ExDividend    813 non-null float64\n",
    874       "SplitRatio    813 non-null float64\n",
    875       "AdjOpen       813 non-null float64\n",
    876       "AdjHigh       813 non-null float64\n",
    877       "AdjLow        813 non-null float64\n",
    878       "AdjClose      813 non-null float64\n",
    879       "AdjVolume     813 non-null float64\n",
    880       "dtypes: float64(12)\n",
    881       "memory usage: 82.6 KB\n"
    882      ]
    883     }
    884    ],
    885    "source": [
    886     "symbol = 'FB.US'\n",
    887     "\n",
    888     "quandl = web.DataReader(symbol, 'quandl', '2015-01-01')\n",
    889     "quandl.info()"
    890    ]
    891   },
    892   {
    893    "cell_type": "markdown",
    894    "metadata": {},
    895    "source": [
    896     "### FRED"
    897    ]
    898   },
    899   {
    900    "cell_type": "code",
    901    "execution_count": 14,
    902    "metadata": {
    903     "ExecuteTime": {
    904      "end_time": "2018-12-25T19:15:07.662461Z",
    905      "start_time": "2018-12-25T19:15:07.501573Z"
    906     }
    907    },
    908    "outputs": [
    909     {
    910      "name": "stdout",
    911      "output_type": "stream",
    912      "text": [
    913       "<class 'pandas.core.frame.DataFrame'>\n",
    914       "DatetimeIndex: 13 entries, 2010-01-01 to 2013-01-01\n",
    915       "Data columns (total 1 columns):\n",
    916       "GDP    13 non-null float64\n",
    917       "dtypes: float64(1)\n",
    918       "memory usage: 208.0 bytes\n"
    919      ]
    920     }
    921    ],
    922    "source": [
    923     "start = datetime(2010, 1, 1)\n",
    924     "\n",
    925     "end = datetime(2013, 1, 27)\n",
    926     "\n",
    927     "gdp = web.DataReader('GDP', 'fred', start, end)\n",
    928     "\n",
    929     "gdp.info()"
    930    ]
    931   },
    932   {
    933    "cell_type": "code",
    934    "execution_count": 15,
    935    "metadata": {
    936     "ExecuteTime": {
    937      "end_time": "2018-12-25T19:15:09.584269Z",
    938      "start_time": "2018-12-25T19:15:09.399866Z"
    939     }
    940    },
    941    "outputs": [
    942     {
    943      "name": "stdout",
    944      "output_type": "stream",
    945      "text": [
    946       "<class 'pandas.core.frame.DataFrame'>\n",
    947       "DatetimeIndex: 37 entries, 2010-01-01 to 2013-01-01\n",
    948       "Freq: MS\n",
    949       "Data columns (total 2 columns):\n",
    950       "CPIAUCSL    37 non-null float64\n",
    951       "CPILFESL    37 non-null float64\n",
    952       "dtypes: float64(2)\n",
    953       "memory usage: 888.0 bytes\n"
    954      ]
    955     }
    956    ],
    957    "source": [
    958     "inflation = web.DataReader(['CPIAUCSL', 'CPILFESL'], 'fred', start, end)\n",
    959     "inflation.info()"
    960    ]
    961   },
    962   {
    963    "cell_type": "markdown",
    964    "metadata": {},
    965    "source": [
    966     "### Fama/French"
    967    ]
    968   },
    969   {
    970    "cell_type": "code",
    971    "execution_count": 16,
    972    "metadata": {
    973     "ExecuteTime": {
    974      "end_time": "2018-12-25T19:15:10.764850Z",
    975      "start_time": "2018-12-25T19:15:10.755565Z"
    976     }
    977    },
    978    "outputs": [
    979     {
    980      "data": {
    981       "text/plain": [
    982        "['F-F_Research_Data_Factors',\n",
    983        " 'F-F_Research_Data_Factors_weekly',\n",
    984        " 'F-F_Research_Data_Factors_daily',\n",
    985        " 'F-F_Research_Data_5_Factors_2x3',\n",
    986        " 'F-F_Research_Data_5_Factors_2x3_daily',\n",
    987        " 'Portfolios_Formed_on_ME',\n",
    988        " 'Portfolios_Formed_on_ME_Wout_Div',\n",
    989        " 'Portfolios_Formed_on_ME_Daily',\n",
    990        " 'Portfolios_Formed_on_BE-ME',\n",
    991        " 'Portfolios_Formed_on_BE-ME_Wout_Div',\n",
    992        " 'Portfolios_Formed_on_BE-ME_Daily',\n",
    993        " 'Portfolios_Formed_on_OP',\n",
    994        " 'Portfolios_Formed_on_OP_Wout_Div',\n",
    995        " 'Portfolios_Formed_on_INV',\n",
    996        " 'Portfolios_Formed_on_INV_Wout_Div',\n",
    997        " '6_Portfolios_2x3',\n",
    998        " '6_Portfolios_2x3_Wout_Div',\n",
    999        " '6_Portfolios_2x3_weekly',\n",
   1000        " '6_Portfolios_2x3_daily',\n",
   1001        " '25_Portfolios_5x5',\n",
   1002        " '25_Portfolios_5x5_Wout_Div',\n",
   1003        " '25_Portfolios_5x5_Daily',\n",
   1004        " '100_Portfolios_10x10',\n",
   1005        " '100_Portfolios_10x10_Wout_Div',\n",
   1006        " '100_Portfolios_10x10_Daily',\n",
   1007        " '6_Portfolios_ME_OP_2x3',\n",
   1008        " '6_Portfolios_ME_OP_2x3_Wout_Div',\n",
   1009        " '6_Portfolios_ME_OP_2x3_daily',\n",
   1010        " '25_Portfolios_ME_OP_5x5',\n",
   1011        " '25_Portfolios_ME_OP_5x5_Wout_Div',\n",
   1012        " '25_Portfolios_ME_OP_5x5_daily',\n",
   1013        " '100_Portfolios_ME_OP_10x10',\n",
   1014        " '100_Portfolios_10x10_ME_OP_Wout_Div',\n",
   1015        " '100_Portfolios_ME_OP_10x10_daily',\n",
   1016        " '6_Portfolios_ME_INV_2x3',\n",
   1017        " '6_Portfolios_ME_INV_2x3_Wout_Div',\n",
   1018        " '6_Portfolios_ME_INV_2x3_daily',\n",
   1019        " '25_Portfolios_ME_INV_5x5',\n",
   1020        " '25_Portfolios_ME_INV_5x5_Wout_Div',\n",
   1021        " '25_Portfolios_ME_INV_5x5_daily',\n",
   1022        " '100_Portfolios_ME_INV_10x10',\n",
   1023        " '100_Portfolios_10x10_ME_INV_Wout_Div',\n",
   1024        " '100_Portfolios_ME_INV_10x10_daily',\n",
   1025        " '25_Portfolios_BEME_OP_5x5',\n",
   1026        " '25_Portfolios_BEME_OP_5x5_Wout_Div',\n",
   1027        " '25_Portfolios_BEME_OP_5x5_daily',\n",
   1028        " '25_Portfolios_BEME_INV_5x5',\n",
   1029        " '25_Portfolios_BEME_INV_5x5_Wout_Div',\n",
   1030        " '25_Portfolios_BEME_INV_5x5_daily',\n",
   1031        " '25_Portfolios_OP_INV_5x5',\n",
   1032        " '25_Portfolios_OP_INV_5x5_Wout_Div',\n",
   1033        " '25_Portfolios_OP_INV_5x5_daily',\n",
   1034        " '32_Portfolios_ME_BEME_OP_2x4x4',\n",
   1035        " '32_Portfolios_ME_BEME_OP_2x4x4_Wout_Div',\n",
   1036        " '32_Portfolios_ME_BEME_INV_2x4x4',\n",
   1037        " '32_Portfolios_ME_BEME_INV_2x4x4_Wout_Div',\n",
   1038        " '32_Portfolios_ME_OP_INV_2x4x4',\n",
   1039        " '32_Portfolios_ME_OP_INV_2x4x4_Wout_Div',\n",
   1040        " 'Portfolios_Formed_on_E-P',\n",
   1041        " 'Portfolios_Formed_on_E-P_Wout_Div',\n",
   1042        " 'Portfolios_Formed_on_CF-P',\n",
   1043        " 'Portfolios_Formed_on_CF-P_Wout_Div',\n",
   1044        " 'Portfolios_Formed_on_D-P',\n",
   1045        " 'Portfolios_Formed_on_D-P_Wout_Div',\n",
   1046        " '6_Portfolios_ME_EP_2x3',\n",
   1047        " '6_Portfolios_ME_EP_2x3_Wout_Div',\n",
   1048        " '6_Portfolios_ME_CFP_2x3',\n",
   1049        " '6_Portfolios_ME_CFP_2x3_Wout_Div',\n",
   1050        " '6_Portfolios_ME_DP_2x3',\n",
   1051        " '6_Portfolios_ME_DP_2x3_Wout_Div',\n",
   1052        " 'F-F_Momentum_Factor',\n",
   1053        " 'F-F_Momentum_Factor_daily',\n",
   1054        " '6_Portfolios_ME_Prior_12_2',\n",
   1055        " '6_Portfolios_ME_Prior_12_2_Daily',\n",
   1056        " '25_Portfolios_ME_Prior_12_2',\n",
   1057        " '25_Portfolios_ME_Prior_12_2_Daily',\n",
   1058        " '10_Portfolios_Prior_12_2',\n",
   1059        " '10_Portfolios_Prior_12_2_Daily',\n",
   1060        " 'F-F_ST_Reversal_Factor',\n",
   1061        " 'F-F_ST_Reversal_Factor_daily',\n",
   1062        " '6_Portfolios_ME_Prior_1_0',\n",
   1063        " '6_Portfolios_ME_Prior_1_0_Daily',\n",
   1064        " '25_Portfolios_ME_Prior_1_0',\n",
   1065        " '25_Portfolios_ME_Prior_1_0_Daily',\n",
   1066        " '10_Portfolios_Prior_1_0',\n",
   1067        " '10_Portfolios_Prior_1_0_Daily',\n",
   1068        " 'F-F_LT_Reversal_Factor',\n",
   1069        " 'F-F_LT_Reversal_Factor_daily',\n",
   1070        " '6_Portfolios_ME_Prior_60_13',\n",
   1071        " '6_Portfolios_ME_Prior_60_13_Daily',\n",
   1072        " '25_Portfolios_ME_Prior_60_13',\n",
   1073        " '25_Portfolios_ME_Prior_60_13_Daily',\n",
   1074        " '10_Portfolios_Prior_60_13',\n",
   1075        " '10_Portfolios_Prior_60_13_Daily',\n",
   1076        " 'Portfolios_Formed_on_AC',\n",
   1077        " '25_Portfolios_ME_AC_5x5',\n",
   1078        " 'Portfolios_Formed_on_BETA',\n",
   1079        " '25_Portfolios_ME_BETA_5x5',\n",
   1080        " 'Portfolios_Formed_on_NI',\n",
   1081        " '25_Portfolios_ME_NI_5x5',\n",
   1082        " 'Portfolios_Formed_on_VAR',\n",
   1083        " '25_Portfolios_ME_VAR_5x5',\n",
   1084        " 'Portfolios_Formed_on_RESVAR',\n",
   1085        " '25_Portfolios_ME_RESVAR_5x5',\n",
   1086        " '5_Industry_Portfolios',\n",
   1087        " '5_Industry_Portfolios_Wout_Div',\n",
   1088        " '5_Industry_Portfolios_daily',\n",
   1089        " '10_Industry_Portfolios',\n",
   1090        " '10_Industry_Portfolios_Wout_Div',\n",
   1091        " '10_Industry_Portfolios_daily',\n",
   1092        " '12_Industry_Portfolios',\n",
   1093        " '12_Industry_Portfolios_Wout_Div',\n",
   1094        " '12_Industry_Portfolios_daily',\n",
   1095        " '17_Industry_Portfolios',\n",
   1096        " '17_Industry_Portfolios_Wout_Div',\n",
   1097        " '17_Industry_Portfolios_daily',\n",
   1098        " '30_Industry_Portfolios',\n",
   1099        " '30_Industry_Portfolios_Wout_Div',\n",
   1100        " '30_Industry_Portfolios_daily',\n",
   1101        " '38_Industry_Portfolios',\n",
   1102        " '38_Industry_Portfolios_Wout_Div',\n",
   1103        " '38_Industry_Portfolios_daily',\n",
   1104        " '48_Industry_Portfolios',\n",
   1105        " '48_Industry_Portfolios_Wout_Div',\n",
   1106        " '48_Industry_Portfolios_daily',\n",
   1107        " '49_Industry_Portfolios',\n",
   1108        " '49_Industry_Portfolios_Wout_Div',\n",
   1109        " '49_Industry_Portfolios_daily',\n",
   1110        " 'ME_Breakpoints',\n",
   1111        " 'BE-ME_Breakpoints',\n",
   1112        " 'OP_Breakpoints',\n",
   1113        " 'INV_Breakpoints',\n",
   1114        " 'E-P_Breakpoints',\n",
   1115        " 'CF-P_Breakpoints',\n",
   1116        " 'D-P_Breakpoints',\n",
   1117        " 'Prior_2-12_Breakpoints',\n",
   1118        " 'Global_3_Factors',\n",
   1119        " 'Global_3_Factors_Daily',\n",
   1120        " 'Global_ex_US_3_Factors',\n",
   1121        " 'Global_ex_US_3_Factors_Daily',\n",
   1122        " 'Europe_3_Factors',\n",
   1123        " 'Europe_3_Factors_Daily',\n",
   1124        " 'Japan_3_Factors',\n",
   1125        " 'Japan_3_Factors_Daily',\n",
   1126        " 'Asia_Pacific_ex_Japan_3_Factors',\n",
   1127        " 'Asia_Pacific_ex_Japan_3_Factors_Daily',\n",
   1128        " 'North_America_3_Factors',\n",
   1129        " 'North_America_3_Factors_Daily',\n",
   1130        " 'Global_5_Factors',\n",
   1131        " 'Global_5_Factors_Daily',\n",
   1132        " 'Global_ex_US_5_Factors',\n",
   1133        " 'Global_ex_US_5_Factors_Daily',\n",
   1134        " 'Europe_5_Factors',\n",
   1135        " 'Europe_5_Factors_Daily',\n",
   1136        " 'Japan_5_Factors',\n",
   1137        " 'Japan_5_Factors_Daily',\n",
   1138        " 'Asia_Pacific_ex_Japan_5_Factors',\n",
   1139        " 'Asia_Pacific_ex_Japan_5_Factors_Daily',\n",
   1140        " 'North_America_5_Factors',\n",
   1141        " 'North_America_5_Factors_Daily',\n",
   1142        " 'Global_Mom_Factor',\n",
   1143        " 'Global_Mom_Factor_Daily',\n",
   1144        " 'Global_ex_US_Mom_Factor',\n",
   1145        " 'Global_ex_US_Mom_Factor_Daily',\n",
   1146        " 'Europe_Mom_Factor',\n",
   1147        " 'Europe_Mom_Factor_Daily',\n",
   1148        " 'Japan_Mom_Factor',\n",
   1149        " 'Japan_Mom_Factor_Daily',\n",
   1150        " 'Asia_Pacific_ex_Japan_MOM_Factor',\n",
   1151        " 'Asia_Pacific_ex_Japan_MOM_Factor_Daily',\n",
   1152        " 'North_America_Mom_Factor',\n",
   1153        " 'North_America_Mom_Factor_Daily',\n",
   1154        " 'Global_6_Portfolios_ME_BE-ME',\n",
   1155        " 'Global_6_Portfolios_ME_BE-ME_daily',\n",
   1156        " 'Global_ex_US_6_Portfolios_ME_BE-ME',\n",
   1157        " 'Global_ex_US_6_Portfolios_ME_BE-ME_daily',\n",
   1158        " 'Europe_6_Portfolios_ME_BE-ME',\n",
   1159        " 'Europe_6_Portfolios_ME_BE-ME_daily',\n",
   1160        " 'Japan_6_Portfolios_ME_BE-ME',\n",
   1161        " 'Japan_6_Portfolios_ME_BE-ME_daily',\n",
   1162        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME',\n",
   1163        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME_daily',\n",
   1164        " 'North_America_6_Portfolios_ME_BE-ME',\n",
   1165        " 'North_America_6_Portfolios_ME_BE-ME_daily',\n",
   1166        " 'Global_25_Portfolios_ME_BE-ME',\n",
   1167        " 'Global_25_Portfolios_ME_BE-ME_daily',\n",
   1168        " 'Global_ex_US_25_Portfolios_ME_BE-ME',\n",
   1169        " 'Global_ex_US_25_Portfolios_ME_BE-ME_daily',\n",
   1170        " 'Europe_25_Portfolios_ME_BE-ME',\n",
   1171        " 'Europe_25_Portfolios_ME_BE-ME_daily',\n",
   1172        " 'Japan_25_Portfolios_ME_BE-ME',\n",
   1173        " 'Japan_25_Portfolios_ME_BE-ME_daily',\n",
   1174        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME',\n",
   1175        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME_daily',\n",
   1176        " 'North_America_25_Portfolios_ME_BE-ME',\n",
   1177        " 'North_America_25_Portfolios_ME_BE-ME_daily',\n",
   1178        " 'Global_6_Portfolios_ME_OP',\n",
   1179        " 'Global_6_Portfolios_ME_OP_Daily',\n",
   1180        " 'Global_ex_US_6_Portfolios_ME_OP',\n",
   1181        " 'Global_ex_US_6_Portfolios_ME_OP_Daily',\n",
   1182        " 'Europe_6_Portfolios_ME_OP',\n",
   1183        " 'Europe_6_Portfolios_ME_OP_Daily',\n",
   1184        " 'Japan_6_Portfolios_ME_OP',\n",
   1185        " 'Japan_6_Portfolios_ME_OP_Daily',\n",
   1186        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP',\n",
   1187        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP_Daily',\n",
   1188        " 'North_America_6_Portfolios_ME_OP',\n",
   1189        " 'North_America_6_Portfolios_ME_OP_Daily',\n",
   1190        " 'Global_25_Portfolios_ME_OP',\n",
   1191        " 'Global_25_Portfolios_ME_OP_Daily',\n",
   1192        " 'Global_ex_US_25_Portfolios_ME_OP',\n",
   1193        " 'Global_ex_US_25_Portfolios_ME_OP_Daily',\n",
   1194        " 'Europe_25_Portfolios_ME_OP',\n",
   1195        " 'Europe_25_Portfolios_ME_OP_Daily',\n",
   1196        " 'Japan_25_Portfolios_ME_OP',\n",
   1197        " 'Japan_25_Portfolios_ME_OP_Daily',\n",
   1198        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP',\n",
   1199        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP_Daily',\n",
   1200        " 'North_America_25_Portfolios_ME_OP',\n",
   1201        " 'North_America_25_Portfolios_ME_OP_Daily',\n",
   1202        " 'Global_6_Portfolios_ME_INV',\n",
   1203        " 'Global_6_Portfolios_ME_INV_Daily',\n",
   1204        " 'Global_ex_US_6_Portfolios_ME_INV',\n",
   1205        " 'Global_ex_US_6_Portfolios_ME_INV_Daily',\n",
   1206        " 'Europe_6_Portfolios_ME_INV',\n",
   1207        " 'Europe_6_Portfolios_ME_INV_Daily',\n",
   1208        " 'Japan_6_Portfolios_ME_INV',\n",
   1209        " 'Japan_6_Portfolios_ME_INV_Daily',\n",
   1210        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV',\n",
   1211        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV_Daily',\n",
   1212        " 'North_America_6_Portfolios_ME_INV',\n",
   1213        " 'North_America_6_Portfolios_ME_INV_Daily',\n",
   1214        " 'Global_25_Portfolios_ME_INV',\n",
   1215        " 'Global_25_Portfolios_ME_INV_Daily',\n",
   1216        " 'Global_ex_US_25_Portfolios_ME_INV',\n",
   1217        " 'Global_ex_US_25_Portfolios_ME_INV_Daily',\n",
   1218        " 'Europe_25_Portfolios_ME_INV',\n",
   1219        " 'Europe_25_Portfolios_ME_INV_Daily',\n",
   1220        " 'Japan_25_Portfolios_ME_INV',\n",
   1221        " 'Japan_25_Portfolios_ME_INV_Daily',\n",
   1222        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV',\n",
   1223        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV_Daily',\n",
   1224        " 'North_America_25_Portfolios_ME_INV',\n",
   1225        " 'North_America_25_Portfolios_ME_INV_Daily',\n",
   1226        " 'Global_6_Portfolios_ME_Prior_12_2',\n",
   1227        " 'Global_6_Portfolios_ME_Prior_250_20_daily',\n",
   1228        " 'Global_ex_US_6_Portfolios_ME_Prior_12_2',\n",
   1229        " 'Global_ex_US_6_Portfolios_ME_Prior_250_20_daily',\n",
   1230        " 'Europe_6_Portfolios_ME_Prior_12_2',\n",
   1231        " 'Europe_6_Portfolios_ME_Prior_250_20_daily',\n",
   1232        " 'Japan_6_Portfolios_ME_Prior_12_2',\n",
   1233        " 'Japan_6_Portfolios_ME_Prior_250_20_daily',\n",
   1234        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_12_2',\n",
   1235        " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_250_20_daily',\n",
   1236        " 'North_America_6_Portfolios_ME_Prior_12_2',\n",
   1237        " 'North_America_6_Portfolios_ME_Prior_250_20_daily',\n",
   1238        " 'Global_25_Portfolios_ME_Prior_12_2',\n",
   1239        " 'Global_25_Portfolios_ME_Prior_250_20_daily',\n",
   1240        " 'Global_ex_US_25_Portfolios_ME_Prior_12_2',\n",
   1241        " 'Global_ex_US_25_Portfolios_ME_Prior_250_20_daily',\n",
   1242        " 'Europe_25_Portfolios_ME_Prior_12_2',\n",
   1243        " 'Europe_25_Portfolios_ME_Prior_250_20_daily',\n",
   1244        " 'Japan_25_Portfolios_ME_Prior_12_2',\n",
   1245        " 'Japan_25_Portfolios_ME_Prior_250_20_daily',\n",
   1246        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_12_2',\n",
   1247        " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_250_20_daily',\n",
   1248        " 'North_America_25_Portfolios_ME_Prior_12_2',\n",
   1249        " 'North_America_25_Portfolios_ME_Prior_250_20_daily',\n",
   1250        " 'Global_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1251        " 'Global_ex_US_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1252        " 'Europe_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1253        " 'Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1254        " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1255        " 'North_America_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
   1256        " 'Global_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1257        " 'Global_ex_US_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1258        " 'Europe_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1259        " 'Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1260        " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1261        " 'North_America_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
   1262        " 'Global_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
   1263        " 'Global_ex_US_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
   1264        " 'Europe_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
   1265        " 'Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
   1266        " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
   1267        " 'North_America_32_Portfolios_ME_INV(TA)_OP_2x4x4']"
   1268       ]
   1269      },
   1270      "execution_count": 16,
   1271      "metadata": {},
   1272      "output_type": "execute_result"
   1273     }
   1274    ],
   1275    "source": [
   1276     "from pandas_datareader.famafrench import get_available_datasets\n",
   1277     "get_available_datasets()"
   1278    ]
   1279   },
   1280   {
   1281    "cell_type": "code",
   1282    "execution_count": 17,
   1283    "metadata": {
   1284     "ExecuteTime": {
   1285      "end_time": "2018-12-25T19:16:23.570899Z",
   1286      "start_time": "2018-12-25T19:16:23.299817Z"
   1287     }
   1288    },
   1289    "outputs": [
   1290     {
   1291      "name": "stdout",
   1292      "output_type": "stream",
   1293      "text": [
   1294       "5 Industry Portfolios\n",
   1295       "---------------------\n",
   1296       "\n",
   1297       "This file was created by CMPT_IND_RETS using the 201901 CRSP database. It contains value- and equal-weighted returns for 5 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2019 Kenneth R. French\n",
   1298       "\n",
   1299       "  0 : Average Value Weighted Returns -- Monthly (109 rows x 5 cols)\n",
   1300       "  1 : Average Equal Weighted Returns -- Monthly (109 rows x 5 cols)\n",
   1301       "  2 : Average Value Weighted Returns -- Annual (9 rows x 5 cols)\n",
   1302       "  3 : Average Equal Weighted Returns -- Annual (9 rows x 5 cols)\n",
   1303       "  4 : Number of Firms in Portfolios (109 rows x 5 cols)\n",
   1304       "  5 : Average Firm Size (109 rows x 5 cols)\n",
   1305       "  6 : Sum of BE / Sum of ME (9 rows x 5 cols)\n",
   1306       "  7 : Value-Weighted Average of BE/ME (9 rows x 5 cols)\n"
   1307      ]
   1308     }
   1309    ],
   1310    "source": [
   1311     "ds = web.DataReader('5_Industry_Portfolios', 'famafrench')\n",
   1312     "print(ds['DESCR'])"
   1313    ]
   1314   },
   1315   {
   1316    "cell_type": "markdown",
   1317    "metadata": {},
   1318    "source": [
   1319     "### World Bank"
   1320    ]
   1321   },
   1322   {
   1323    "cell_type": "code",
   1324    "execution_count": 16,
   1325    "metadata": {},
   1326    "outputs": [
   1327     {
   1328      "data": {
   1329       "text/html": [
   1330        "<div>\n",
   1331        "<style scoped>\n",
   1332        "    .dataframe tbody tr th:only-of-type {\n",
   1333        "        vertical-align: middle;\n",
   1334        "    }\n",
   1335        "\n",
   1336        "    .dataframe tbody tr th {\n",
   1337        "        vertical-align: top;\n",
   1338        "    }\n",
   1339        "\n",
   1340        "    .dataframe thead th {\n",
   1341        "        text-align: right;\n",
   1342        "    }\n",
   1343        "</style>\n",
   1344        "<table border=\"1\" class=\"dataframe\">\n",
   1345        "  <thead>\n",
   1346        "    <tr style=\"text-align: right;\">\n",
   1347        "      <th></th>\n",
   1348        "      <th>id</th>\n",
   1349        "      <th>name</th>\n",
   1350        "      <th>source</th>\n",
   1351        "      <th>sourceNote</th>\n",
   1352        "      <th>sourceOrganization</th>\n",
   1353        "      <th>topics</th>\n",
   1354        "      <th>unit</th>\n",
   1355        "    </tr>\n",
   1356        "  </thead>\n",
   1357        "  <tbody>\n",
   1358        "    <tr>\n",
   1359        "      <th>646</th>\n",
   1360        "      <td>6.0.GDPpc_constant</td>\n",
   1361        "      <td>GDP per capita, PPP (constant 2011 internation...</td>\n",
   1362        "      <td>LAC Equity Lab</td>\n",
   1363        "      <td>GDP per capita based on purchasing power parit...</td>\n",
   1364        "      <td>b'World Development Indicators (World Bank)'</td>\n",
   1365        "      <td>Economy &amp; Growth</td>\n",
   1366        "      <td></td>\n",
   1367        "    </tr>\n",
   1368        "    <tr>\n",
   1369        "      <th>9108</th>\n",
   1370        "      <td>NY.GDP.PCAP.KD</td>\n",
   1371        "      <td>GDP per capita (constant 2010 US$)</td>\n",
   1372        "      <td>World Development Indicators</td>\n",
   1373        "      <td>GDP per capita is gross domestic product divid...</td>\n",
   1374        "      <td>b'World Bank national accounts data, and OECD ...</td>\n",
   1375        "      <td>Economy &amp; Growth</td>\n",
   1376        "      <td></td>\n",
   1377        "    </tr>\n",
   1378        "    <tr>\n",
   1379        "      <th>9110</th>\n",
   1380        "      <td>NY.GDP.PCAP.KN</td>\n",
   1381        "      <td>GDP per capita (constant LCU)</td>\n",
   1382        "      <td>World Development Indicators</td>\n",
   1383        "      <td>GDP per capita is gross domestic product divid...</td>\n",
   1384        "      <td>b'World Bank national accounts data, and OECD ...</td>\n",
   1385        "      <td>Economy &amp; Growth</td>\n",
   1386        "      <td></td>\n",
   1387        "    </tr>\n",
   1388        "    <tr>\n",
   1389        "      <th>9112</th>\n",
   1390        "      <td>NY.GDP.PCAP.PP.KD</td>\n",
   1391        "      <td>GDP per capita, PPP (constant 2011 internation...</td>\n",
   1392        "      <td>World Development Indicators</td>\n",
   1393        "      <td>GDP per capita based on purchasing power parit...</td>\n",
   1394        "      <td>b'World Bank, International Comparison Program...</td>\n",
   1395        "      <td>Economy &amp; Growth</td>\n",
   1396        "      <td></td>\n",
   1397        "    </tr>\n",
   1398        "    <tr>\n",
   1399        "      <th>9113</th>\n",
   1400        "      <td>NY.GDP.PCAP.PP.KD.87</td>\n",
   1401        "      <td>GDP per capita, PPP (constant 1987 internation...</td>\n",
   1402        "      <td>WDI Database Archives</td>\n",
   1403        "      <td></td>\n",
   1404        "      <td>b''</td>\n",
   1405        "      <td></td>\n",
   1406        "      <td></td>\n",
   1407        "    </tr>\n",
   1408        "  </tbody>\n",
   1409        "</table>\n",
   1410        "</div>"
   1411       ],
   1412       "text/plain": [
   1413        "                        id                                               name  \\\n",
   1414        "646     6.0.GDPpc_constant  GDP per capita, PPP (constant 2011 internation...   \n",
   1415        "9108        NY.GDP.PCAP.KD                 GDP per capita (constant 2010 US$)   \n",
   1416        "9110        NY.GDP.PCAP.KN                      GDP per capita (constant LCU)   \n",
   1417        "9112     NY.GDP.PCAP.PP.KD  GDP per capita, PPP (constant 2011 internation...   \n",
   1418        "9113  NY.GDP.PCAP.PP.KD.87  GDP per capita, PPP (constant 1987 internation...   \n",
   1419        "\n",
   1420        "                            source  \\\n",
   1421        "646                 LAC Equity Lab   \n",
   1422        "9108  World Development Indicators   \n",
   1423        "9110  World Development Indicators   \n",
   1424        "9112  World Development Indicators   \n",
   1425        "9113         WDI Database Archives   \n",
   1426        "\n",
   1427        "                                             sourceNote  \\\n",
   1428        "646   GDP per capita based on purchasing power parit...   \n",
   1429        "9108  GDP per capita is gross domestic product divid...   \n",
   1430        "9110  GDP per capita is gross domestic product divid...   \n",
   1431        "9112  GDP per capita based on purchasing power parit...   \n",
   1432        "9113                                                      \n",
   1433        "\n",
   1434        "                                     sourceOrganization            topics unit  \n",
   1435        "646        b'World Development Indicators (World Bank)'  Economy & Growth       \n",
   1436        "9108  b'World Bank national accounts data, and OECD ...  Economy & Growth       \n",
   1437        "9110  b'World Bank national accounts data, and OECD ...  Economy & Growth       \n",
   1438        "9112  b'World Bank, International Comparison Program...  Economy & Growth       \n",
   1439        "9113                                                b''                         "
   1440       ]
   1441      },
   1442      "execution_count": 16,
   1443      "metadata": {},
   1444      "output_type": "execute_result"
   1445     }
   1446    ],
   1447    "source": [
   1448     "from pandas_datareader import wb\n",
   1449     "gdp_variables = wb.search('gdp.*capita.*const')\n",
   1450     "gdp_variables.head()"
   1451    ]
   1452   },
   1453   {
   1454    "cell_type": "code",
   1455    "execution_count": 18,
   1456    "metadata": {},
   1457    "outputs": [
   1458     {
   1459      "data": {
   1460       "text/html": [
   1461        "<div>\n",
   1462        "<style scoped>\n",
   1463        "    .dataframe tbody tr th:only-of-type {\n",
   1464        "        vertical-align: middle;\n",
   1465        "    }\n",
   1466        "\n",
   1467        "    .dataframe tbody tr th {\n",
   1468        "        vertical-align: top;\n",
   1469        "    }\n",
   1470        "\n",
   1471        "    .dataframe thead th {\n",
   1472        "        text-align: right;\n",
   1473        "    }\n",
   1474        "</style>\n",
   1475        "<table border=\"1\" class=\"dataframe\">\n",
   1476        "  <thead>\n",
   1477        "    <tr style=\"text-align: right;\">\n",
   1478        "      <th></th>\n",
   1479        "      <th></th>\n",
   1480        "      <th>NY.GDP.PCAP.KD</th>\n",
   1481        "    </tr>\n",
   1482        "    <tr>\n",
   1483        "      <th>country</th>\n",
   1484        "      <th>year</th>\n",
   1485        "      <th></th>\n",
   1486        "    </tr>\n",
   1487        "  </thead>\n",
   1488        "  <tbody>\n",
   1489        "    <tr>\n",
   1490        "      <th rowspan=\"5\" valign=\"top\">Canada</th>\n",
   1491        "      <th>2018</th>\n",
   1492        "      <td>NaN</td>\n",
   1493        "    </tr>\n",
   1494        "    <tr>\n",
   1495        "      <th>2017</th>\n",
   1496        "      <td>51315.888975</td>\n",
   1497        "    </tr>\n",
   1498        "    <tr>\n",
   1499        "      <th>2016</th>\n",
   1500        "      <td>50407.341330</td>\n",
   1501        "    </tr>\n",
   1502        "    <tr>\n",
   1503        "      <th>2015</th>\n",
   1504        "      <td>50303.836848</td>\n",
   1505        "    </tr>\n",
   1506        "    <tr>\n",
   1507        "      <th>2014</th>\n",
   1508        "      <td>50221.841982</td>\n",
   1509        "    </tr>\n",
   1510        "  </tbody>\n",
   1511        "</table>\n",
   1512        "</div>"
   1513       ],
   1514       "text/plain": [
   1515        "              NY.GDP.PCAP.KD\n",
   1516        "country year                \n",
   1517        "Canada  2018             NaN\n",
   1518        "        2017    51315.888975\n",
   1519        "        2016    50407.341330\n",
   1520        "        2015    50303.836848\n",
   1521        "        2014    50221.841982"
   1522       ]
   1523      },
   1524      "execution_count": 18,
   1525      "metadata": {},
   1526      "output_type": "execute_result"
   1527     }
   1528    ],
   1529    "source": [
   1530     "wb_data = wb.download(indicator='NY.GDP.PCAP.KD', \n",
   1531     "                      country=['US', 'CA', 'MX'], \n",
   1532     "                      start=1990, \n",
   1533     "                      end=2019)\n",
   1534     "wb_data.head()"
   1535    ]
   1536   },
   1537   {
   1538    "cell_type": "markdown",
   1539    "metadata": {},
   1540    "source": [
   1541     "### OECD"
   1542    ]
   1543   },
   1544   {
   1545    "cell_type": "code",
   1546    "execution_count": 20,
   1547    "metadata": {},
   1548    "outputs": [
   1549     {
   1550      "data": {
   1551       "text/html": [
   1552        "<div>\n",
   1553        "<style scoped>\n",
   1554        "    .dataframe tbody tr th:only-of-type {\n",
   1555        "        vertical-align: middle;\n",
   1556        "    }\n",
   1557        "\n",
   1558        "    .dataframe tbody tr th {\n",
   1559        "        vertical-align: top;\n",
   1560        "    }\n",
   1561        "\n",
   1562        "    .dataframe thead tr th {\n",
   1563        "        text-align: left;\n",
   1564        "    }\n",
   1565        "\n",
   1566        "    .dataframe thead tr:last-of-type th {\n",
   1567        "        text-align: right;\n",
   1568        "    }\n",
   1569        "</style>\n",
   1570        "<table border=\"1\" class=\"dataframe\">\n",
   1571        "  <thead>\n",
   1572        "    <tr>\n",
   1573        "      <th>Country</th>\n",
   1574        "      <th colspan=\"10\" halign=\"left\">Japan</th>\n",
   1575        "      <th>...</th>\n",
   1576        "      <th colspan=\"10\" halign=\"left\">United States</th>\n",
   1577        "    </tr>\n",
   1578        "    <tr>\n",
   1579        "      <th>Frequency</th>\n",
   1580        "      <th colspan=\"10\" halign=\"left\">Annual</th>\n",
   1581        "      <th>...</th>\n",
   1582        "      <th colspan=\"10\" halign=\"left\">Annual</th>\n",
   1583        "    </tr>\n",
   1584        "    <tr>\n",
   1585        "      <th>Source</th>\n",
   1586        "      <th colspan=\"6\" halign=\"left\">Survey data</th>\n",
   1587        "      <th colspan=\"4\" halign=\"left\">Administrative data</th>\n",
   1588        "      <th>...</th>\n",
   1589        "      <th colspan=\"4\" halign=\"left\">Survey data</th>\n",
   1590        "      <th colspan=\"6\" halign=\"left\">Administrative data</th>\n",
   1591        "    </tr>\n",
   1592        "    <tr>\n",
   1593        "      <th>Series</th>\n",
   1594        "      <th colspan=\"2\" halign=\"left\">Union members</th>\n",
   1595        "      <th colspan=\"2\" halign=\"left\">Trade union  density</th>\n",
   1596        "      <th colspan=\"2\" halign=\"left\">Employees</th>\n",
   1597        "      <th colspan=\"2\" halign=\"left\">Union members</th>\n",
   1598        "      <th colspan=\"2\" halign=\"left\">Trade union  density</th>\n",
   1599        "      <th>...</th>\n",
   1600        "      <th colspan=\"2\" halign=\"left\">Trade union  density</th>\n",
   1601        "      <th colspan=\"2\" halign=\"left\">Employees</th>\n",
   1602        "      <th colspan=\"2\" halign=\"left\">Union members</th>\n",
   1603        "      <th colspan=\"2\" halign=\"left\">Trade union  density</th>\n",
   1604        "      <th colspan=\"2\" halign=\"left\">Employees</th>\n",
   1605        "    </tr>\n",
   1606        "    <tr>\n",
   1607        "      <th>Measure</th>\n",
   1608        "      <th>Thousands</th>\n",
   1609        "      <th>Percentage</th>\n",
   1610        "      <th>Thousands</th>\n",
   1611        "      <th>Percentage</th>\n",
   1612        "      <th>Thousands</th>\n",
   1613        "      <th>Percentage</th>\n",
   1614        "      <th>Thousands</th>\n",
   1615        "      <th>Percentage</th>\n",
   1616        "      <th>Thousands</th>\n",
   1617        "      <th>Percentage</th>\n",
   1618        "      <th>...</th>\n",
   1619        "      <th>Thousands</th>\n",
   1620        "      <th>Percentage</th>\n",
   1621        "      <th>Thousands</th>\n",
   1622        "      <th>Percentage</th>\n",
   1623        "      <th>Thousands</th>\n",
   1624        "      <th>Percentage</th>\n",
   1625        "      <th>Thousands</th>\n",
   1626        "      <th>Percentage</th>\n",
   1627        "      <th>Thousands</th>\n",
   1628        "      <th>Percentage</th>\n",
   1629        "    </tr>\n",
   1630        "    <tr>\n",
   1631        "      <th>Year</th>\n",
   1632        "      <th></th>\n",
   1633        "      <th></th>\n",
   1634        "      <th></th>\n",
   1635        "      <th></th>\n",
   1636        "      <th></th>\n",
   1637        "      <th></th>\n",
   1638        "      <th></th>\n",
   1639        "      <th></th>\n",
   1640        "      <th></th>\n",
   1641        "      <th></th>\n",
   1642        "      <th></th>\n",
   1643        "      <th></th>\n",
   1644        "      <th></th>\n",
   1645        "      <th></th>\n",
   1646        "      <th></th>\n",
   1647        "      <th></th>\n",
   1648        "      <th></th>\n",
   1649        "      <th></th>\n",
   1650        "      <th></th>\n",
   1651        "      <th></th>\n",
   1652        "      <th></th>\n",
   1653        "    </tr>\n",
   1654        "  </thead>\n",
   1655        "  <tbody>\n",
   1656        "    <tr>\n",
   1657        "      <th>2010-01-01</th>\n",
   1658        "      <td>NaN</td>\n",
   1659        "      <td>NaN</td>\n",
   1660        "      <td>NaN</td>\n",
   1661        "      <td>NaN</td>\n",
   1662        "      <td>NaN</td>\n",
   1663        "      <td>NaN</td>\n",
   1664        "      <td>12417.527</td>\n",
   1665        "      <td>NaN</td>\n",
   1666        "      <td>NaN</td>\n",
   1667        "      <td>28.871256</td>\n",
   1668        "      <td>...</td>\n",
   1669        "      <td>NaN</td>\n",
   1670        "      <td>17.448617</td>\n",
   1671        "      <td>97406.0</td>\n",
   1672        "      <td>NaN</td>\n",
   1673        "      <td>NaN</td>\n",
   1674        "      <td>NaN</td>\n",
   1675        "      <td>NaN</td>\n",
   1676        "      <td>NaN</td>\n",
   1677        "      <td>NaN</td>\n",
   1678        "      <td>NaN</td>\n",
   1679        "    </tr>\n",
   1680        "    <tr>\n",
   1681        "      <th>2011-01-01</th>\n",
   1682        "      <td>NaN</td>\n",
   1683        "      <td>NaN</td>\n",
   1684        "      <td>NaN</td>\n",
   1685        "      <td>NaN</td>\n",
   1686        "      <td>NaN</td>\n",
   1687        "      <td>NaN</td>\n",
   1688        "      <td>12271.909</td>\n",
   1689        "      <td>NaN</td>\n",
   1690        "      <td>NaN</td>\n",
   1691        "      <td>27.589723</td>\n",
   1692        "      <td>...</td>\n",
   1693        "      <td>NaN</td>\n",
   1694        "      <td>16.516118</td>\n",
   1695        "      <td>102403.0</td>\n",
   1696        "      <td>NaN</td>\n",
   1697        "      <td>NaN</td>\n",
   1698        "      <td>NaN</td>\n",
   1699        "      <td>NaN</td>\n",
   1700        "      <td>NaN</td>\n",
   1701        "      <td>NaN</td>\n",
   1702        "      <td>NaN</td>\n",
   1703        "    </tr>\n",
   1704        "    <tr>\n",
   1705        "      <th>2012-01-01</th>\n",
   1706        "      <td>NaN</td>\n",
   1707        "      <td>NaN</td>\n",
   1708        "      <td>NaN</td>\n",
   1709        "      <td>NaN</td>\n",
   1710        "      <td>NaN</td>\n",
   1711        "      <td>NaN</td>\n",
   1712        "      <td>12227.073</td>\n",
   1713        "      <td>NaN</td>\n",
   1714        "      <td>NaN</td>\n",
   1715        "      <td>25.899329</td>\n",
   1716        "      <td>...</td>\n",
   1717        "      <td>NaN</td>\n",
   1718        "      <td>15.861734</td>\n",
   1719        "      <td>106924.0</td>\n",
   1720        "      <td>NaN</td>\n",
   1721        "      <td>NaN</td>\n",
   1722        "      <td>NaN</td>\n",
   1723        "      <td>NaN</td>\n",
   1724        "      <td>NaN</td>\n",
   1725        "      <td>NaN</td>\n",
   1726        "      <td>NaN</td>\n",
   1727        "    </tr>\n",
   1728        "    <tr>\n",
   1729        "      <th>2013-01-01</th>\n",
   1730        "      <td>NaN</td>\n",
   1731        "      <td>NaN</td>\n",
   1732        "      <td>NaN</td>\n",
   1733        "      <td>NaN</td>\n",
   1734        "      <td>NaN</td>\n",
   1735        "      <td>NaN</td>\n",
   1736        "      <td>12396.592</td>\n",
   1737        "      <td>NaN</td>\n",
   1738        "      <td>NaN</td>\n",
   1739        "      <td>24.489514</td>\n",
   1740        "      <td>...</td>\n",
   1741        "      <td>NaN</td>\n",
   1742        "      <td>15.510448</td>\n",
   1743        "      <td>107102.0</td>\n",
   1744        "      <td>NaN</td>\n",
   1745        "      <td>NaN</td>\n",
   1746        "      <td>NaN</td>\n",
   1747        "      <td>NaN</td>\n",
   1748        "      <td>NaN</td>\n",
   1749        "      <td>NaN</td>\n",
   1750        "      <td>NaN</td>\n",
   1751        "    </tr>\n",
   1752        "    <tr>\n",
   1753        "      <th>2014-01-01</th>\n",
   1754        "      <td>NaN</td>\n",
   1755        "      <td>NaN</td>\n",
   1756        "      <td>NaN</td>\n",
   1757        "      <td>NaN</td>\n",
   1758        "      <td>NaN</td>\n",
   1759        "      <td>NaN</td>\n",
   1760        "      <td>7661.568</td>\n",
   1761        "      <td>NaN</td>\n",
   1762        "      <td>NaN</td>\n",
   1763        "      <td>32.164433</td>\n",
   1764        "      <td>...</td>\n",
   1765        "      <td>NaN</td>\n",
   1766        "      <td>NaN</td>\n",
   1767        "      <td>NaN</td>\n",
   1768        "      <td>NaN</td>\n",
   1769        "      <td>17049.0</td>\n",
   1770        "      <td>NaN</td>\n",
   1771        "      <td>NaN</td>\n",
   1772        "      <td>30.897064</td>\n",
   1773        "      <td>55180.0</td>\n",
   1774        "      <td>NaN</td>\n",
   1775        "    </tr>\n",
   1776        "    <tr>\n",
   1777        "      <th>2015-01-01</th>\n",
   1778        "      <td>NaN</td>\n",
   1779        "      <td>NaN</td>\n",
   1780        "      <td>NaN</td>\n",
   1781        "      <td>NaN</td>\n",
   1782        "      <td>NaN</td>\n",
   1783        "      <td>NaN</td>\n",
   1784        "      <td>8359.876</td>\n",
   1785        "      <td>NaN</td>\n",
   1786        "      <td>NaN</td>\n",
   1787        "      <td>34.516416</td>\n",
   1788        "      <td>...</td>\n",
   1789        "      <td>NaN</td>\n",
   1790        "      <td>NaN</td>\n",
   1791        "      <td>NaN</td>\n",
   1792        "      <td>NaN</td>\n",
   1793        "      <td>16303.0</td>\n",
   1794        "      <td>NaN</td>\n",
   1795        "      <td>NaN</td>\n",
   1796        "      <td>29.518378</td>\n",
   1797        "      <td>55230.0</td>\n",
   1798        "      <td>NaN</td>\n",
   1799        "    </tr>\n",
   1800        "  </tbody>\n",
   1801        "</table>\n",
   1802        "<p>6 rows × 24 columns</p>\n",
   1803        "</div>"
   1804       ],
   1805       "text/plain": [
   1806        "Country            Japan                                                       \\\n",
   1807        "Frequency         Annual                                                        \n",
   1808        "Source       Survey data                                                        \n",
   1809        "Series     Union members            Trade union  density            Employees   \n",
   1810        "Measure        Thousands Percentage            Thousands Percentage Thousands   \n",
   1811        "Year                                                                            \n",
   1812        "2010-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1813        "2011-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1814        "2012-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1815        "2013-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1816        "2014-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1817        "2015-01-01           NaN        NaN                  NaN        NaN       NaN   \n",
   1818        "\n",
   1819        "Country                                                                    \\\n",
   1820        "Frequency                                                                   \n",
   1821        "Source                Administrative data                                   \n",
   1822        "Series                      Union members            Trade union  density   \n",
   1823        "Measure    Percentage           Thousands Percentage            Thousands   \n",
   1824        "Year                                                                        \n",
   1825        "2010-01-01        NaN           12417.527        NaN                  NaN   \n",
   1826        "2011-01-01        NaN           12271.909        NaN                  NaN   \n",
   1827        "2012-01-01        NaN           12227.073        NaN                  NaN   \n",
   1828        "2013-01-01        NaN           12396.592        NaN                  NaN   \n",
   1829        "2014-01-01        NaN            7661.568        NaN                  NaN   \n",
   1830        "2015-01-01        NaN            8359.876        NaN                  NaN   \n",
   1831        "\n",
   1832        "Country                ...        United States                       \\\n",
   1833        "Frequency              ...               Annual                        \n",
   1834        "Source                 ...          Survey data                        \n",
   1835        "Series                 ... Trade union  density            Employees   \n",
   1836        "Measure    Percentage  ...            Thousands Percentage Thousands   \n",
   1837        "Year                   ...                                             \n",
   1838        "2010-01-01  28.871256  ...                  NaN  17.448617   97406.0   \n",
   1839        "2011-01-01  27.589723  ...                  NaN  16.516118  102403.0   \n",
   1840        "2012-01-01  25.899329  ...                  NaN  15.861734  106924.0   \n",
   1841        "2013-01-01  24.489514  ...                  NaN  15.510448  107102.0   \n",
   1842        "2014-01-01  32.164433  ...                  NaN        NaN       NaN   \n",
   1843        "2015-01-01  34.516416  ...                  NaN        NaN       NaN   \n",
   1844        "\n",
   1845        "Country                                                                    \\\n",
   1846        "Frequency                                                                   \n",
   1847        "Source                Administrative data                                   \n",
   1848        "Series                      Union members            Trade union  density   \n",
   1849        "Measure    Percentage           Thousands Percentage            Thousands   \n",
   1850        "Year                                                                        \n",
   1851        "2010-01-01        NaN                 NaN        NaN                  NaN   \n",
   1852        "2011-01-01        NaN                 NaN        NaN                  NaN   \n",
   1853        "2012-01-01        NaN                 NaN        NaN                  NaN   \n",
   1854        "2013-01-01        NaN                 NaN        NaN                  NaN   \n",
   1855        "2014-01-01        NaN             17049.0        NaN                  NaN   \n",
   1856        "2015-01-01        NaN             16303.0        NaN                  NaN   \n",
   1857        "\n",
   1858        "Country                                     \n",
   1859        "Frequency                                   \n",
   1860        "Source                                      \n",
   1861        "Series                Employees             \n",
   1862        "Measure    Percentage Thousands Percentage  \n",
   1863        "Year                                        \n",
   1864        "2010-01-01        NaN       NaN        NaN  \n",
   1865        "2011-01-01        NaN       NaN        NaN  \n",
   1866        "2012-01-01        NaN       NaN        NaN  \n",
   1867        "2013-01-01        NaN       NaN        NaN  \n",
   1868        "2014-01-01  30.897064   55180.0        NaN  \n",
   1869        "2015-01-01  29.518378   55230.0        NaN  \n",
   1870        "\n",
   1871        "[6 rows x 24 columns]"
   1872       ]
   1873      },
   1874      "execution_count": 20,
   1875      "metadata": {},
   1876      "output_type": "execute_result"
   1877     }
   1878    ],
   1879    "source": [
   1880     "df = web.DataReader('TUD', 'oecd', end='2015')\n",
   1881     "df[['Japan', 'United States']]"
   1882    ]
   1883   },
   1884   {
   1885    "cell_type": "markdown",
   1886    "metadata": {},
   1887    "source": [
   1888     "### EuroStat"
   1889    ]
   1890   },
   1891   {
   1892    "cell_type": "code",
   1893    "execution_count": 21,
   1894    "metadata": {},
   1895    "outputs": [],
   1896    "source": [
   1897     "df = web.DataReader('tran_sf_railac', 'eurostat')"
   1898    ]
   1899   },
   1900   {
   1901    "cell_type": "code",
   1902    "execution_count": 22,
   1903    "metadata": {},
   1904    "outputs": [
   1905     {
   1906      "data": {
   1907       "text/html": [
   1908        "<div>\n",
   1909        "<style scoped>\n",
   1910        "    .dataframe tbody tr th:only-of-type {\n",
   1911        "        vertical-align: middle;\n",
   1912        "    }\n",
   1913        "\n",
   1914        "    .dataframe tbody tr th {\n",
   1915        "        vertical-align: top;\n",
   1916        "    }\n",
   1917        "\n",
   1918        "    .dataframe thead tr th {\n",
   1919        "        text-align: left;\n",
   1920        "    }\n",
   1921        "\n",
   1922        "    .dataframe thead tr:last-of-type th {\n",
   1923        "        text-align: right;\n",
   1924        "    }\n",
   1925        "</style>\n",
   1926        "<table border=\"1\" class=\"dataframe\">\n",
   1927        "  <thead>\n",
   1928        "    <tr>\n",
   1929        "      <th>ACCIDENT</th>\n",
   1930        "      <th colspan=\"10\" halign=\"left\">Collisions of trains, including collisions with obstacles within the clearance gauge</th>\n",
   1931        "      <th>...</th>\n",
   1932        "      <th colspan=\"10\" halign=\"left\">Unknown</th>\n",
   1933        "    </tr>\n",
   1934        "    <tr>\n",
   1935        "      <th>UNIT</th>\n",
   1936        "      <th colspan=\"10\" halign=\"left\">Number</th>\n",
   1937        "      <th>...</th>\n",
   1938        "      <th colspan=\"10\" halign=\"left\">Number</th>\n",
   1939        "    </tr>\n",
   1940        "    <tr>\n",
   1941        "      <th>GEO</th>\n",
   1942        "      <th>Austria</th>\n",
   1943        "      <th>Belgium</th>\n",
   1944        "      <th>Bulgaria</th>\n",
   1945        "      <th>Switzerland</th>\n",
   1946        "      <th>Channel Tunnel</th>\n",
   1947        "      <th>Czechia</th>\n",
   1948        "      <th>Germany (until 1990 former territory of the FRG)</th>\n",
   1949        "      <th>Denmark</th>\n",
   1950        "      <th>Estonia</th>\n",
   1951        "      <th>Greece</th>\n",
   1952        "      <th>...</th>\n",
   1953        "      <th>Netherlands</th>\n",
   1954        "      <th>Norway</th>\n",
   1955        "      <th>Poland</th>\n",
   1956        "      <th>Portugal</th>\n",
   1957        "      <th>Romania</th>\n",
   1958        "      <th>Sweden</th>\n",
   1959        "      <th>Slovenia</th>\n",
   1960        "      <th>Slovakia</th>\n",
   1961        "      <th>Turkey</th>\n",
   1962        "      <th>United Kingdom</th>\n",
   1963        "    </tr>\n",
   1964        "    <tr>\n",
   1965        "      <th>FREQ</th>\n",
   1966        "      <th>Annual</th>\n",
   1967        "      <th>Annual</th>\n",
   1968        "      <th>Annual</th>\n",
   1969        "      <th>Annual</th>\n",
   1970        "      <th>Annual</th>\n",
   1971        "      <th>Annual</th>\n",
   1972        "      <th>Annual</th>\n",
   1973        "      <th>Annual</th>\n",
   1974        "      <th>Annual</th>\n",
   1975        "      <th>Annual</th>\n",
   1976        "      <th>...</th>\n",
   1977        "      <th>Annual</th>\n",
   1978        "      <th>Annual</th>\n",
   1979        "      <th>Annual</th>\n",
   1980        "      <th>Annual</th>\n",
   1981        "      <th>Annual</th>\n",
   1982        "      <th>Annual</th>\n",
   1983        "      <th>Annual</th>\n",
   1984        "      <th>Annual</th>\n",
   1985        "      <th>Annual</th>\n",
   1986        "      <th>Annual</th>\n",
   1987        "    </tr>\n",
   1988        "    <tr>\n",
   1989        "      <th>TIME_PERIOD</th>\n",
   1990        "      <th></th>\n",
   1991        "      <th></th>\n",
   1992        "      <th></th>\n",
   1993        "      <th></th>\n",
   1994        "      <th></th>\n",
   1995        "      <th></th>\n",
   1996        "      <th></th>\n",
   1997        "      <th></th>\n",
   1998        "      <th></th>\n",
   1999        "      <th></th>\n",
   2000        "      <th></th>\n",
   2001        "      <th></th>\n",
   2002        "      <th></th>\n",
   2003        "      <th></th>\n",
   2004        "      <th></th>\n",
   2005        "      <th></th>\n",
   2006        "      <th></th>\n",
   2007        "      <th></th>\n",
   2008        "      <th></th>\n",
   2009        "      <th></th>\n",
   2010        "      <th></th>\n",
   2011        "    </tr>\n",
   2012        "  </thead>\n",
   2013        "  <tbody>\n",
   2014        "    <tr>\n",
   2015        "      <th>2010-01-01</th>\n",
   2016        "      <td>3.0</td>\n",
   2017        "      <td>5.0</td>\n",
   2018        "      <td>2.0</td>\n",
   2019        "      <td>5.0</td>\n",
   2020        "      <td>0.0</td>\n",
   2021        "      <td>3.0</td>\n",
   2022        "      <td>13.0</td>\n",
   2023        "      <td>0.0</td>\n",
   2024        "      <td>1.0</td>\n",
   2025        "      <td>4.0</td>\n",
   2026        "      <td>...</td>\n",
   2027        "      <td>NaN</td>\n",
   2028        "      <td>NaN</td>\n",
   2029        "      <td>NaN</td>\n",
   2030        "      <td>NaN</td>\n",
   2031        "      <td>NaN</td>\n",
   2032        "      <td>NaN</td>\n",
   2033        "      <td>NaN</td>\n",
   2034        "      <td>NaN</td>\n",
   2035        "      <td>0.0</td>\n",
   2036        "      <td>NaN</td>\n",
   2037        "    </tr>\n",
   2038        "    <tr>\n",
   2039        "      <th>2011-01-01</th>\n",
   2040        "      <td>2.0</td>\n",
   2041        "      <td>0.0</td>\n",
   2042        "      <td>0.0</td>\n",
   2043        "      <td>4.0</td>\n",
   2044        "      <td>0.0</td>\n",
   2045        "      <td>6.0</td>\n",
   2046        "      <td>18.0</td>\n",
   2047        "      <td>1.0</td>\n",
   2048        "      <td>0.0</td>\n",
   2049        "      <td>1.0</td>\n",
   2050        "      <td>...</td>\n",
   2051        "      <td>NaN</td>\n",
   2052        "      <td>NaN</td>\n",
   2053        "      <td>NaN</td>\n",
   2054        "      <td>NaN</td>\n",
   2055        "      <td>NaN</td>\n",
   2056        "      <td>NaN</td>\n",
   2057        "      <td>NaN</td>\n",
   2058        "      <td>NaN</td>\n",
   2059        "      <td>0.0</td>\n",
   2060        "      <td>NaN</td>\n",
   2061        "    </tr>\n",
   2062        "    <tr>\n",
   2063        "      <th>2012-01-01</th>\n",
   2064        "      <td>1.0</td>\n",
   2065        "      <td>3.0</td>\n",
   2066        "      <td>3.0</td>\n",
   2067        "      <td>4.0</td>\n",
   2068        "      <td>0.0</td>\n",
   2069        "      <td>6.0</td>\n",
   2070        "      <td>23.0</td>\n",
   2071        "      <td>1.0</td>\n",
   2072        "      <td>3.0</td>\n",
   2073        "      <td>2.0</td>\n",
   2074        "      <td>...</td>\n",
   2075        "      <td>NaN</td>\n",
   2076        "      <td>NaN</td>\n",
   2077        "      <td>NaN</td>\n",
   2078        "      <td>NaN</td>\n",
   2079        "      <td>NaN</td>\n",
   2080        "      <td>NaN</td>\n",
   2081        "      <td>NaN</td>\n",
   2082        "      <td>NaN</td>\n",
   2083        "      <td>0.0</td>\n",
   2084        "      <td>NaN</td>\n",
   2085        "    </tr>\n",
   2086        "    <tr>\n",
   2087        "      <th>2013-01-01</th>\n",
   2088        "      <td>4.0</td>\n",
   2089        "      <td>1.0</td>\n",
   2090        "      <td>2.0</td>\n",
   2091        "      <td>6.0</td>\n",
   2092        "      <td>0.0</td>\n",
   2093        "      <td>5.0</td>\n",
   2094        "      <td>29.0</td>\n",
   2095        "      <td>0.0</td>\n",
   2096        "      <td>0.0</td>\n",
   2097        "      <td>2.0</td>\n",
   2098        "      <td>...</td>\n",
   2099        "      <td>NaN</td>\n",
   2100        "      <td>NaN</td>\n",
   2101        "      <td>NaN</td>\n",
   2102        "      <td>NaN</td>\n",
   2103        "      <td>NaN</td>\n",
   2104        "      <td>NaN</td>\n",
   2105        "      <td>NaN</td>\n",
   2106        "      <td>NaN</td>\n",
   2107        "      <td>0.0</td>\n",
   2108        "      <td>NaN</td>\n",
   2109        "    </tr>\n",
   2110        "    <tr>\n",
   2111        "      <th>2014-01-01</th>\n",
   2112        "      <td>1.0</td>\n",
   2113        "      <td>3.0</td>\n",
   2114        "      <td>4.0</td>\n",
   2115        "      <td>0.0</td>\n",
   2116        "      <td>0.0</td>\n",
   2117        "      <td>13.0</td>\n",
   2118        "      <td>32.0</td>\n",
   2119        "      <td>0.0</td>\n",
   2120        "      <td>0.0</td>\n",
   2121        "      <td>1.0</td>\n",
   2122        "      <td>...</td>\n",
   2123        "      <td>NaN</td>\n",
   2124        "      <td>NaN</td>\n",
   2125        "      <td>NaN</td>\n",
   2126        "      <td>NaN</td>\n",
   2127        "      <td>NaN</td>\n",
   2128        "      <td>NaN</td>\n",
   2129        "      <td>NaN</td>\n",
   2130        "      <td>NaN</td>\n",
   2131        "      <td>0.0</td>\n",
   2132        "      <td>NaN</td>\n",
   2133        "    </tr>\n",
   2134        "  </tbody>\n",
   2135        "</table>\n",
   2136        "<p>5 rows × 264 columns</p>\n",
   2137        "</div>"
   2138       ],
   2139       "text/plain": [
   2140        "ACCIDENT    Collisions of trains, including collisions with obstacles within the clearance gauge  \\\n",
   2141        "UNIT                                                                                      Number   \n",
   2142        "GEO                                                                                      Austria   \n",
   2143        "FREQ                                                                                      Annual   \n",
   2144        "TIME_PERIOD                                                                                        \n",
   2145        "2010-01-01                                                 3.0                                     \n",
   2146        "2011-01-01                                                 2.0                                     \n",
   2147        "2012-01-01                                                 1.0                                     \n",
   2148        "2013-01-01                                                 4.0                                     \n",
   2149        "2014-01-01                                                 1.0                                     \n",
   2150        "\n",
   2151        "ACCIDENT                                                         \\\n",
   2152        "UNIT                                                              \n",
   2153        "GEO         Belgium Bulgaria Switzerland Channel Tunnel Czechia   \n",
   2154        "FREQ         Annual   Annual      Annual         Annual  Annual   \n",
   2155        "TIME_PERIOD                                                       \n",
   2156        "2010-01-01      5.0      2.0         5.0            0.0     3.0   \n",
   2157        "2011-01-01      0.0      0.0         4.0            0.0     6.0   \n",
   2158        "2012-01-01      3.0      3.0         4.0            0.0     6.0   \n",
   2159        "2013-01-01      1.0      2.0         6.0            0.0     5.0   \n",
   2160        "2014-01-01      3.0      4.0         0.0            0.0    13.0   \n",
   2161        "\n",
   2162        "ACCIDENT                                                                      \\\n",
   2163        "UNIT                                                                           \n",
   2164        "GEO         Germany (until 1990 former territory of the FRG) Denmark Estonia   \n",
   2165        "FREQ                                                  Annual  Annual  Annual   \n",
   2166        "TIME_PERIOD                                                                    \n",
   2167        "2010-01-01                                              13.0     0.0     1.0   \n",
   2168        "2011-01-01                                              18.0     1.0     0.0   \n",
   2169        "2012-01-01                                              23.0     1.0     3.0   \n",
   2170        "2013-01-01                                              29.0     0.0     0.0   \n",
   2171        "2014-01-01                                              32.0     0.0     0.0   \n",
   2172        "\n",
   2173        "ACCIDENT            ...     Unknown                                        \\\n",
   2174        "UNIT                ...      Number                                         \n",
   2175        "GEO         Greece  ... Netherlands Norway Poland Portugal Romania Sweden   \n",
   2176        "FREQ        Annual  ...      Annual Annual Annual   Annual  Annual Annual   \n",
   2177        "TIME_PERIOD         ...                                                     \n",
   2178        "2010-01-01     4.0  ...         NaN    NaN    NaN      NaN     NaN    NaN   \n",
   2179        "2011-01-01     1.0  ...         NaN    NaN    NaN      NaN     NaN    NaN   \n",
   2180        "2012-01-01     2.0  ...         NaN    NaN    NaN      NaN     NaN    NaN   \n",
   2181        "2013-01-01     2.0  ...         NaN    NaN    NaN      NaN     NaN    NaN   \n",
   2182        "2014-01-01     1.0  ...         NaN    NaN    NaN      NaN     NaN    NaN   \n",
   2183        "\n",
   2184        "ACCIDENT                                             \n",
   2185        "UNIT                                                 \n",
   2186        "GEO         Slovenia Slovakia Turkey United Kingdom  \n",
   2187        "FREQ          Annual   Annual Annual         Annual  \n",
   2188        "TIME_PERIOD                                          \n",
   2189        "2010-01-01       NaN      NaN    0.0            NaN  \n",
   2190        "2011-01-01       NaN      NaN    0.0            NaN  \n",
   2191        "2012-01-01       NaN      NaN    0.0            NaN  \n",
   2192        "2013-01-01       NaN      NaN    0.0            NaN  \n",
   2193        "2014-01-01       NaN      NaN    0.0            NaN  \n",
   2194        "\n",
   2195        "[5 rows x 264 columns]"
   2196       ]
   2197      },
   2198      "execution_count": 22,
   2199      "metadata": {},
   2200      "output_type": "execute_result"
   2201     }
   2202    ],
   2203    "source": [
   2204     "df.head()"
   2205    ]
   2206   },
   2207   {
   2208    "cell_type": "markdown",
   2209    "metadata": {},
   2210    "source": [
   2211     "\n",
   2212     "\n",
   2213     "### Stooq"
   2214    ]
   2215   },
   2216   {
   2217    "cell_type": "markdown",
   2218    "metadata": {},
   2219    "source": [
   2220     "Google finance stopped providing common index data download. The Stooq site had this data for download for a while but is currently broken, awaiting release of [fix](https://github.com/pydata/pandas-datareader/issues/594)"
   2221    ]
   2222   },
   2223   {
   2224    "cell_type": "code",
   2225    "execution_count": 13,
   2226    "metadata": {
   2227     "ExecuteTime": {
   2228      "end_time": "2018-12-25T19:17:11.052846Z",
   2229      "start_time": "2018-12-25T19:17:08.631634Z"
   2230     }
   2231    },
   2232    "outputs": [
   2233     {
   2234      "data": {
   2235       "text/plain": [
   2236        "46"
   2237       ]
   2238      },
   2239      "execution_count": 13,
   2240      "metadata": {},
   2241      "output_type": "execute_result"
   2242     }
   2243    ],
   2244    "source": [
   2245     "index_url = 'https://stooq.com/t/'\n",
   2246     "ix = pd.read_html(index_url)\n",
   2247     "len(ix)"
   2248    ]
   2249   },
   2250   {
   2251    "cell_type": "code",
   2252    "execution_count": 14,
   2253    "metadata": {
   2254     "ExecuteTime": {
   2255      "end_time": "2018-12-25T19:17:11.497938Z",
   2256      "start_time": "2018-12-25T19:17:11.054004Z"
   2257     }
   2258    },
   2259    "outputs": [
   2260     {
   2261      "name": "stdout",
   2262      "output_type": "stream",
   2263      "text": [
   2264       "<class 'pandas.core.frame.DataFrame'>\n",
   2265       "Index: 0 entries\n",
   2266       "Empty DataFrame"
   2267      ]
   2268     }
   2269    ],
   2270    "source": [
   2271     "f = web.DataReader('^SPX', 'stooq', start='20000101')\n",
   2272     "f.info()"
   2273    ]
   2274   },
   2275   {
   2276    "cell_type": "code",
   2277    "execution_count": 15,
   2278    "metadata": {
   2279     "ExecuteTime": {
   2280      "end_time": "2018-12-25T19:17:11.521392Z",
   2281      "start_time": "2018-12-25T19:17:11.503799Z"
   2282     }
   2283    },
   2284    "outputs": [
   2285     {
   2286      "data": {
   2287       "text/html": [
   2288        "<div>\n",
   2289        "<style scoped>\n",
   2290        "    .dataframe tbody tr th:only-of-type {\n",
   2291        "        vertical-align: middle;\n",
   2292        "    }\n",
   2293        "\n",
   2294        "    .dataframe tbody tr th {\n",
   2295        "        vertical-align: top;\n",
   2296        "    }\n",
   2297        "\n",
   2298        "    .dataframe thead th {\n",
   2299        "        text-align: right;\n",
   2300        "    }\n",
   2301        "</style>\n",
   2302        "<table border=\"1\" class=\"dataframe\">\n",
   2303        "  <thead>\n",
   2304        "    <tr style=\"text-align: right;\">\n",
   2305        "      <th></th>\n",
   2306        "    </tr>\n",
   2307        "    <tr>\n",
   2308        "      <th>No data</th>\n",
   2309        "    </tr>\n",
   2310        "  </thead>\n",
   2311        "  <tbody>\n",
   2312        "  </tbody>\n",
   2313        "</table>\n",
   2314        "</div>"
   2315       ],
   2316       "text/plain": [
   2317        "Empty DataFrame\n",
   2318        "Columns: []\n",
   2319        "Index: []"
   2320       ]
   2321      },
   2322      "execution_count": 15,
   2323      "metadata": {},
   2324      "output_type": "execute_result"
   2325     }
   2326    ],
   2327    "source": [
   2328     "f.head()"
   2329    ]
   2330   },
   2331   {
   2332    "cell_type": "markdown",
   2333    "metadata": {},
   2334    "source": [
   2335     "### NASDAQ Symbols"
   2336    ]
   2337   },
   2338   {
   2339    "cell_type": "code",
   2340    "execution_count": 23,
   2341    "metadata": {
   2342     "ExecuteTime": {
   2343      "end_time": "2018-12-25T19:20:56.673795Z",
   2344      "start_time": "2018-12-25T19:20:55.625622Z"
   2345     }
   2346    },
   2347    "outputs": [
   2348     {
   2349      "name": "stdout",
   2350      "output_type": "stream",
   2351      "text": [
   2352       "<class 'pandas.core.frame.DataFrame'>\n",
   2353       "Index: 8701 entries, A to ZYXI\n",
   2354       "Data columns (total 11 columns):\n",
   2355       "Nasdaq Traded       8701 non-null bool\n",
   2356       "Security Name       8701 non-null object\n",
   2357       "Listing Exchange    8701 non-null category\n",
   2358       "Market Category     8701 non-null object\n",
   2359       "ETF                 8701 non-null bool\n",
   2360       "Round Lot Size      8701 non-null float64\n",
   2361       "Test Issue          8701 non-null bool\n",
   2362       "Financial Status    3411 non-null category\n",
   2363       "CQS Symbol          5290 non-null object\n",
   2364       "NASDAQ Symbol       8701 non-null object\n",
   2365       "NextShares          8701 non-null bool\n",
   2366       "dtypes: bool(4), category(2), float64(1), object(4)\n",
   2367       "memory usage: 459.2+ KB\n"
   2368      ]
   2369     }
   2370    ],
   2371    "source": [
   2372     "from pandas_datareader.nasdaq_trader import get_nasdaq_symbols\n",
   2373     "symbols = get_nasdaq_symbols()\n",
   2374     "symbols.info()"
   2375    ]
   2376   },
   2377   {
   2378    "cell_type": "code",
   2379    "execution_count": 24,
   2380    "metadata": {
   2381     "ExecuteTime": {
   2382      "end_time": "2018-12-25T19:20:59.748392Z",
   2383      "start_time": "2018-12-25T19:20:58.646206Z"
   2384     }
   2385    },
   2386    "outputs": [
   2387     {
   2388      "data": {
   2389       "text/plain": [
   2390        "4"
   2391       ]
   2392      },
   2393      "execution_count": 24,
   2394      "metadata": {},
   2395      "output_type": "execute_result"
   2396     }
   2397    ],
   2398    "source": [
   2399     "url = 'https://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ'\n",
   2400     "res = pd.read_html(url)\n",
   2401     "len(res)"
   2402    ]
   2403   },
   2404   {
   2405    "cell_type": "code",
   2406    "execution_count": 25,
   2407    "metadata": {
   2408     "ExecuteTime": {
   2409      "end_time": "2018-12-25T19:21:02.385427Z",
   2410      "start_time": "2018-12-25T19:21:02.352487Z"
   2411     },
   2412     "scrolled": true
   2413    },
   2414    "outputs": [
   2415     {
   2416      "name": "stdout",
   2417      "output_type": "stream",
   2418      "text": [
   2419       "<class 'pandas.core.frame.DataFrame'>\n",
   2420       "RangeIndex: 1 entries, 0 to 0\n",
   2421       "Data columns (total 2 columns):\n",
   2422       "0    1 non-null object\n",
   2423       "1    1 non-null object\n",
   2424       "dtypes: object(2)\n",
   2425       "memory usage: 96.0+ bytes\n",
   2426       "None\n",
   2427       "<class 'pandas.core.frame.DataFrame'>\n",
   2428       "RangeIndex: 101 entries, 0 to 100\n",
   2429       "Data columns (total 6 columns):\n",
   2430       "Name          101 non-null object\n",
   2431       "Symbol        51 non-null object\n",
   2432       "Market Cap    47 non-null object\n",
   2433       "Country       51 non-null object\n",
   2434       "IPO Year      28 non-null object\n",
   2435       "Subsector     51 non-null object\n",
   2436       "dtypes: object(6)\n",
   2437       "memory usage: 4.8+ KB\n",
   2438       "None\n",
   2439       "<class 'pandas.core.frame.DataFrame'>\n",
   2440       "RangeIndex: 1 entries, 0 to 0\n",
   2441       "Data columns (total 1 columns):\n",
   2442       "0    1 non-null object\n",
   2443       "dtypes: object(1)\n",
   2444       "memory usage: 88.0+ bytes\n",
   2445       "None\n",
   2446       "<class 'pandas.core.frame.DataFrame'>\n",
   2447       "RangeIndex: 1 entries, 0 to 0\n",
   2448       "Data columns (total 1 columns):\n",
   2449       "0    1 non-null object\n",
   2450       "dtypes: object(1)\n",
   2451       "memory usage: 88.0+ bytes\n",
   2452       "None\n"
   2453      ]
   2454     }
   2455    ],
   2456    "source": [
   2457     "for r in res:\n",
   2458     "    print(r.info())"
   2459    ]
   2460   },
   2461   {
   2462    "cell_type": "markdown",
   2463    "metadata": {},
   2464    "source": [
   2465     "### Tiingo"
   2466    ]
   2467   },
   2468   {
   2469    "cell_type": "markdown",
   2470    "metadata": {},
   2471    "source": [
   2472     "Requires [signing up](https://api.tiingo.com/) and storing API key in environment"
   2473    ]
   2474   },
   2475   {
   2476    "cell_type": "code",
   2477    "execution_count": 26,
   2478    "metadata": {
   2479     "ExecuteTime": {
   2480      "end_time": "2018-12-25T19:21:26.396222Z",
   2481      "start_time": "2018-12-25T19:21:26.150734Z"
   2482     }
   2483    },
   2484    "outputs": [],
   2485    "source": [
   2486     "df = web.get_data_tiingo('GOOG', api_key=os.getenv('TIINGO_API_KEY'))"
   2487    ]
   2488   },
   2489   {
   2490    "cell_type": "code",
   2491    "execution_count": 27,
   2492    "metadata": {
   2493     "ExecuteTime": {
   2494      "end_time": "2018-12-25T19:21:27.003018Z",
   2495      "start_time": "2018-12-25T19:21:26.986133Z"
   2496     }
   2497    },
   2498    "outputs": [
   2499     {
   2500      "name": "stdout",
   2501      "output_type": "stream",
   2502      "text": [
   2503       "<class 'pandas.core.frame.DataFrame'>\n",
   2504       "MultiIndex: 1244 entries, (GOOG, 2014-03-27 00:00:00) to (GOOG, 2019-03-06 00:00:00)\n",
   2505       "Data columns (total 12 columns):\n",
   2506       "adjClose       1244 non-null float64\n",
   2507       "adjHigh        1244 non-null float64\n",
   2508       "adjLow         1244 non-null float64\n",
   2509       "adjOpen        1244 non-null float64\n",
   2510       "adjVolume      1244 non-null int64\n",
   2511       "close          1244 non-null float64\n",
   2512       "divCash        1244 non-null float64\n",
   2513       "high           1244 non-null float64\n",
   2514       "low            1244 non-null float64\n",
   2515       "open           1244 non-null float64\n",
   2516       "splitFactor    1244 non-null float64\n",
   2517       "volume         1244 non-null int64\n",
   2518       "dtypes: float64(10), int64(2)\n",
   2519       "memory usage: 130.1+ KB\n"
   2520      ]
   2521     }
   2522    ],
   2523    "source": [
   2524     "df.info()"
   2525    ]
   2526   }
   2527  ],
   2528  "metadata": {
   2529   "kernelspec": {
   2530    "display_name": "Python 3",
   2531    "language": "python",
   2532    "name": "python3"
   2533   },
   2534   "language_info": {
   2535    "codemirror_mode": {
   2536     "name": "ipython",
   2537     "version": 3
   2538    },
   2539    "file_extension": ".py",
   2540    "mimetype": "text/x-python",
   2541    "name": "python",
   2542    "nbconvert_exporter": "python",
   2543    "pygments_lexer": "ipython3",
   2544    "version": "3.7.3"
   2545   },
   2546   "toc": {
   2547    "base_numbering": 1,
   2548    "nav_menu": {},
   2549    "number_sections": true,
   2550    "sideBar": true,
   2551    "skip_h1_title": true,
   2552    "title_cell": "Table of Contents",
   2553    "title_sidebar": "Contents",
   2554    "toc_cell": false,
   2555    "toc_position": {},
   2556    "toc_section_display": true,
   2557    "toc_window_display": true
   2558   }
   2559  },
   2560  "nbformat": 4,
   2561  "nbformat_minor": 2
   2562 }