ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
01_datareader.ipynb
(88628B)
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "# Remote data access using pandas"
8 ]
9 },
10 {
11 "cell_type": "markdown",
12 "metadata": {},
13 "source": [
14 "The pandas library enables access to data displayed on websites using the `read_html()` function and access to the API endpoints of various data providers through the related `pandas-datareader` library."
15 ]
16 },
17 {
18 "cell_type": "code",
19 "execution_count": 1,
20 "metadata": {
21 "ExecuteTime": {
22 "end_time": "2018-12-25T19:14:54.455404Z",
23 "start_time": "2018-12-25T19:14:54.447513Z"
24 }
25 },
26 "outputs": [],
27 "source": [
28 "import os\n",
29 "import pandas_datareader.data as web\n",
30 "from datetime import datetime\n",
31 "from pprint import pprint"
32 ]
33 },
34 {
35 "cell_type": "markdown",
36 "metadata": {},
37 "source": [
38 "## Download html table with SP500 constituents"
39 ]
40 },
41 {
42 "cell_type": "markdown",
43 "metadata": {},
44 "source": [
45 "The download of the content of one or more html tables works as follows, for instance for the constituents of the S&P500 index from Wikipedia"
46 ]
47 },
48 {
49 "cell_type": "code",
50 "execution_count": 2,
51 "metadata": {
52 "ExecuteTime": {
53 "end_time": "2018-12-25T19:14:55.680031Z",
54 "start_time": "2018-12-25T19:14:54.460128Z"
55 }
56 },
57 "outputs": [],
58 "source": [
59 "sp_url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'\n",
60 "sp500_constituents = pd.read_html(sp_url, header=0)[0]"
61 ]
62 },
63 {
64 "cell_type": "code",
65 "execution_count": 3,
66 "metadata": {
67 "ExecuteTime": {
68 "end_time": "2018-12-25T19:14:55.685673Z",
69 "start_time": "2018-12-25T19:14:55.681237Z"
70 },
71 "scrolled": true
72 },
73 "outputs": [
74 {
75 "name": "stdout",
76 "output_type": "stream",
77 "text": [
78 "<class 'pandas.core.frame.DataFrame'>\n",
79 "RangeIndex: 505 entries, 0 to 504\n",
80 "Data columns (total 9 columns):\n",
81 "Security 505 non-null object\n",
82 "Symbol 505 non-null object\n",
83 "SEC filings 505 non-null object\n",
84 "GICS Sector 505 non-null object\n",
85 "GICS Sub Industry 505 non-null object\n",
86 "Headquarters Location 505 non-null object\n",
87 "Date first added 402 non-null object\n",
88 "CIK 505 non-null int64\n",
89 "Founded 172 non-null object\n",
90 "dtypes: int64(1), object(8)\n",
91 "memory usage: 35.6+ KB\n"
92 ]
93 }
94 ],
95 "source": [
96 "sp500_constituents.info()"
97 ]
98 },
99 {
100 "cell_type": "code",
101 "execution_count": 4,
102 "metadata": {
103 "ExecuteTime": {
104 "end_time": "2018-12-25T19:14:55.702503Z",
105 "start_time": "2018-12-25T19:14:55.686865Z"
106 }
107 },
108 "outputs": [
109 {
110 "data": {
111 "text/html": [
112 "<div>\n",
113 "<style scoped>\n",
114 " .dataframe tbody tr th:only-of-type {\n",
115 " vertical-align: middle;\n",
116 " }\n",
117 "\n",
118 " .dataframe tbody tr th {\n",
119 " vertical-align: top;\n",
120 " }\n",
121 "\n",
122 " .dataframe thead th {\n",
123 " text-align: right;\n",
124 " }\n",
125 "</style>\n",
126 "<table border=\"1\" class=\"dataframe\">\n",
127 " <thead>\n",
128 " <tr style=\"text-align: right;\">\n",
129 " <th></th>\n",
130 " <th>Security</th>\n",
131 " <th>Symbol</th>\n",
132 " <th>SEC filings</th>\n",
133 " <th>GICS Sector</th>\n",
134 " <th>GICS Sub Industry</th>\n",
135 " <th>Headquarters Location</th>\n",
136 " <th>Date first added</th>\n",
137 " <th>CIK</th>\n",
138 " <th>Founded</th>\n",
139 " </tr>\n",
140 " </thead>\n",
141 " <tbody>\n",
142 " <tr>\n",
143 " <th>0</th>\n",
144 " <td>3M Company</td>\n",
145 " <td>MMM</td>\n",
146 " <td>reports</td>\n",
147 " <td>Industrials</td>\n",
148 " <td>Industrial Conglomerates</td>\n",
149 " <td>St. Paul, Minnesota</td>\n",
150 " <td>NaN</td>\n",
151 " <td>66740</td>\n",
152 " <td>1902</td>\n",
153 " </tr>\n",
154 " <tr>\n",
155 " <th>1</th>\n",
156 " <td>Abbott Laboratories</td>\n",
157 " <td>ABT</td>\n",
158 " <td>reports</td>\n",
159 " <td>Health Care</td>\n",
160 " <td>Health Care Equipment</td>\n",
161 " <td>North Chicago, Illinois</td>\n",
162 " <td>1964-03-31</td>\n",
163 " <td>1800</td>\n",
164 " <td>1888</td>\n",
165 " </tr>\n",
166 " <tr>\n",
167 " <th>2</th>\n",
168 " <td>AbbVie Inc.</td>\n",
169 " <td>ABBV</td>\n",
170 " <td>reports</td>\n",
171 " <td>Health Care</td>\n",
172 " <td>Pharmaceuticals</td>\n",
173 " <td>North Chicago, Illinois</td>\n",
174 " <td>2012-12-31</td>\n",
175 " <td>1551152</td>\n",
176 " <td>2013 (1888)</td>\n",
177 " </tr>\n",
178 " <tr>\n",
179 " <th>3</th>\n",
180 " <td>ABIOMED Inc</td>\n",
181 " <td>ABMD</td>\n",
182 " <td>reports</td>\n",
183 " <td>Health Care</td>\n",
184 " <td>Health Care Equipment</td>\n",
185 " <td>Danvers, Massachusetts</td>\n",
186 " <td>2018-05-31</td>\n",
187 " <td>815094</td>\n",
188 " <td>1981</td>\n",
189 " </tr>\n",
190 " <tr>\n",
191 " <th>4</th>\n",
192 " <td>Accenture plc</td>\n",
193 " <td>ACN</td>\n",
194 " <td>reports</td>\n",
195 " <td>Information Technology</td>\n",
196 " <td>IT Consulting & Other Services</td>\n",
197 " <td>Dublin, Ireland</td>\n",
198 " <td>2011-07-06</td>\n",
199 " <td>1467373</td>\n",
200 " <td>1989</td>\n",
201 " </tr>\n",
202 " </tbody>\n",
203 "</table>\n",
204 "</div>"
205 ],
206 "text/plain": [
207 " Security Symbol SEC filings GICS Sector \\\n",
208 "0 3M Company MMM reports Industrials \n",
209 "1 Abbott Laboratories ABT reports Health Care \n",
210 "2 AbbVie Inc. ABBV reports Health Care \n",
211 "3 ABIOMED Inc ABMD reports Health Care \n",
212 "4 Accenture plc ACN reports Information Technology \n",
213 "\n",
214 " GICS Sub Industry Headquarters Location Date first added \\\n",
215 "0 Industrial Conglomerates St. Paul, Minnesota NaN \n",
216 "1 Health Care Equipment North Chicago, Illinois 1964-03-31 \n",
217 "2 Pharmaceuticals North Chicago, Illinois 2012-12-31 \n",
218 "3 Health Care Equipment Danvers, Massachusetts 2018-05-31 \n",
219 "4 IT Consulting & Other Services Dublin, Ireland 2011-07-06 \n",
220 "\n",
221 " CIK Founded \n",
222 "0 66740 1902 \n",
223 "1 1800 1888 \n",
224 "2 1551152 2013 (1888) \n",
225 "3 815094 1981 \n",
226 "4 1467373 1989 "
227 ]
228 },
229 "execution_count": 4,
230 "metadata": {},
231 "output_type": "execute_result"
232 }
233 ],
234 "source": [
235 "sp500_constituents.head()"
236 ]
237 },
238 {
239 "cell_type": "markdown",
240 "metadata": {},
241 "source": [
242 "## pandas-datareader for Market Data"
243 ]
244 },
245 {
246 "cell_type": "markdown",
247 "metadata": {},
248 "source": [
249 "`pandas` used to facilitate access to data providers' APIs directly, but this functionality has moved to the related pandas-datareader library. The stability of the APIs varies with provider policies, and as of June 2o18 at version 0.7, the following sources are available"
250 ]
251 },
252 {
253 "cell_type": "markdown",
254 "metadata": {},
255 "source": [
256 "See [documentation](https://pandas-datareader.readthedocs.io/en/latest/); functionality frequently changes as underlying provider APIs evolve."
257 ]
258 },
259 {
260 "cell_type": "markdown",
261 "metadata": {},
262 "source": [
263 "### Yahoo Finance"
264 ]
265 },
266 {
267 "cell_type": "code",
268 "execution_count": 5,
269 "metadata": {
270 "ExecuteTime": {
271 "end_time": "2018-12-25T19:14:56.812409Z",
272 "start_time": "2018-12-25T19:14:55.703363Z"
273 }
274 },
275 "outputs": [
276 {
277 "name": "stdout",
278 "output_type": "stream",
279 "text": [
280 "<class 'pandas.core.frame.DataFrame'>\n",
281 "DatetimeIndex: 856 entries, 2014-01-02 to 2017-05-25\n",
282 "Data columns (total 6 columns):\n",
283 "High 856 non-null float64\n",
284 "Low 856 non-null float64\n",
285 "Open 856 non-null float64\n",
286 "Close 856 non-null float64\n",
287 "Volume 856 non-null int64\n",
288 "Adj Close 856 non-null float64\n",
289 "dtypes: float64(5), int64(1)\n",
290 "memory usage: 46.8 KB\n"
291 ]
292 }
293 ],
294 "source": [
295 "start = '2014'\n",
296 "end = datetime(2017, 5, 24)\n",
297 "\n",
298 "yahoo= web.DataReader('FB', 'yahoo', start=start, end=end)\n",
299 "yahoo.info()"
300 ]
301 },
302 {
303 "cell_type": "markdown",
304 "metadata": {},
305 "source": [
306 "### IEX"
307 ]
308 },
309 {
310 "cell_type": "markdown",
311 "metadata": {},
312 "source": [
313 "IEX is an alternative exchange started in response to the HFT controversy and portrayed in Michael Lewis' controversial Flash Boys. It aims to slow down the speed of trading to create a more level playing field and has been growing rapidly since launch in 2016 while still small with a market share of around 2.5% in June 2018."
314 ]
315 },
316 {
317 "cell_type": "code",
318 "execution_count": 6,
319 "metadata": {
320 "ExecuteTime": {
321 "end_time": "2018-12-25T19:14:57.093439Z",
322 "start_time": "2018-12-25T19:14:56.813719Z"
323 }
324 },
325 "outputs": [
326 {
327 "name": "stdout",
328 "output_type": "stream",
329 "text": [
330 "<class 'pandas.core.frame.DataFrame'>\n",
331 "Index: 1054 entries, 2015-02-09 to 2019-04-16\n",
332 "Data columns (total 5 columns):\n",
333 "open 1054 non-null float64\n",
334 "high 1054 non-null float64\n",
335 "low 1054 non-null float64\n",
336 "close 1054 non-null float64\n",
337 "volume 1054 non-null int64\n",
338 "dtypes: float64(4), int64(1)\n",
339 "memory usage: 49.4+ KB\n"
340 ]
341 }
342 ],
343 "source": [
344 "start = datetime(2015, 2, 9)\n",
345 "# end = datetime(2017, 5, 24)\n",
346 "\n",
347 "iex = web.DataReader('FB', 'iex', start)\n",
348 "iex.info()"
349 ]
350 },
351 {
352 "cell_type": "code",
353 "execution_count": 7,
354 "metadata": {
355 "ExecuteTime": {
356 "end_time": "2018-12-25T19:14:57.108760Z",
357 "start_time": "2018-12-25T19:14:57.095941Z"
358 }
359 },
360 "outputs": [
361 {
362 "data": {
363 "text/html": [
364 "<div>\n",
365 "<style scoped>\n",
366 " .dataframe tbody tr th:only-of-type {\n",
367 " vertical-align: middle;\n",
368 " }\n",
369 "\n",
370 " .dataframe tbody tr th {\n",
371 " vertical-align: top;\n",
372 " }\n",
373 "\n",
374 " .dataframe thead th {\n",
375 " text-align: right;\n",
376 " }\n",
377 "</style>\n",
378 "<table border=\"1\" class=\"dataframe\">\n",
379 " <thead>\n",
380 " <tr style=\"text-align: right;\">\n",
381 " <th></th>\n",
382 " <th>open</th>\n",
383 " <th>high</th>\n",
384 " <th>low</th>\n",
385 " <th>close</th>\n",
386 " <th>volume</th>\n",
387 " </tr>\n",
388 " <tr>\n",
389 " <th>date</th>\n",
390 " <th></th>\n",
391 " <th></th>\n",
392 " <th></th>\n",
393 " <th></th>\n",
394 " <th></th>\n",
395 " </tr>\n",
396 " </thead>\n",
397 " <tbody>\n",
398 " <tr>\n",
399 " <th>2019-04-10</th>\n",
400 " <td>178.18</td>\n",
401 " <td>178.79</td>\n",
402 " <td>176.54</td>\n",
403 " <td>177.82</td>\n",
404 " <td>11701479</td>\n",
405 " </tr>\n",
406 " <tr>\n",
407 " <th>2019-04-11</th>\n",
408 " <td>178.24</td>\n",
409 " <td>178.40</td>\n",
410 " <td>177.00</td>\n",
411 " <td>177.51</td>\n",
412 " <td>8070967</td>\n",
413 " </tr>\n",
414 " <tr>\n",
415 " <th>2019-04-12</th>\n",
416 " <td>178.00</td>\n",
417 " <td>179.63</td>\n",
418 " <td>177.95</td>\n",
419 " <td>179.10</td>\n",
420 " <td>12329812</td>\n",
421 " </tr>\n",
422 " <tr>\n",
423 " <th>2019-04-15</th>\n",
424 " <td>178.50</td>\n",
425 " <td>180.50</td>\n",
426 " <td>176.87</td>\n",
427 " <td>179.65</td>\n",
428 " <td>10834762</td>\n",
429 " </tr>\n",
430 " <tr>\n",
431 " <th>2019-04-16</th>\n",
432 " <td>179.00</td>\n",
433 " <td>180.17</td>\n",
434 " <td>178.30</td>\n",
435 " <td>178.87</td>\n",
436 " <td>11215193</td>\n",
437 " </tr>\n",
438 " </tbody>\n",
439 "</table>\n",
440 "</div>"
441 ],
442 "text/plain": [
443 " open high low close volume\n",
444 "date \n",
445 "2019-04-10 178.18 178.79 176.54 177.82 11701479\n",
446 "2019-04-11 178.24 178.40 177.00 177.51 8070967\n",
447 "2019-04-12 178.00 179.63 177.95 179.10 12329812\n",
448 "2019-04-15 178.50 180.50 176.87 179.65 10834762\n",
449 "2019-04-16 179.00 180.17 178.30 178.87 11215193"
450 ]
451 },
452 "execution_count": 7,
453 "metadata": {},
454 "output_type": "execute_result"
455 }
456 ],
457 "source": [
458 "iex.tail()"
459 ]
460 },
461 {
462 "cell_type": "markdown",
463 "metadata": {},
464 "source": [
465 "#### Book Data\n",
466 "\n",
467 "In addition to historical EOD price and volume data, IEX provides real-time depth of book quotations that offer an aggregated size of orders by price and side. This service also includes last trade price and size information.\n",
468 "\n",
469 "DEEP is used to receive real-time depth of book quotations direct from IEX. The depth of book quotations received via DEEP provide an aggregated size of resting displayed orders at a price and side, and do not indicate the size or number of individual orders at any price level. Non-displayed orders and non-displayed portions of reserve orders are not represented in DEEP.\n",
470 "\n",
471 "DEEP also provides last trade price and size information. Trades resulting from either displayed or non-displayed orders matching on IEX will be reported. Routed executions will not be reported."
472 ]
473 },
474 {
475 "cell_type": "markdown",
476 "metadata": {},
477 "source": [
478 "Only works on trading days."
479 ]
480 },
481 {
482 "cell_type": "code",
483 "execution_count": 8,
484 "metadata": {
485 "ExecuteTime": {
486 "end_time": "2018-12-25T19:14:57.278095Z",
487 "start_time": "2018-12-25T19:14:57.110743Z"
488 }
489 },
490 "outputs": [],
491 "source": [
492 "book = web.get_iex_book('AAPL')"
493 ]
494 },
495 {
496 "cell_type": "code",
497 "execution_count": 9,
498 "metadata": {
499 "ExecuteTime": {
500 "end_time": "2018-12-25T19:14:57.302122Z",
501 "start_time": "2018-12-25T19:14:57.289530Z"
502 }
503 },
504 "outputs": [
505 {
506 "data": {
507 "text/plain": [
508 "['symbol',\n",
509 " 'marketPercent',\n",
510 " 'volume',\n",
511 " 'lastSalePrice',\n",
512 " 'lastSaleSize',\n",
513 " 'lastSaleTime',\n",
514 " 'lastUpdated',\n",
515 " 'bids',\n",
516 " 'asks',\n",
517 " 'systemEvent',\n",
518 " 'tradingStatus',\n",
519 " 'opHaltStatus',\n",
520 " 'ssrStatus',\n",
521 " 'securityEvent',\n",
522 " 'trades',\n",
523 " 'tradeBreaks']"
524 ]
525 },
526 "execution_count": 9,
527 "metadata": {},
528 "output_type": "execute_result"
529 }
530 ],
531 "source": [
532 "list(book.keys())"
533 ]
534 },
535 {
536 "cell_type": "code",
537 "execution_count": 10,
538 "metadata": {
539 "ExecuteTime": {
540 "end_time": "2018-12-25T19:14:57.318139Z",
541 "start_time": "2018-12-25T19:14:57.307227Z"
542 }
543 },
544 "outputs": [
545 {
546 "data": {
547 "text/html": [
548 "<div>\n",
549 "<style scoped>\n",
550 " .dataframe tbody tr th:only-of-type {\n",
551 " vertical-align: middle;\n",
552 " }\n",
553 "\n",
554 " .dataframe tbody tr th {\n",
555 " vertical-align: top;\n",
556 " }\n",
557 "\n",
558 " .dataframe thead th {\n",
559 " text-align: right;\n",
560 " }\n",
561 "</style>\n",
562 "<table border=\"1\" class=\"dataframe\">\n",
563 " <thead>\n",
564 " <tr style=\"text-align: right;\">\n",
565 " <th></th>\n",
566 " <th>side</th>\n",
567 " </tr>\n",
568 " </thead>\n",
569 " <tbody>\n",
570 " </tbody>\n",
571 "</table>\n",
572 "</div>"
573 ],
574 "text/plain": [
575 "Empty DataFrame\n",
576 "Columns: [side]\n",
577 "Index: []"
578 ]
579 },
580 "execution_count": 10,
581 "metadata": {},
582 "output_type": "execute_result"
583 }
584 ],
585 "source": [
586 "orders = pd.concat([pd.DataFrame(book[side]).assign(side=side) for side in ['bids', 'asks']])\n",
587 "orders.head()"
588 ]
589 },
590 {
591 "cell_type": "code",
592 "execution_count": 11,
593 "metadata": {
594 "ExecuteTime": {
595 "end_time": "2018-12-25T19:14:57.333155Z",
596 "start_time": "2018-12-25T19:14:57.319823Z"
597 }
598 },
599 "outputs": [
600 {
601 "name": "stdout",
602 "output_type": "stream",
603 "text": [
604 "\n",
605 "symbol\n",
606 "AAPL\n",
607 "\n",
608 "marketPercent\n",
609 "0.03324\n",
610 "\n",
611 "volume\n",
612 "977659\n",
613 "\n",
614 "lastSalePrice\n",
615 "203.19\n",
616 "\n",
617 "lastSaleSize\n",
618 "3\n",
619 "\n",
620 "lastSaleTime\n",
621 "1555531318248\n",
622 "\n",
623 "lastUpdated\n",
624 "1555532174025\n",
625 "\n",
626 "bids\n",
627 "Empty DataFrame\n",
628 "Columns: []\n",
629 "Index: []\n",
630 "\n",
631 "asks\n",
632 "Empty DataFrame\n",
633 "Columns: []\n",
634 "Index: []\n",
635 "\n",
636 "systemEvent\n",
637 "{'systemEvent': 'C', 'timestamp': 1555535400001}\n",
638 "\n",
639 "tradingStatus\n",
640 "{'status': 'T', 'reason': ' ', 'timestamp': 1555500532036}\n",
641 "\n",
642 "opHaltStatus\n",
643 "{'isHalted': False, 'timestamp': 1555500532036}\n",
644 "\n",
645 "ssrStatus\n",
646 "{'isSSR': False, 'detail': ' ', 'timestamp': 1555500532036}\n",
647 "\n",
648 "securityEvent\n",
649 "{'securityEvent': 'MarketClose', 'timestamp': 1555531200000}\n",
650 "\n",
651 "trades\n",
652 " isISO isOddLot isOutsideRegularHours isSinglePriceCross \\\n",
653 "0 True True True False \n",
654 "1 False False False False \n",
655 "2 True False False False \n",
656 "3 False False False False \n",
657 "4 True False False False \n",
658 "5 False False False False \n",
659 "6 False False False False \n",
660 "7 True False False False \n",
661 "8 True False False False \n",
662 "9 True False False False \n",
663 "10 False True False False \n",
664 "11 False False False False \n",
665 "12 True True False False \n",
666 "13 False False False False \n",
667 "14 False False False False \n",
668 "15 False False False False \n",
669 "16 True True False False \n",
670 "17 True False False False \n",
671 "18 True True False False \n",
672 "19 False False False False \n",
673 "\n",
674 " isTradeThroughExempt price size timestamp tradeId \n",
675 "0 False 203.190 3 1555531318248 891604355 \n",
676 "1 False 203.200 100 1555531197248 890272160 \n",
677 "2 False 203.140 100 1555531195857 889977488 \n",
678 "3 False 203.155 100 1555531195465 889859713 \n",
679 "4 False 203.140 100 1555531195292 889808657 \n",
680 "5 False 203.110 100 1555531194852 889619571 \n",
681 "6 False 203.100 100 1555531194822 889606541 \n",
682 "7 False 203.180 100 1555531194794 889585630 \n",
683 "8 True 203.180 100 1555531194794 889585556 \n",
684 "9 False 203.180 100 1555531191476 889002606 \n",
685 "10 False 203.190 25 1555531190110 888694678 \n",
686 "11 False 203.140 100 1555531187948 888262843 \n",
687 "12 False 203.160 64 1555531187684 888228377 \n",
688 "13 False 203.160 100 1555531187682 888227956 \n",
689 "14 False 203.160 100 1555531187681 888227469 \n",
690 "15 False 203.160 100 1555531187680 888227241 \n",
691 "16 False 203.160 36 1555531187680 888227079 \n",
692 "17 False 203.160 100 1555531187680 888227061 \n",
693 "18 False 203.170 9 1555531185364 887813837 \n",
694 "19 False 203.150 200 1555531184053 887531882 \n",
695 "\n",
696 "tradeBreaks\n",
697 "Empty DataFrame\n",
698 "Columns: []\n",
699 "Index: []\n"
700 ]
701 }
702 ],
703 "source": [
704 "for key in book.keys():\n",
705 " try:\n",
706 " print(f'\\n{key}')\n",
707 " print(pd.DataFrame(book[key]))\n",
708 " except:\n",
709 " print(book[key])"
710 ]
711 },
712 {
713 "cell_type": "code",
714 "execution_count": 12,
715 "metadata": {
716 "ExecuteTime": {
717 "end_time": "2018-12-25T19:14:57.420316Z",
718 "start_time": "2018-12-25T19:14:57.334612Z"
719 }
720 },
721 "outputs": [
722 {
723 "data": {
724 "text/html": [
725 "<div>\n",
726 "<style scoped>\n",
727 " .dataframe tbody tr th:only-of-type {\n",
728 " vertical-align: middle;\n",
729 " }\n",
730 "\n",
731 " .dataframe tbody tr th {\n",
732 " vertical-align: top;\n",
733 " }\n",
734 "\n",
735 " .dataframe thead th {\n",
736 " text-align: right;\n",
737 " }\n",
738 "</style>\n",
739 "<table border=\"1\" class=\"dataframe\">\n",
740 " <thead>\n",
741 " <tr style=\"text-align: right;\">\n",
742 " <th></th>\n",
743 " <th>isISO</th>\n",
744 " <th>isOddLot</th>\n",
745 " <th>isOutsideRegularHours</th>\n",
746 " <th>isSinglePriceCross</th>\n",
747 " <th>isTradeThroughExempt</th>\n",
748 " <th>price</th>\n",
749 " <th>size</th>\n",
750 " <th>timestamp</th>\n",
751 " <th>tradeId</th>\n",
752 " </tr>\n",
753 " </thead>\n",
754 " <tbody>\n",
755 " <tr>\n",
756 " <th>0</th>\n",
757 " <td>True</td>\n",
758 " <td>True</td>\n",
759 " <td>True</td>\n",
760 " <td>False</td>\n",
761 " <td>False</td>\n",
762 " <td>203.190</td>\n",
763 " <td>3</td>\n",
764 " <td>1555531318248</td>\n",
765 " <td>891604355</td>\n",
766 " </tr>\n",
767 " <tr>\n",
768 " <th>1</th>\n",
769 " <td>False</td>\n",
770 " <td>False</td>\n",
771 " <td>False</td>\n",
772 " <td>False</td>\n",
773 " <td>False</td>\n",
774 " <td>203.200</td>\n",
775 " <td>100</td>\n",
776 " <td>1555531197248</td>\n",
777 " <td>890272160</td>\n",
778 " </tr>\n",
779 " <tr>\n",
780 " <th>2</th>\n",
781 " <td>True</td>\n",
782 " <td>False</td>\n",
783 " <td>False</td>\n",
784 " <td>False</td>\n",
785 " <td>False</td>\n",
786 " <td>203.140</td>\n",
787 " <td>100</td>\n",
788 " <td>1555531195857</td>\n",
789 " <td>889977488</td>\n",
790 " </tr>\n",
791 " <tr>\n",
792 " <th>3</th>\n",
793 " <td>False</td>\n",
794 " <td>False</td>\n",
795 " <td>False</td>\n",
796 " <td>False</td>\n",
797 " <td>False</td>\n",
798 " <td>203.155</td>\n",
799 " <td>100</td>\n",
800 " <td>1555531195465</td>\n",
801 " <td>889859713</td>\n",
802 " </tr>\n",
803 " <tr>\n",
804 " <th>4</th>\n",
805 " <td>True</td>\n",
806 " <td>False</td>\n",
807 " <td>False</td>\n",
808 " <td>False</td>\n",
809 " <td>False</td>\n",
810 " <td>203.140</td>\n",
811 " <td>100</td>\n",
812 " <td>1555531195292</td>\n",
813 " <td>889808657</td>\n",
814 " </tr>\n",
815 " </tbody>\n",
816 "</table>\n",
817 "</div>"
818 ],
819 "text/plain": [
820 " isISO isOddLot isOutsideRegularHours isSinglePriceCross \\\n",
821 "0 True True True False \n",
822 "1 False False False False \n",
823 "2 True False False False \n",
824 "3 False False False False \n",
825 "4 True False False False \n",
826 "\n",
827 " isTradeThroughExempt price size timestamp tradeId \n",
828 "0 False 203.190 3 1555531318248 891604355 \n",
829 "1 False 203.200 100 1555531197248 890272160 \n",
830 "2 False 203.140 100 1555531195857 889977488 \n",
831 "3 False 203.155 100 1555531195465 889859713 \n",
832 "4 False 203.140 100 1555531195292 889808657 "
833 ]
834 },
835 "execution_count": 12,
836 "metadata": {},
837 "output_type": "execute_result"
838 }
839 ],
840 "source": [
841 "pd.DataFrame(book['trades']).head()"
842 ]
843 },
844 {
845 "cell_type": "markdown",
846 "metadata": {},
847 "source": [
848 "### Quandl"
849 ]
850 },
851 {
852 "cell_type": "code",
853 "execution_count": 13,
854 "metadata": {
855 "ExecuteTime": {
856 "end_time": "2018-12-25T19:15:07.500237Z",
857 "start_time": "2018-12-25T19:15:06.978720Z"
858 }
859 },
860 "outputs": [
861 {
862 "name": "stdout",
863 "output_type": "stream",
864 "text": [
865 "<class 'pandas.core.frame.DataFrame'>\n",
866 "DatetimeIndex: 813 entries, 2018-03-27 to 2015-01-02\n",
867 "Data columns (total 12 columns):\n",
868 "Open 813 non-null float64\n",
869 "High 813 non-null float64\n",
870 "Low 813 non-null float64\n",
871 "Close 813 non-null float64\n",
872 "Volume 813 non-null float64\n",
873 "ExDividend 813 non-null float64\n",
874 "SplitRatio 813 non-null float64\n",
875 "AdjOpen 813 non-null float64\n",
876 "AdjHigh 813 non-null float64\n",
877 "AdjLow 813 non-null float64\n",
878 "AdjClose 813 non-null float64\n",
879 "AdjVolume 813 non-null float64\n",
880 "dtypes: float64(12)\n",
881 "memory usage: 82.6 KB\n"
882 ]
883 }
884 ],
885 "source": [
886 "symbol = 'FB.US'\n",
887 "\n",
888 "quandl = web.DataReader(symbol, 'quandl', '2015-01-01')\n",
889 "quandl.info()"
890 ]
891 },
892 {
893 "cell_type": "markdown",
894 "metadata": {},
895 "source": [
896 "### FRED"
897 ]
898 },
899 {
900 "cell_type": "code",
901 "execution_count": 14,
902 "metadata": {
903 "ExecuteTime": {
904 "end_time": "2018-12-25T19:15:07.662461Z",
905 "start_time": "2018-12-25T19:15:07.501573Z"
906 }
907 },
908 "outputs": [
909 {
910 "name": "stdout",
911 "output_type": "stream",
912 "text": [
913 "<class 'pandas.core.frame.DataFrame'>\n",
914 "DatetimeIndex: 13 entries, 2010-01-01 to 2013-01-01\n",
915 "Data columns (total 1 columns):\n",
916 "GDP 13 non-null float64\n",
917 "dtypes: float64(1)\n",
918 "memory usage: 208.0 bytes\n"
919 ]
920 }
921 ],
922 "source": [
923 "start = datetime(2010, 1, 1)\n",
924 "\n",
925 "end = datetime(2013, 1, 27)\n",
926 "\n",
927 "gdp = web.DataReader('GDP', 'fred', start, end)\n",
928 "\n",
929 "gdp.info()"
930 ]
931 },
932 {
933 "cell_type": "code",
934 "execution_count": 15,
935 "metadata": {
936 "ExecuteTime": {
937 "end_time": "2018-12-25T19:15:09.584269Z",
938 "start_time": "2018-12-25T19:15:09.399866Z"
939 }
940 },
941 "outputs": [
942 {
943 "name": "stdout",
944 "output_type": "stream",
945 "text": [
946 "<class 'pandas.core.frame.DataFrame'>\n",
947 "DatetimeIndex: 37 entries, 2010-01-01 to 2013-01-01\n",
948 "Freq: MS\n",
949 "Data columns (total 2 columns):\n",
950 "CPIAUCSL 37 non-null float64\n",
951 "CPILFESL 37 non-null float64\n",
952 "dtypes: float64(2)\n",
953 "memory usage: 888.0 bytes\n"
954 ]
955 }
956 ],
957 "source": [
958 "inflation = web.DataReader(['CPIAUCSL', 'CPILFESL'], 'fred', start, end)\n",
959 "inflation.info()"
960 ]
961 },
962 {
963 "cell_type": "markdown",
964 "metadata": {},
965 "source": [
966 "### Fama/French"
967 ]
968 },
969 {
970 "cell_type": "code",
971 "execution_count": 16,
972 "metadata": {
973 "ExecuteTime": {
974 "end_time": "2018-12-25T19:15:10.764850Z",
975 "start_time": "2018-12-25T19:15:10.755565Z"
976 }
977 },
978 "outputs": [
979 {
980 "data": {
981 "text/plain": [
982 "['F-F_Research_Data_Factors',\n",
983 " 'F-F_Research_Data_Factors_weekly',\n",
984 " 'F-F_Research_Data_Factors_daily',\n",
985 " 'F-F_Research_Data_5_Factors_2x3',\n",
986 " 'F-F_Research_Data_5_Factors_2x3_daily',\n",
987 " 'Portfolios_Formed_on_ME',\n",
988 " 'Portfolios_Formed_on_ME_Wout_Div',\n",
989 " 'Portfolios_Formed_on_ME_Daily',\n",
990 " 'Portfolios_Formed_on_BE-ME',\n",
991 " 'Portfolios_Formed_on_BE-ME_Wout_Div',\n",
992 " 'Portfolios_Formed_on_BE-ME_Daily',\n",
993 " 'Portfolios_Formed_on_OP',\n",
994 " 'Portfolios_Formed_on_OP_Wout_Div',\n",
995 " 'Portfolios_Formed_on_INV',\n",
996 " 'Portfolios_Formed_on_INV_Wout_Div',\n",
997 " '6_Portfolios_2x3',\n",
998 " '6_Portfolios_2x3_Wout_Div',\n",
999 " '6_Portfolios_2x3_weekly',\n",
1000 " '6_Portfolios_2x3_daily',\n",
1001 " '25_Portfolios_5x5',\n",
1002 " '25_Portfolios_5x5_Wout_Div',\n",
1003 " '25_Portfolios_5x5_Daily',\n",
1004 " '100_Portfolios_10x10',\n",
1005 " '100_Portfolios_10x10_Wout_Div',\n",
1006 " '100_Portfolios_10x10_Daily',\n",
1007 " '6_Portfolios_ME_OP_2x3',\n",
1008 " '6_Portfolios_ME_OP_2x3_Wout_Div',\n",
1009 " '6_Portfolios_ME_OP_2x3_daily',\n",
1010 " '25_Portfolios_ME_OP_5x5',\n",
1011 " '25_Portfolios_ME_OP_5x5_Wout_Div',\n",
1012 " '25_Portfolios_ME_OP_5x5_daily',\n",
1013 " '100_Portfolios_ME_OP_10x10',\n",
1014 " '100_Portfolios_10x10_ME_OP_Wout_Div',\n",
1015 " '100_Portfolios_ME_OP_10x10_daily',\n",
1016 " '6_Portfolios_ME_INV_2x3',\n",
1017 " '6_Portfolios_ME_INV_2x3_Wout_Div',\n",
1018 " '6_Portfolios_ME_INV_2x3_daily',\n",
1019 " '25_Portfolios_ME_INV_5x5',\n",
1020 " '25_Portfolios_ME_INV_5x5_Wout_Div',\n",
1021 " '25_Portfolios_ME_INV_5x5_daily',\n",
1022 " '100_Portfolios_ME_INV_10x10',\n",
1023 " '100_Portfolios_10x10_ME_INV_Wout_Div',\n",
1024 " '100_Portfolios_ME_INV_10x10_daily',\n",
1025 " '25_Portfolios_BEME_OP_5x5',\n",
1026 " '25_Portfolios_BEME_OP_5x5_Wout_Div',\n",
1027 " '25_Portfolios_BEME_OP_5x5_daily',\n",
1028 " '25_Portfolios_BEME_INV_5x5',\n",
1029 " '25_Portfolios_BEME_INV_5x5_Wout_Div',\n",
1030 " '25_Portfolios_BEME_INV_5x5_daily',\n",
1031 " '25_Portfolios_OP_INV_5x5',\n",
1032 " '25_Portfolios_OP_INV_5x5_Wout_Div',\n",
1033 " '25_Portfolios_OP_INV_5x5_daily',\n",
1034 " '32_Portfolios_ME_BEME_OP_2x4x4',\n",
1035 " '32_Portfolios_ME_BEME_OP_2x4x4_Wout_Div',\n",
1036 " '32_Portfolios_ME_BEME_INV_2x4x4',\n",
1037 " '32_Portfolios_ME_BEME_INV_2x4x4_Wout_Div',\n",
1038 " '32_Portfolios_ME_OP_INV_2x4x4',\n",
1039 " '32_Portfolios_ME_OP_INV_2x4x4_Wout_Div',\n",
1040 " 'Portfolios_Formed_on_E-P',\n",
1041 " 'Portfolios_Formed_on_E-P_Wout_Div',\n",
1042 " 'Portfolios_Formed_on_CF-P',\n",
1043 " 'Portfolios_Formed_on_CF-P_Wout_Div',\n",
1044 " 'Portfolios_Formed_on_D-P',\n",
1045 " 'Portfolios_Formed_on_D-P_Wout_Div',\n",
1046 " '6_Portfolios_ME_EP_2x3',\n",
1047 " '6_Portfolios_ME_EP_2x3_Wout_Div',\n",
1048 " '6_Portfolios_ME_CFP_2x3',\n",
1049 " '6_Portfolios_ME_CFP_2x3_Wout_Div',\n",
1050 " '6_Portfolios_ME_DP_2x3',\n",
1051 " '6_Portfolios_ME_DP_2x3_Wout_Div',\n",
1052 " 'F-F_Momentum_Factor',\n",
1053 " 'F-F_Momentum_Factor_daily',\n",
1054 " '6_Portfolios_ME_Prior_12_2',\n",
1055 " '6_Portfolios_ME_Prior_12_2_Daily',\n",
1056 " '25_Portfolios_ME_Prior_12_2',\n",
1057 " '25_Portfolios_ME_Prior_12_2_Daily',\n",
1058 " '10_Portfolios_Prior_12_2',\n",
1059 " '10_Portfolios_Prior_12_2_Daily',\n",
1060 " 'F-F_ST_Reversal_Factor',\n",
1061 " 'F-F_ST_Reversal_Factor_daily',\n",
1062 " '6_Portfolios_ME_Prior_1_0',\n",
1063 " '6_Portfolios_ME_Prior_1_0_Daily',\n",
1064 " '25_Portfolios_ME_Prior_1_0',\n",
1065 " '25_Portfolios_ME_Prior_1_0_Daily',\n",
1066 " '10_Portfolios_Prior_1_0',\n",
1067 " '10_Portfolios_Prior_1_0_Daily',\n",
1068 " 'F-F_LT_Reversal_Factor',\n",
1069 " 'F-F_LT_Reversal_Factor_daily',\n",
1070 " '6_Portfolios_ME_Prior_60_13',\n",
1071 " '6_Portfolios_ME_Prior_60_13_Daily',\n",
1072 " '25_Portfolios_ME_Prior_60_13',\n",
1073 " '25_Portfolios_ME_Prior_60_13_Daily',\n",
1074 " '10_Portfolios_Prior_60_13',\n",
1075 " '10_Portfolios_Prior_60_13_Daily',\n",
1076 " 'Portfolios_Formed_on_AC',\n",
1077 " '25_Portfolios_ME_AC_5x5',\n",
1078 " 'Portfolios_Formed_on_BETA',\n",
1079 " '25_Portfolios_ME_BETA_5x5',\n",
1080 " 'Portfolios_Formed_on_NI',\n",
1081 " '25_Portfolios_ME_NI_5x5',\n",
1082 " 'Portfolios_Formed_on_VAR',\n",
1083 " '25_Portfolios_ME_VAR_5x5',\n",
1084 " 'Portfolios_Formed_on_RESVAR',\n",
1085 " '25_Portfolios_ME_RESVAR_5x5',\n",
1086 " '5_Industry_Portfolios',\n",
1087 " '5_Industry_Portfolios_Wout_Div',\n",
1088 " '5_Industry_Portfolios_daily',\n",
1089 " '10_Industry_Portfolios',\n",
1090 " '10_Industry_Portfolios_Wout_Div',\n",
1091 " '10_Industry_Portfolios_daily',\n",
1092 " '12_Industry_Portfolios',\n",
1093 " '12_Industry_Portfolios_Wout_Div',\n",
1094 " '12_Industry_Portfolios_daily',\n",
1095 " '17_Industry_Portfolios',\n",
1096 " '17_Industry_Portfolios_Wout_Div',\n",
1097 " '17_Industry_Portfolios_daily',\n",
1098 " '30_Industry_Portfolios',\n",
1099 " '30_Industry_Portfolios_Wout_Div',\n",
1100 " '30_Industry_Portfolios_daily',\n",
1101 " '38_Industry_Portfolios',\n",
1102 " '38_Industry_Portfolios_Wout_Div',\n",
1103 " '38_Industry_Portfolios_daily',\n",
1104 " '48_Industry_Portfolios',\n",
1105 " '48_Industry_Portfolios_Wout_Div',\n",
1106 " '48_Industry_Portfolios_daily',\n",
1107 " '49_Industry_Portfolios',\n",
1108 " '49_Industry_Portfolios_Wout_Div',\n",
1109 " '49_Industry_Portfolios_daily',\n",
1110 " 'ME_Breakpoints',\n",
1111 " 'BE-ME_Breakpoints',\n",
1112 " 'OP_Breakpoints',\n",
1113 " 'INV_Breakpoints',\n",
1114 " 'E-P_Breakpoints',\n",
1115 " 'CF-P_Breakpoints',\n",
1116 " 'D-P_Breakpoints',\n",
1117 " 'Prior_2-12_Breakpoints',\n",
1118 " 'Global_3_Factors',\n",
1119 " 'Global_3_Factors_Daily',\n",
1120 " 'Global_ex_US_3_Factors',\n",
1121 " 'Global_ex_US_3_Factors_Daily',\n",
1122 " 'Europe_3_Factors',\n",
1123 " 'Europe_3_Factors_Daily',\n",
1124 " 'Japan_3_Factors',\n",
1125 " 'Japan_3_Factors_Daily',\n",
1126 " 'Asia_Pacific_ex_Japan_3_Factors',\n",
1127 " 'Asia_Pacific_ex_Japan_3_Factors_Daily',\n",
1128 " 'North_America_3_Factors',\n",
1129 " 'North_America_3_Factors_Daily',\n",
1130 " 'Global_5_Factors',\n",
1131 " 'Global_5_Factors_Daily',\n",
1132 " 'Global_ex_US_5_Factors',\n",
1133 " 'Global_ex_US_5_Factors_Daily',\n",
1134 " 'Europe_5_Factors',\n",
1135 " 'Europe_5_Factors_Daily',\n",
1136 " 'Japan_5_Factors',\n",
1137 " 'Japan_5_Factors_Daily',\n",
1138 " 'Asia_Pacific_ex_Japan_5_Factors',\n",
1139 " 'Asia_Pacific_ex_Japan_5_Factors_Daily',\n",
1140 " 'North_America_5_Factors',\n",
1141 " 'North_America_5_Factors_Daily',\n",
1142 " 'Global_Mom_Factor',\n",
1143 " 'Global_Mom_Factor_Daily',\n",
1144 " 'Global_ex_US_Mom_Factor',\n",
1145 " 'Global_ex_US_Mom_Factor_Daily',\n",
1146 " 'Europe_Mom_Factor',\n",
1147 " 'Europe_Mom_Factor_Daily',\n",
1148 " 'Japan_Mom_Factor',\n",
1149 " 'Japan_Mom_Factor_Daily',\n",
1150 " 'Asia_Pacific_ex_Japan_MOM_Factor',\n",
1151 " 'Asia_Pacific_ex_Japan_MOM_Factor_Daily',\n",
1152 " 'North_America_Mom_Factor',\n",
1153 " 'North_America_Mom_Factor_Daily',\n",
1154 " 'Global_6_Portfolios_ME_BE-ME',\n",
1155 " 'Global_6_Portfolios_ME_BE-ME_daily',\n",
1156 " 'Global_ex_US_6_Portfolios_ME_BE-ME',\n",
1157 " 'Global_ex_US_6_Portfolios_ME_BE-ME_daily',\n",
1158 " 'Europe_6_Portfolios_ME_BE-ME',\n",
1159 " 'Europe_6_Portfolios_ME_BE-ME_daily',\n",
1160 " 'Japan_6_Portfolios_ME_BE-ME',\n",
1161 " 'Japan_6_Portfolios_ME_BE-ME_daily',\n",
1162 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME',\n",
1163 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_BE-ME_daily',\n",
1164 " 'North_America_6_Portfolios_ME_BE-ME',\n",
1165 " 'North_America_6_Portfolios_ME_BE-ME_daily',\n",
1166 " 'Global_25_Portfolios_ME_BE-ME',\n",
1167 " 'Global_25_Portfolios_ME_BE-ME_daily',\n",
1168 " 'Global_ex_US_25_Portfolios_ME_BE-ME',\n",
1169 " 'Global_ex_US_25_Portfolios_ME_BE-ME_daily',\n",
1170 " 'Europe_25_Portfolios_ME_BE-ME',\n",
1171 " 'Europe_25_Portfolios_ME_BE-ME_daily',\n",
1172 " 'Japan_25_Portfolios_ME_BE-ME',\n",
1173 " 'Japan_25_Portfolios_ME_BE-ME_daily',\n",
1174 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME',\n",
1175 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_BE-ME_daily',\n",
1176 " 'North_America_25_Portfolios_ME_BE-ME',\n",
1177 " 'North_America_25_Portfolios_ME_BE-ME_daily',\n",
1178 " 'Global_6_Portfolios_ME_OP',\n",
1179 " 'Global_6_Portfolios_ME_OP_Daily',\n",
1180 " 'Global_ex_US_6_Portfolios_ME_OP',\n",
1181 " 'Global_ex_US_6_Portfolios_ME_OP_Daily',\n",
1182 " 'Europe_6_Portfolios_ME_OP',\n",
1183 " 'Europe_6_Portfolios_ME_OP_Daily',\n",
1184 " 'Japan_6_Portfolios_ME_OP',\n",
1185 " 'Japan_6_Portfolios_ME_OP_Daily',\n",
1186 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP',\n",
1187 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_OP_Daily',\n",
1188 " 'North_America_6_Portfolios_ME_OP',\n",
1189 " 'North_America_6_Portfolios_ME_OP_Daily',\n",
1190 " 'Global_25_Portfolios_ME_OP',\n",
1191 " 'Global_25_Portfolios_ME_OP_Daily',\n",
1192 " 'Global_ex_US_25_Portfolios_ME_OP',\n",
1193 " 'Global_ex_US_25_Portfolios_ME_OP_Daily',\n",
1194 " 'Europe_25_Portfolios_ME_OP',\n",
1195 " 'Europe_25_Portfolios_ME_OP_Daily',\n",
1196 " 'Japan_25_Portfolios_ME_OP',\n",
1197 " 'Japan_25_Portfolios_ME_OP_Daily',\n",
1198 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP',\n",
1199 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_OP_Daily',\n",
1200 " 'North_America_25_Portfolios_ME_OP',\n",
1201 " 'North_America_25_Portfolios_ME_OP_Daily',\n",
1202 " 'Global_6_Portfolios_ME_INV',\n",
1203 " 'Global_6_Portfolios_ME_INV_Daily',\n",
1204 " 'Global_ex_US_6_Portfolios_ME_INV',\n",
1205 " 'Global_ex_US_6_Portfolios_ME_INV_Daily',\n",
1206 " 'Europe_6_Portfolios_ME_INV',\n",
1207 " 'Europe_6_Portfolios_ME_INV_Daily',\n",
1208 " 'Japan_6_Portfolios_ME_INV',\n",
1209 " 'Japan_6_Portfolios_ME_INV_Daily',\n",
1210 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV',\n",
1211 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_INV_Daily',\n",
1212 " 'North_America_6_Portfolios_ME_INV',\n",
1213 " 'North_America_6_Portfolios_ME_INV_Daily',\n",
1214 " 'Global_25_Portfolios_ME_INV',\n",
1215 " 'Global_25_Portfolios_ME_INV_Daily',\n",
1216 " 'Global_ex_US_25_Portfolios_ME_INV',\n",
1217 " 'Global_ex_US_25_Portfolios_ME_INV_Daily',\n",
1218 " 'Europe_25_Portfolios_ME_INV',\n",
1219 " 'Europe_25_Portfolios_ME_INV_Daily',\n",
1220 " 'Japan_25_Portfolios_ME_INV',\n",
1221 " 'Japan_25_Portfolios_ME_INV_Daily',\n",
1222 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV',\n",
1223 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_INV_Daily',\n",
1224 " 'North_America_25_Portfolios_ME_INV',\n",
1225 " 'North_America_25_Portfolios_ME_INV_Daily',\n",
1226 " 'Global_6_Portfolios_ME_Prior_12_2',\n",
1227 " 'Global_6_Portfolios_ME_Prior_250_20_daily',\n",
1228 " 'Global_ex_US_6_Portfolios_ME_Prior_12_2',\n",
1229 " 'Global_ex_US_6_Portfolios_ME_Prior_250_20_daily',\n",
1230 " 'Europe_6_Portfolios_ME_Prior_12_2',\n",
1231 " 'Europe_6_Portfolios_ME_Prior_250_20_daily',\n",
1232 " 'Japan_6_Portfolios_ME_Prior_12_2',\n",
1233 " 'Japan_6_Portfolios_ME_Prior_250_20_daily',\n",
1234 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_12_2',\n",
1235 " 'Asia_Pacific_ex_Japan_6_Portfolios_ME_Prior_250_20_daily',\n",
1236 " 'North_America_6_Portfolios_ME_Prior_12_2',\n",
1237 " 'North_America_6_Portfolios_ME_Prior_250_20_daily',\n",
1238 " 'Global_25_Portfolios_ME_Prior_12_2',\n",
1239 " 'Global_25_Portfolios_ME_Prior_250_20_daily',\n",
1240 " 'Global_ex_US_25_Portfolios_ME_Prior_12_2',\n",
1241 " 'Global_ex_US_25_Portfolios_ME_Prior_250_20_daily',\n",
1242 " 'Europe_25_Portfolios_ME_Prior_12_2',\n",
1243 " 'Europe_25_Portfolios_ME_Prior_250_20_daily',\n",
1244 " 'Japan_25_Portfolios_ME_Prior_12_2',\n",
1245 " 'Japan_25_Portfolios_ME_Prior_250_20_daily',\n",
1246 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_12_2',\n",
1247 " 'Asia_Pacific_ex_Japan_25_Portfolios_ME_Prior_250_20_daily',\n",
1248 " 'North_America_25_Portfolios_ME_Prior_12_2',\n",
1249 " 'North_America_25_Portfolios_ME_Prior_250_20_daily',\n",
1250 " 'Global_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1251 " 'Global_ex_US_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1252 " 'Europe_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1253 " 'Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1254 " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1255 " 'North_America_32_Portfolios_ME_BE-ME_OP_2x4x4',\n",
1256 " 'Global_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1257 " 'Global_ex_US_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1258 " 'Europe_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1259 " 'Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1260 " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1261 " 'North_America_32_Portfolios_ME_BE-ME_INV(TA)_2x4x4',\n",
1262 " 'Global_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
1263 " 'Global_ex_US_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
1264 " 'Europe_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
1265 " 'Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
1266 " 'Asia_Pacific_ex_Japan_32_Portfolios_ME_INV(TA)_OP_2x4x4',\n",
1267 " 'North_America_32_Portfolios_ME_INV(TA)_OP_2x4x4']"
1268 ]
1269 },
1270 "execution_count": 16,
1271 "metadata": {},
1272 "output_type": "execute_result"
1273 }
1274 ],
1275 "source": [
1276 "from pandas_datareader.famafrench import get_available_datasets\n",
1277 "get_available_datasets()"
1278 ]
1279 },
1280 {
1281 "cell_type": "code",
1282 "execution_count": 17,
1283 "metadata": {
1284 "ExecuteTime": {
1285 "end_time": "2018-12-25T19:16:23.570899Z",
1286 "start_time": "2018-12-25T19:16:23.299817Z"
1287 }
1288 },
1289 "outputs": [
1290 {
1291 "name": "stdout",
1292 "output_type": "stream",
1293 "text": [
1294 "5 Industry Portfolios\n",
1295 "---------------------\n",
1296 "\n",
1297 "This file was created by CMPT_IND_RETS using the 201901 CRSP database. It contains value- and equal-weighted returns for 5 industry portfolios. The portfolios are constructed at the end of June. The annual returns are from January to December. Missing data are indicated by -99.99 or -999. Copyright 2019 Kenneth R. French\n",
1298 "\n",
1299 " 0 : Average Value Weighted Returns -- Monthly (109 rows x 5 cols)\n",
1300 " 1 : Average Equal Weighted Returns -- Monthly (109 rows x 5 cols)\n",
1301 " 2 : Average Value Weighted Returns -- Annual (9 rows x 5 cols)\n",
1302 " 3 : Average Equal Weighted Returns -- Annual (9 rows x 5 cols)\n",
1303 " 4 : Number of Firms in Portfolios (109 rows x 5 cols)\n",
1304 " 5 : Average Firm Size (109 rows x 5 cols)\n",
1305 " 6 : Sum of BE / Sum of ME (9 rows x 5 cols)\n",
1306 " 7 : Value-Weighted Average of BE/ME (9 rows x 5 cols)\n"
1307 ]
1308 }
1309 ],
1310 "source": [
1311 "ds = web.DataReader('5_Industry_Portfolios', 'famafrench')\n",
1312 "print(ds['DESCR'])"
1313 ]
1314 },
1315 {
1316 "cell_type": "markdown",
1317 "metadata": {},
1318 "source": [
1319 "### World Bank"
1320 ]
1321 },
1322 {
1323 "cell_type": "code",
1324 "execution_count": 16,
1325 "metadata": {},
1326 "outputs": [
1327 {
1328 "data": {
1329 "text/html": [
1330 "<div>\n",
1331 "<style scoped>\n",
1332 " .dataframe tbody tr th:only-of-type {\n",
1333 " vertical-align: middle;\n",
1334 " }\n",
1335 "\n",
1336 " .dataframe tbody tr th {\n",
1337 " vertical-align: top;\n",
1338 " }\n",
1339 "\n",
1340 " .dataframe thead th {\n",
1341 " text-align: right;\n",
1342 " }\n",
1343 "</style>\n",
1344 "<table border=\"1\" class=\"dataframe\">\n",
1345 " <thead>\n",
1346 " <tr style=\"text-align: right;\">\n",
1347 " <th></th>\n",
1348 " <th>id</th>\n",
1349 " <th>name</th>\n",
1350 " <th>source</th>\n",
1351 " <th>sourceNote</th>\n",
1352 " <th>sourceOrganization</th>\n",
1353 " <th>topics</th>\n",
1354 " <th>unit</th>\n",
1355 " </tr>\n",
1356 " </thead>\n",
1357 " <tbody>\n",
1358 " <tr>\n",
1359 " <th>646</th>\n",
1360 " <td>6.0.GDPpc_constant</td>\n",
1361 " <td>GDP per capita, PPP (constant 2011 internation...</td>\n",
1362 " <td>LAC Equity Lab</td>\n",
1363 " <td>GDP per capita based on purchasing power parit...</td>\n",
1364 " <td>b'World Development Indicators (World Bank)'</td>\n",
1365 " <td>Economy & Growth</td>\n",
1366 " <td></td>\n",
1367 " </tr>\n",
1368 " <tr>\n",
1369 " <th>9108</th>\n",
1370 " <td>NY.GDP.PCAP.KD</td>\n",
1371 " <td>GDP per capita (constant 2010 US$)</td>\n",
1372 " <td>World Development Indicators</td>\n",
1373 " <td>GDP per capita is gross domestic product divid...</td>\n",
1374 " <td>b'World Bank national accounts data, and OECD ...</td>\n",
1375 " <td>Economy & Growth</td>\n",
1376 " <td></td>\n",
1377 " </tr>\n",
1378 " <tr>\n",
1379 " <th>9110</th>\n",
1380 " <td>NY.GDP.PCAP.KN</td>\n",
1381 " <td>GDP per capita (constant LCU)</td>\n",
1382 " <td>World Development Indicators</td>\n",
1383 " <td>GDP per capita is gross domestic product divid...</td>\n",
1384 " <td>b'World Bank national accounts data, and OECD ...</td>\n",
1385 " <td>Economy & Growth</td>\n",
1386 " <td></td>\n",
1387 " </tr>\n",
1388 " <tr>\n",
1389 " <th>9112</th>\n",
1390 " <td>NY.GDP.PCAP.PP.KD</td>\n",
1391 " <td>GDP per capita, PPP (constant 2011 internation...</td>\n",
1392 " <td>World Development Indicators</td>\n",
1393 " <td>GDP per capita based on purchasing power parit...</td>\n",
1394 " <td>b'World Bank, International Comparison Program...</td>\n",
1395 " <td>Economy & Growth</td>\n",
1396 " <td></td>\n",
1397 " </tr>\n",
1398 " <tr>\n",
1399 " <th>9113</th>\n",
1400 " <td>NY.GDP.PCAP.PP.KD.87</td>\n",
1401 " <td>GDP per capita, PPP (constant 1987 internation...</td>\n",
1402 " <td>WDI Database Archives</td>\n",
1403 " <td></td>\n",
1404 " <td>b''</td>\n",
1405 " <td></td>\n",
1406 " <td></td>\n",
1407 " </tr>\n",
1408 " </tbody>\n",
1409 "</table>\n",
1410 "</div>"
1411 ],
1412 "text/plain": [
1413 " id name \\\n",
1414 "646 6.0.GDPpc_constant GDP per capita, PPP (constant 2011 internation... \n",
1415 "9108 NY.GDP.PCAP.KD GDP per capita (constant 2010 US$) \n",
1416 "9110 NY.GDP.PCAP.KN GDP per capita (constant LCU) \n",
1417 "9112 NY.GDP.PCAP.PP.KD GDP per capita, PPP (constant 2011 internation... \n",
1418 "9113 NY.GDP.PCAP.PP.KD.87 GDP per capita, PPP (constant 1987 internation... \n",
1419 "\n",
1420 " source \\\n",
1421 "646 LAC Equity Lab \n",
1422 "9108 World Development Indicators \n",
1423 "9110 World Development Indicators \n",
1424 "9112 World Development Indicators \n",
1425 "9113 WDI Database Archives \n",
1426 "\n",
1427 " sourceNote \\\n",
1428 "646 GDP per capita based on purchasing power parit... \n",
1429 "9108 GDP per capita is gross domestic product divid... \n",
1430 "9110 GDP per capita is gross domestic product divid... \n",
1431 "9112 GDP per capita based on purchasing power parit... \n",
1432 "9113 \n",
1433 "\n",
1434 " sourceOrganization topics unit \n",
1435 "646 b'World Development Indicators (World Bank)' Economy & Growth \n",
1436 "9108 b'World Bank national accounts data, and OECD ... Economy & Growth \n",
1437 "9110 b'World Bank national accounts data, and OECD ... Economy & Growth \n",
1438 "9112 b'World Bank, International Comparison Program... Economy & Growth \n",
1439 "9113 b'' "
1440 ]
1441 },
1442 "execution_count": 16,
1443 "metadata": {},
1444 "output_type": "execute_result"
1445 }
1446 ],
1447 "source": [
1448 "from pandas_datareader import wb\n",
1449 "gdp_variables = wb.search('gdp.*capita.*const')\n",
1450 "gdp_variables.head()"
1451 ]
1452 },
1453 {
1454 "cell_type": "code",
1455 "execution_count": 18,
1456 "metadata": {},
1457 "outputs": [
1458 {
1459 "data": {
1460 "text/html": [
1461 "<div>\n",
1462 "<style scoped>\n",
1463 " .dataframe tbody tr th:only-of-type {\n",
1464 " vertical-align: middle;\n",
1465 " }\n",
1466 "\n",
1467 " .dataframe tbody tr th {\n",
1468 " vertical-align: top;\n",
1469 " }\n",
1470 "\n",
1471 " .dataframe thead th {\n",
1472 " text-align: right;\n",
1473 " }\n",
1474 "</style>\n",
1475 "<table border=\"1\" class=\"dataframe\">\n",
1476 " <thead>\n",
1477 " <tr style=\"text-align: right;\">\n",
1478 " <th></th>\n",
1479 " <th></th>\n",
1480 " <th>NY.GDP.PCAP.KD</th>\n",
1481 " </tr>\n",
1482 " <tr>\n",
1483 " <th>country</th>\n",
1484 " <th>year</th>\n",
1485 " <th></th>\n",
1486 " </tr>\n",
1487 " </thead>\n",
1488 " <tbody>\n",
1489 " <tr>\n",
1490 " <th rowspan=\"5\" valign=\"top\">Canada</th>\n",
1491 " <th>2018</th>\n",
1492 " <td>NaN</td>\n",
1493 " </tr>\n",
1494 " <tr>\n",
1495 " <th>2017</th>\n",
1496 " <td>51315.888975</td>\n",
1497 " </tr>\n",
1498 " <tr>\n",
1499 " <th>2016</th>\n",
1500 " <td>50407.341330</td>\n",
1501 " </tr>\n",
1502 " <tr>\n",
1503 " <th>2015</th>\n",
1504 " <td>50303.836848</td>\n",
1505 " </tr>\n",
1506 " <tr>\n",
1507 " <th>2014</th>\n",
1508 " <td>50221.841982</td>\n",
1509 " </tr>\n",
1510 " </tbody>\n",
1511 "</table>\n",
1512 "</div>"
1513 ],
1514 "text/plain": [
1515 " NY.GDP.PCAP.KD\n",
1516 "country year \n",
1517 "Canada 2018 NaN\n",
1518 " 2017 51315.888975\n",
1519 " 2016 50407.341330\n",
1520 " 2015 50303.836848\n",
1521 " 2014 50221.841982"
1522 ]
1523 },
1524 "execution_count": 18,
1525 "metadata": {},
1526 "output_type": "execute_result"
1527 }
1528 ],
1529 "source": [
1530 "wb_data = wb.download(indicator='NY.GDP.PCAP.KD', \n",
1531 " country=['US', 'CA', 'MX'], \n",
1532 " start=1990, \n",
1533 " end=2019)\n",
1534 "wb_data.head()"
1535 ]
1536 },
1537 {
1538 "cell_type": "markdown",
1539 "metadata": {},
1540 "source": [
1541 "### OECD"
1542 ]
1543 },
1544 {
1545 "cell_type": "code",
1546 "execution_count": 20,
1547 "metadata": {},
1548 "outputs": [
1549 {
1550 "data": {
1551 "text/html": [
1552 "<div>\n",
1553 "<style scoped>\n",
1554 " .dataframe tbody tr th:only-of-type {\n",
1555 " vertical-align: middle;\n",
1556 " }\n",
1557 "\n",
1558 " .dataframe tbody tr th {\n",
1559 " vertical-align: top;\n",
1560 " }\n",
1561 "\n",
1562 " .dataframe thead tr th {\n",
1563 " text-align: left;\n",
1564 " }\n",
1565 "\n",
1566 " .dataframe thead tr:last-of-type th {\n",
1567 " text-align: right;\n",
1568 " }\n",
1569 "</style>\n",
1570 "<table border=\"1\" class=\"dataframe\">\n",
1571 " <thead>\n",
1572 " <tr>\n",
1573 " <th>Country</th>\n",
1574 " <th colspan=\"10\" halign=\"left\">Japan</th>\n",
1575 " <th>...</th>\n",
1576 " <th colspan=\"10\" halign=\"left\">United States</th>\n",
1577 " </tr>\n",
1578 " <tr>\n",
1579 " <th>Frequency</th>\n",
1580 " <th colspan=\"10\" halign=\"left\">Annual</th>\n",
1581 " <th>...</th>\n",
1582 " <th colspan=\"10\" halign=\"left\">Annual</th>\n",
1583 " </tr>\n",
1584 " <tr>\n",
1585 " <th>Source</th>\n",
1586 " <th colspan=\"6\" halign=\"left\">Survey data</th>\n",
1587 " <th colspan=\"4\" halign=\"left\">Administrative data</th>\n",
1588 " <th>...</th>\n",
1589 " <th colspan=\"4\" halign=\"left\">Survey data</th>\n",
1590 " <th colspan=\"6\" halign=\"left\">Administrative data</th>\n",
1591 " </tr>\n",
1592 " <tr>\n",
1593 " <th>Series</th>\n",
1594 " <th colspan=\"2\" halign=\"left\">Union members</th>\n",
1595 " <th colspan=\"2\" halign=\"left\">Trade union density</th>\n",
1596 " <th colspan=\"2\" halign=\"left\">Employees</th>\n",
1597 " <th colspan=\"2\" halign=\"left\">Union members</th>\n",
1598 " <th colspan=\"2\" halign=\"left\">Trade union density</th>\n",
1599 " <th>...</th>\n",
1600 " <th colspan=\"2\" halign=\"left\">Trade union density</th>\n",
1601 " <th colspan=\"2\" halign=\"left\">Employees</th>\n",
1602 " <th colspan=\"2\" halign=\"left\">Union members</th>\n",
1603 " <th colspan=\"2\" halign=\"left\">Trade union density</th>\n",
1604 " <th colspan=\"2\" halign=\"left\">Employees</th>\n",
1605 " </tr>\n",
1606 " <tr>\n",
1607 " <th>Measure</th>\n",
1608 " <th>Thousands</th>\n",
1609 " <th>Percentage</th>\n",
1610 " <th>Thousands</th>\n",
1611 " <th>Percentage</th>\n",
1612 " <th>Thousands</th>\n",
1613 " <th>Percentage</th>\n",
1614 " <th>Thousands</th>\n",
1615 " <th>Percentage</th>\n",
1616 " <th>Thousands</th>\n",
1617 " <th>Percentage</th>\n",
1618 " <th>...</th>\n",
1619 " <th>Thousands</th>\n",
1620 " <th>Percentage</th>\n",
1621 " <th>Thousands</th>\n",
1622 " <th>Percentage</th>\n",
1623 " <th>Thousands</th>\n",
1624 " <th>Percentage</th>\n",
1625 " <th>Thousands</th>\n",
1626 " <th>Percentage</th>\n",
1627 " <th>Thousands</th>\n",
1628 " <th>Percentage</th>\n",
1629 " </tr>\n",
1630 " <tr>\n",
1631 " <th>Year</th>\n",
1632 " <th></th>\n",
1633 " <th></th>\n",
1634 " <th></th>\n",
1635 " <th></th>\n",
1636 " <th></th>\n",
1637 " <th></th>\n",
1638 " <th></th>\n",
1639 " <th></th>\n",
1640 " <th></th>\n",
1641 " <th></th>\n",
1642 " <th></th>\n",
1643 " <th></th>\n",
1644 " <th></th>\n",
1645 " <th></th>\n",
1646 " <th></th>\n",
1647 " <th></th>\n",
1648 " <th></th>\n",
1649 " <th></th>\n",
1650 " <th></th>\n",
1651 " <th></th>\n",
1652 " <th></th>\n",
1653 " </tr>\n",
1654 " </thead>\n",
1655 " <tbody>\n",
1656 " <tr>\n",
1657 " <th>2010-01-01</th>\n",
1658 " <td>NaN</td>\n",
1659 " <td>NaN</td>\n",
1660 " <td>NaN</td>\n",
1661 " <td>NaN</td>\n",
1662 " <td>NaN</td>\n",
1663 " <td>NaN</td>\n",
1664 " <td>12417.527</td>\n",
1665 " <td>NaN</td>\n",
1666 " <td>NaN</td>\n",
1667 " <td>28.871256</td>\n",
1668 " <td>...</td>\n",
1669 " <td>NaN</td>\n",
1670 " <td>17.448617</td>\n",
1671 " <td>97406.0</td>\n",
1672 " <td>NaN</td>\n",
1673 " <td>NaN</td>\n",
1674 " <td>NaN</td>\n",
1675 " <td>NaN</td>\n",
1676 " <td>NaN</td>\n",
1677 " <td>NaN</td>\n",
1678 " <td>NaN</td>\n",
1679 " </tr>\n",
1680 " <tr>\n",
1681 " <th>2011-01-01</th>\n",
1682 " <td>NaN</td>\n",
1683 " <td>NaN</td>\n",
1684 " <td>NaN</td>\n",
1685 " <td>NaN</td>\n",
1686 " <td>NaN</td>\n",
1687 " <td>NaN</td>\n",
1688 " <td>12271.909</td>\n",
1689 " <td>NaN</td>\n",
1690 " <td>NaN</td>\n",
1691 " <td>27.589723</td>\n",
1692 " <td>...</td>\n",
1693 " <td>NaN</td>\n",
1694 " <td>16.516118</td>\n",
1695 " <td>102403.0</td>\n",
1696 " <td>NaN</td>\n",
1697 " <td>NaN</td>\n",
1698 " <td>NaN</td>\n",
1699 " <td>NaN</td>\n",
1700 " <td>NaN</td>\n",
1701 " <td>NaN</td>\n",
1702 " <td>NaN</td>\n",
1703 " </tr>\n",
1704 " <tr>\n",
1705 " <th>2012-01-01</th>\n",
1706 " <td>NaN</td>\n",
1707 " <td>NaN</td>\n",
1708 " <td>NaN</td>\n",
1709 " <td>NaN</td>\n",
1710 " <td>NaN</td>\n",
1711 " <td>NaN</td>\n",
1712 " <td>12227.073</td>\n",
1713 " <td>NaN</td>\n",
1714 " <td>NaN</td>\n",
1715 " <td>25.899329</td>\n",
1716 " <td>...</td>\n",
1717 " <td>NaN</td>\n",
1718 " <td>15.861734</td>\n",
1719 " <td>106924.0</td>\n",
1720 " <td>NaN</td>\n",
1721 " <td>NaN</td>\n",
1722 " <td>NaN</td>\n",
1723 " <td>NaN</td>\n",
1724 " <td>NaN</td>\n",
1725 " <td>NaN</td>\n",
1726 " <td>NaN</td>\n",
1727 " </tr>\n",
1728 " <tr>\n",
1729 " <th>2013-01-01</th>\n",
1730 " <td>NaN</td>\n",
1731 " <td>NaN</td>\n",
1732 " <td>NaN</td>\n",
1733 " <td>NaN</td>\n",
1734 " <td>NaN</td>\n",
1735 " <td>NaN</td>\n",
1736 " <td>12396.592</td>\n",
1737 " <td>NaN</td>\n",
1738 " <td>NaN</td>\n",
1739 " <td>24.489514</td>\n",
1740 " <td>...</td>\n",
1741 " <td>NaN</td>\n",
1742 " <td>15.510448</td>\n",
1743 " <td>107102.0</td>\n",
1744 " <td>NaN</td>\n",
1745 " <td>NaN</td>\n",
1746 " <td>NaN</td>\n",
1747 " <td>NaN</td>\n",
1748 " <td>NaN</td>\n",
1749 " <td>NaN</td>\n",
1750 " <td>NaN</td>\n",
1751 " </tr>\n",
1752 " <tr>\n",
1753 " <th>2014-01-01</th>\n",
1754 " <td>NaN</td>\n",
1755 " <td>NaN</td>\n",
1756 " <td>NaN</td>\n",
1757 " <td>NaN</td>\n",
1758 " <td>NaN</td>\n",
1759 " <td>NaN</td>\n",
1760 " <td>7661.568</td>\n",
1761 " <td>NaN</td>\n",
1762 " <td>NaN</td>\n",
1763 " <td>32.164433</td>\n",
1764 " <td>...</td>\n",
1765 " <td>NaN</td>\n",
1766 " <td>NaN</td>\n",
1767 " <td>NaN</td>\n",
1768 " <td>NaN</td>\n",
1769 " <td>17049.0</td>\n",
1770 " <td>NaN</td>\n",
1771 " <td>NaN</td>\n",
1772 " <td>30.897064</td>\n",
1773 " <td>55180.0</td>\n",
1774 " <td>NaN</td>\n",
1775 " </tr>\n",
1776 " <tr>\n",
1777 " <th>2015-01-01</th>\n",
1778 " <td>NaN</td>\n",
1779 " <td>NaN</td>\n",
1780 " <td>NaN</td>\n",
1781 " <td>NaN</td>\n",
1782 " <td>NaN</td>\n",
1783 " <td>NaN</td>\n",
1784 " <td>8359.876</td>\n",
1785 " <td>NaN</td>\n",
1786 " <td>NaN</td>\n",
1787 " <td>34.516416</td>\n",
1788 " <td>...</td>\n",
1789 " <td>NaN</td>\n",
1790 " <td>NaN</td>\n",
1791 " <td>NaN</td>\n",
1792 " <td>NaN</td>\n",
1793 " <td>16303.0</td>\n",
1794 " <td>NaN</td>\n",
1795 " <td>NaN</td>\n",
1796 " <td>29.518378</td>\n",
1797 " <td>55230.0</td>\n",
1798 " <td>NaN</td>\n",
1799 " </tr>\n",
1800 " </tbody>\n",
1801 "</table>\n",
1802 "<p>6 rows × 24 columns</p>\n",
1803 "</div>"
1804 ],
1805 "text/plain": [
1806 "Country Japan \\\n",
1807 "Frequency Annual \n",
1808 "Source Survey data \n",
1809 "Series Union members Trade union density Employees \n",
1810 "Measure Thousands Percentage Thousands Percentage Thousands \n",
1811 "Year \n",
1812 "2010-01-01 NaN NaN NaN NaN NaN \n",
1813 "2011-01-01 NaN NaN NaN NaN NaN \n",
1814 "2012-01-01 NaN NaN NaN NaN NaN \n",
1815 "2013-01-01 NaN NaN NaN NaN NaN \n",
1816 "2014-01-01 NaN NaN NaN NaN NaN \n",
1817 "2015-01-01 NaN NaN NaN NaN NaN \n",
1818 "\n",
1819 "Country \\\n",
1820 "Frequency \n",
1821 "Source Administrative data \n",
1822 "Series Union members Trade union density \n",
1823 "Measure Percentage Thousands Percentage Thousands \n",
1824 "Year \n",
1825 "2010-01-01 NaN 12417.527 NaN NaN \n",
1826 "2011-01-01 NaN 12271.909 NaN NaN \n",
1827 "2012-01-01 NaN 12227.073 NaN NaN \n",
1828 "2013-01-01 NaN 12396.592 NaN NaN \n",
1829 "2014-01-01 NaN 7661.568 NaN NaN \n",
1830 "2015-01-01 NaN 8359.876 NaN NaN \n",
1831 "\n",
1832 "Country ... United States \\\n",
1833 "Frequency ... Annual \n",
1834 "Source ... Survey data \n",
1835 "Series ... Trade union density Employees \n",
1836 "Measure Percentage ... Thousands Percentage Thousands \n",
1837 "Year ... \n",
1838 "2010-01-01 28.871256 ... NaN 17.448617 97406.0 \n",
1839 "2011-01-01 27.589723 ... NaN 16.516118 102403.0 \n",
1840 "2012-01-01 25.899329 ... NaN 15.861734 106924.0 \n",
1841 "2013-01-01 24.489514 ... NaN 15.510448 107102.0 \n",
1842 "2014-01-01 32.164433 ... NaN NaN NaN \n",
1843 "2015-01-01 34.516416 ... NaN NaN NaN \n",
1844 "\n",
1845 "Country \\\n",
1846 "Frequency \n",
1847 "Source Administrative data \n",
1848 "Series Union members Trade union density \n",
1849 "Measure Percentage Thousands Percentage Thousands \n",
1850 "Year \n",
1851 "2010-01-01 NaN NaN NaN NaN \n",
1852 "2011-01-01 NaN NaN NaN NaN \n",
1853 "2012-01-01 NaN NaN NaN NaN \n",
1854 "2013-01-01 NaN NaN NaN NaN \n",
1855 "2014-01-01 NaN 17049.0 NaN NaN \n",
1856 "2015-01-01 NaN 16303.0 NaN NaN \n",
1857 "\n",
1858 "Country \n",
1859 "Frequency \n",
1860 "Source \n",
1861 "Series Employees \n",
1862 "Measure Percentage Thousands Percentage \n",
1863 "Year \n",
1864 "2010-01-01 NaN NaN NaN \n",
1865 "2011-01-01 NaN NaN NaN \n",
1866 "2012-01-01 NaN NaN NaN \n",
1867 "2013-01-01 NaN NaN NaN \n",
1868 "2014-01-01 30.897064 55180.0 NaN \n",
1869 "2015-01-01 29.518378 55230.0 NaN \n",
1870 "\n",
1871 "[6 rows x 24 columns]"
1872 ]
1873 },
1874 "execution_count": 20,
1875 "metadata": {},
1876 "output_type": "execute_result"
1877 }
1878 ],
1879 "source": [
1880 "df = web.DataReader('TUD', 'oecd', end='2015')\n",
1881 "df[['Japan', 'United States']]"
1882 ]
1883 },
1884 {
1885 "cell_type": "markdown",
1886 "metadata": {},
1887 "source": [
1888 "### EuroStat"
1889 ]
1890 },
1891 {
1892 "cell_type": "code",
1893 "execution_count": 21,
1894 "metadata": {},
1895 "outputs": [],
1896 "source": [
1897 "df = web.DataReader('tran_sf_railac', 'eurostat')"
1898 ]
1899 },
1900 {
1901 "cell_type": "code",
1902 "execution_count": 22,
1903 "metadata": {},
1904 "outputs": [
1905 {
1906 "data": {
1907 "text/html": [
1908 "<div>\n",
1909 "<style scoped>\n",
1910 " .dataframe tbody tr th:only-of-type {\n",
1911 " vertical-align: middle;\n",
1912 " }\n",
1913 "\n",
1914 " .dataframe tbody tr th {\n",
1915 " vertical-align: top;\n",
1916 " }\n",
1917 "\n",
1918 " .dataframe thead tr th {\n",
1919 " text-align: left;\n",
1920 " }\n",
1921 "\n",
1922 " .dataframe thead tr:last-of-type th {\n",
1923 " text-align: right;\n",
1924 " }\n",
1925 "</style>\n",
1926 "<table border=\"1\" class=\"dataframe\">\n",
1927 " <thead>\n",
1928 " <tr>\n",
1929 " <th>ACCIDENT</th>\n",
1930 " <th colspan=\"10\" halign=\"left\">Collisions of trains, including collisions with obstacles within the clearance gauge</th>\n",
1931 " <th>...</th>\n",
1932 " <th colspan=\"10\" halign=\"left\">Unknown</th>\n",
1933 " </tr>\n",
1934 " <tr>\n",
1935 " <th>UNIT</th>\n",
1936 " <th colspan=\"10\" halign=\"left\">Number</th>\n",
1937 " <th>...</th>\n",
1938 " <th colspan=\"10\" halign=\"left\">Number</th>\n",
1939 " </tr>\n",
1940 " <tr>\n",
1941 " <th>GEO</th>\n",
1942 " <th>Austria</th>\n",
1943 " <th>Belgium</th>\n",
1944 " <th>Bulgaria</th>\n",
1945 " <th>Switzerland</th>\n",
1946 " <th>Channel Tunnel</th>\n",
1947 " <th>Czechia</th>\n",
1948 " <th>Germany (until 1990 former territory of the FRG)</th>\n",
1949 " <th>Denmark</th>\n",
1950 " <th>Estonia</th>\n",
1951 " <th>Greece</th>\n",
1952 " <th>...</th>\n",
1953 " <th>Netherlands</th>\n",
1954 " <th>Norway</th>\n",
1955 " <th>Poland</th>\n",
1956 " <th>Portugal</th>\n",
1957 " <th>Romania</th>\n",
1958 " <th>Sweden</th>\n",
1959 " <th>Slovenia</th>\n",
1960 " <th>Slovakia</th>\n",
1961 " <th>Turkey</th>\n",
1962 " <th>United Kingdom</th>\n",
1963 " </tr>\n",
1964 " <tr>\n",
1965 " <th>FREQ</th>\n",
1966 " <th>Annual</th>\n",
1967 " <th>Annual</th>\n",
1968 " <th>Annual</th>\n",
1969 " <th>Annual</th>\n",
1970 " <th>Annual</th>\n",
1971 " <th>Annual</th>\n",
1972 " <th>Annual</th>\n",
1973 " <th>Annual</th>\n",
1974 " <th>Annual</th>\n",
1975 " <th>Annual</th>\n",
1976 " <th>...</th>\n",
1977 " <th>Annual</th>\n",
1978 " <th>Annual</th>\n",
1979 " <th>Annual</th>\n",
1980 " <th>Annual</th>\n",
1981 " <th>Annual</th>\n",
1982 " <th>Annual</th>\n",
1983 " <th>Annual</th>\n",
1984 " <th>Annual</th>\n",
1985 " <th>Annual</th>\n",
1986 " <th>Annual</th>\n",
1987 " </tr>\n",
1988 " <tr>\n",
1989 " <th>TIME_PERIOD</th>\n",
1990 " <th></th>\n",
1991 " <th></th>\n",
1992 " <th></th>\n",
1993 " <th></th>\n",
1994 " <th></th>\n",
1995 " <th></th>\n",
1996 " <th></th>\n",
1997 " <th></th>\n",
1998 " <th></th>\n",
1999 " <th></th>\n",
2000 " <th></th>\n",
2001 " <th></th>\n",
2002 " <th></th>\n",
2003 " <th></th>\n",
2004 " <th></th>\n",
2005 " <th></th>\n",
2006 " <th></th>\n",
2007 " <th></th>\n",
2008 " <th></th>\n",
2009 " <th></th>\n",
2010 " <th></th>\n",
2011 " </tr>\n",
2012 " </thead>\n",
2013 " <tbody>\n",
2014 " <tr>\n",
2015 " <th>2010-01-01</th>\n",
2016 " <td>3.0</td>\n",
2017 " <td>5.0</td>\n",
2018 " <td>2.0</td>\n",
2019 " <td>5.0</td>\n",
2020 " <td>0.0</td>\n",
2021 " <td>3.0</td>\n",
2022 " <td>13.0</td>\n",
2023 " <td>0.0</td>\n",
2024 " <td>1.0</td>\n",
2025 " <td>4.0</td>\n",
2026 " <td>...</td>\n",
2027 " <td>NaN</td>\n",
2028 " <td>NaN</td>\n",
2029 " <td>NaN</td>\n",
2030 " <td>NaN</td>\n",
2031 " <td>NaN</td>\n",
2032 " <td>NaN</td>\n",
2033 " <td>NaN</td>\n",
2034 " <td>NaN</td>\n",
2035 " <td>0.0</td>\n",
2036 " <td>NaN</td>\n",
2037 " </tr>\n",
2038 " <tr>\n",
2039 " <th>2011-01-01</th>\n",
2040 " <td>2.0</td>\n",
2041 " <td>0.0</td>\n",
2042 " <td>0.0</td>\n",
2043 " <td>4.0</td>\n",
2044 " <td>0.0</td>\n",
2045 " <td>6.0</td>\n",
2046 " <td>18.0</td>\n",
2047 " <td>1.0</td>\n",
2048 " <td>0.0</td>\n",
2049 " <td>1.0</td>\n",
2050 " <td>...</td>\n",
2051 " <td>NaN</td>\n",
2052 " <td>NaN</td>\n",
2053 " <td>NaN</td>\n",
2054 " <td>NaN</td>\n",
2055 " <td>NaN</td>\n",
2056 " <td>NaN</td>\n",
2057 " <td>NaN</td>\n",
2058 " <td>NaN</td>\n",
2059 " <td>0.0</td>\n",
2060 " <td>NaN</td>\n",
2061 " </tr>\n",
2062 " <tr>\n",
2063 " <th>2012-01-01</th>\n",
2064 " <td>1.0</td>\n",
2065 " <td>3.0</td>\n",
2066 " <td>3.0</td>\n",
2067 " <td>4.0</td>\n",
2068 " <td>0.0</td>\n",
2069 " <td>6.0</td>\n",
2070 " <td>23.0</td>\n",
2071 " <td>1.0</td>\n",
2072 " <td>3.0</td>\n",
2073 " <td>2.0</td>\n",
2074 " <td>...</td>\n",
2075 " <td>NaN</td>\n",
2076 " <td>NaN</td>\n",
2077 " <td>NaN</td>\n",
2078 " <td>NaN</td>\n",
2079 " <td>NaN</td>\n",
2080 " <td>NaN</td>\n",
2081 " <td>NaN</td>\n",
2082 " <td>NaN</td>\n",
2083 " <td>0.0</td>\n",
2084 " <td>NaN</td>\n",
2085 " </tr>\n",
2086 " <tr>\n",
2087 " <th>2013-01-01</th>\n",
2088 " <td>4.0</td>\n",
2089 " <td>1.0</td>\n",
2090 " <td>2.0</td>\n",
2091 " <td>6.0</td>\n",
2092 " <td>0.0</td>\n",
2093 " <td>5.0</td>\n",
2094 " <td>29.0</td>\n",
2095 " <td>0.0</td>\n",
2096 " <td>0.0</td>\n",
2097 " <td>2.0</td>\n",
2098 " <td>...</td>\n",
2099 " <td>NaN</td>\n",
2100 " <td>NaN</td>\n",
2101 " <td>NaN</td>\n",
2102 " <td>NaN</td>\n",
2103 " <td>NaN</td>\n",
2104 " <td>NaN</td>\n",
2105 " <td>NaN</td>\n",
2106 " <td>NaN</td>\n",
2107 " <td>0.0</td>\n",
2108 " <td>NaN</td>\n",
2109 " </tr>\n",
2110 " <tr>\n",
2111 " <th>2014-01-01</th>\n",
2112 " <td>1.0</td>\n",
2113 " <td>3.0</td>\n",
2114 " <td>4.0</td>\n",
2115 " <td>0.0</td>\n",
2116 " <td>0.0</td>\n",
2117 " <td>13.0</td>\n",
2118 " <td>32.0</td>\n",
2119 " <td>0.0</td>\n",
2120 " <td>0.0</td>\n",
2121 " <td>1.0</td>\n",
2122 " <td>...</td>\n",
2123 " <td>NaN</td>\n",
2124 " <td>NaN</td>\n",
2125 " <td>NaN</td>\n",
2126 " <td>NaN</td>\n",
2127 " <td>NaN</td>\n",
2128 " <td>NaN</td>\n",
2129 " <td>NaN</td>\n",
2130 " <td>NaN</td>\n",
2131 " <td>0.0</td>\n",
2132 " <td>NaN</td>\n",
2133 " </tr>\n",
2134 " </tbody>\n",
2135 "</table>\n",
2136 "<p>5 rows × 264 columns</p>\n",
2137 "</div>"
2138 ],
2139 "text/plain": [
2140 "ACCIDENT Collisions of trains, including collisions with obstacles within the clearance gauge \\\n",
2141 "UNIT Number \n",
2142 "GEO Austria \n",
2143 "FREQ Annual \n",
2144 "TIME_PERIOD \n",
2145 "2010-01-01 3.0 \n",
2146 "2011-01-01 2.0 \n",
2147 "2012-01-01 1.0 \n",
2148 "2013-01-01 4.0 \n",
2149 "2014-01-01 1.0 \n",
2150 "\n",
2151 "ACCIDENT \\\n",
2152 "UNIT \n",
2153 "GEO Belgium Bulgaria Switzerland Channel Tunnel Czechia \n",
2154 "FREQ Annual Annual Annual Annual Annual \n",
2155 "TIME_PERIOD \n",
2156 "2010-01-01 5.0 2.0 5.0 0.0 3.0 \n",
2157 "2011-01-01 0.0 0.0 4.0 0.0 6.0 \n",
2158 "2012-01-01 3.0 3.0 4.0 0.0 6.0 \n",
2159 "2013-01-01 1.0 2.0 6.0 0.0 5.0 \n",
2160 "2014-01-01 3.0 4.0 0.0 0.0 13.0 \n",
2161 "\n",
2162 "ACCIDENT \\\n",
2163 "UNIT \n",
2164 "GEO Germany (until 1990 former territory of the FRG) Denmark Estonia \n",
2165 "FREQ Annual Annual Annual \n",
2166 "TIME_PERIOD \n",
2167 "2010-01-01 13.0 0.0 1.0 \n",
2168 "2011-01-01 18.0 1.0 0.0 \n",
2169 "2012-01-01 23.0 1.0 3.0 \n",
2170 "2013-01-01 29.0 0.0 0.0 \n",
2171 "2014-01-01 32.0 0.0 0.0 \n",
2172 "\n",
2173 "ACCIDENT ... Unknown \\\n",
2174 "UNIT ... Number \n",
2175 "GEO Greece ... Netherlands Norway Poland Portugal Romania Sweden \n",
2176 "FREQ Annual ... Annual Annual Annual Annual Annual Annual \n",
2177 "TIME_PERIOD ... \n",
2178 "2010-01-01 4.0 ... NaN NaN NaN NaN NaN NaN \n",
2179 "2011-01-01 1.0 ... NaN NaN NaN NaN NaN NaN \n",
2180 "2012-01-01 2.0 ... NaN NaN NaN NaN NaN NaN \n",
2181 "2013-01-01 2.0 ... NaN NaN NaN NaN NaN NaN \n",
2182 "2014-01-01 1.0 ... NaN NaN NaN NaN NaN NaN \n",
2183 "\n",
2184 "ACCIDENT \n",
2185 "UNIT \n",
2186 "GEO Slovenia Slovakia Turkey United Kingdom \n",
2187 "FREQ Annual Annual Annual Annual \n",
2188 "TIME_PERIOD \n",
2189 "2010-01-01 NaN NaN 0.0 NaN \n",
2190 "2011-01-01 NaN NaN 0.0 NaN \n",
2191 "2012-01-01 NaN NaN 0.0 NaN \n",
2192 "2013-01-01 NaN NaN 0.0 NaN \n",
2193 "2014-01-01 NaN NaN 0.0 NaN \n",
2194 "\n",
2195 "[5 rows x 264 columns]"
2196 ]
2197 },
2198 "execution_count": 22,
2199 "metadata": {},
2200 "output_type": "execute_result"
2201 }
2202 ],
2203 "source": [
2204 "df.head()"
2205 ]
2206 },
2207 {
2208 "cell_type": "markdown",
2209 "metadata": {},
2210 "source": [
2211 "\n",
2212 "\n",
2213 "### Stooq"
2214 ]
2215 },
2216 {
2217 "cell_type": "markdown",
2218 "metadata": {},
2219 "source": [
2220 "Google finance stopped providing common index data download. The Stooq site had this data for download for a while but is currently broken, awaiting release of [fix](https://github.com/pydata/pandas-datareader/issues/594)"
2221 ]
2222 },
2223 {
2224 "cell_type": "code",
2225 "execution_count": 13,
2226 "metadata": {
2227 "ExecuteTime": {
2228 "end_time": "2018-12-25T19:17:11.052846Z",
2229 "start_time": "2018-12-25T19:17:08.631634Z"
2230 }
2231 },
2232 "outputs": [
2233 {
2234 "data": {
2235 "text/plain": [
2236 "46"
2237 ]
2238 },
2239 "execution_count": 13,
2240 "metadata": {},
2241 "output_type": "execute_result"
2242 }
2243 ],
2244 "source": [
2245 "index_url = 'https://stooq.com/t/'\n",
2246 "ix = pd.read_html(index_url)\n",
2247 "len(ix)"
2248 ]
2249 },
2250 {
2251 "cell_type": "code",
2252 "execution_count": 14,
2253 "metadata": {
2254 "ExecuteTime": {
2255 "end_time": "2018-12-25T19:17:11.497938Z",
2256 "start_time": "2018-12-25T19:17:11.054004Z"
2257 }
2258 },
2259 "outputs": [
2260 {
2261 "name": "stdout",
2262 "output_type": "stream",
2263 "text": [
2264 "<class 'pandas.core.frame.DataFrame'>\n",
2265 "Index: 0 entries\n",
2266 "Empty DataFrame"
2267 ]
2268 }
2269 ],
2270 "source": [
2271 "f = web.DataReader('^SPX', 'stooq', start='20000101')\n",
2272 "f.info()"
2273 ]
2274 },
2275 {
2276 "cell_type": "code",
2277 "execution_count": 15,
2278 "metadata": {
2279 "ExecuteTime": {
2280 "end_time": "2018-12-25T19:17:11.521392Z",
2281 "start_time": "2018-12-25T19:17:11.503799Z"
2282 }
2283 },
2284 "outputs": [
2285 {
2286 "data": {
2287 "text/html": [
2288 "<div>\n",
2289 "<style scoped>\n",
2290 " .dataframe tbody tr th:only-of-type {\n",
2291 " vertical-align: middle;\n",
2292 " }\n",
2293 "\n",
2294 " .dataframe tbody tr th {\n",
2295 " vertical-align: top;\n",
2296 " }\n",
2297 "\n",
2298 " .dataframe thead th {\n",
2299 " text-align: right;\n",
2300 " }\n",
2301 "</style>\n",
2302 "<table border=\"1\" class=\"dataframe\">\n",
2303 " <thead>\n",
2304 " <tr style=\"text-align: right;\">\n",
2305 " <th></th>\n",
2306 " </tr>\n",
2307 " <tr>\n",
2308 " <th>No data</th>\n",
2309 " </tr>\n",
2310 " </thead>\n",
2311 " <tbody>\n",
2312 " </tbody>\n",
2313 "</table>\n",
2314 "</div>"
2315 ],
2316 "text/plain": [
2317 "Empty DataFrame\n",
2318 "Columns: []\n",
2319 "Index: []"
2320 ]
2321 },
2322 "execution_count": 15,
2323 "metadata": {},
2324 "output_type": "execute_result"
2325 }
2326 ],
2327 "source": [
2328 "f.head()"
2329 ]
2330 },
2331 {
2332 "cell_type": "markdown",
2333 "metadata": {},
2334 "source": [
2335 "### NASDAQ Symbols"
2336 ]
2337 },
2338 {
2339 "cell_type": "code",
2340 "execution_count": 23,
2341 "metadata": {
2342 "ExecuteTime": {
2343 "end_time": "2018-12-25T19:20:56.673795Z",
2344 "start_time": "2018-12-25T19:20:55.625622Z"
2345 }
2346 },
2347 "outputs": [
2348 {
2349 "name": "stdout",
2350 "output_type": "stream",
2351 "text": [
2352 "<class 'pandas.core.frame.DataFrame'>\n",
2353 "Index: 8701 entries, A to ZYXI\n",
2354 "Data columns (total 11 columns):\n",
2355 "Nasdaq Traded 8701 non-null bool\n",
2356 "Security Name 8701 non-null object\n",
2357 "Listing Exchange 8701 non-null category\n",
2358 "Market Category 8701 non-null object\n",
2359 "ETF 8701 non-null bool\n",
2360 "Round Lot Size 8701 non-null float64\n",
2361 "Test Issue 8701 non-null bool\n",
2362 "Financial Status 3411 non-null category\n",
2363 "CQS Symbol 5290 non-null object\n",
2364 "NASDAQ Symbol 8701 non-null object\n",
2365 "NextShares 8701 non-null bool\n",
2366 "dtypes: bool(4), category(2), float64(1), object(4)\n",
2367 "memory usage: 459.2+ KB\n"
2368 ]
2369 }
2370 ],
2371 "source": [
2372 "from pandas_datareader.nasdaq_trader import get_nasdaq_symbols\n",
2373 "symbols = get_nasdaq_symbols()\n",
2374 "symbols.info()"
2375 ]
2376 },
2377 {
2378 "cell_type": "code",
2379 "execution_count": 24,
2380 "metadata": {
2381 "ExecuteTime": {
2382 "end_time": "2018-12-25T19:20:59.748392Z",
2383 "start_time": "2018-12-25T19:20:58.646206Z"
2384 }
2385 },
2386 "outputs": [
2387 {
2388 "data": {
2389 "text/plain": [
2390 "4"
2391 ]
2392 },
2393 "execution_count": 24,
2394 "metadata": {},
2395 "output_type": "execute_result"
2396 }
2397 ],
2398 "source": [
2399 "url = 'https://www.nasdaq.com/screening/companies-by-industry.aspx?exchange=NASDAQ'\n",
2400 "res = pd.read_html(url)\n",
2401 "len(res)"
2402 ]
2403 },
2404 {
2405 "cell_type": "code",
2406 "execution_count": 25,
2407 "metadata": {
2408 "ExecuteTime": {
2409 "end_time": "2018-12-25T19:21:02.385427Z",
2410 "start_time": "2018-12-25T19:21:02.352487Z"
2411 },
2412 "scrolled": true
2413 },
2414 "outputs": [
2415 {
2416 "name": "stdout",
2417 "output_type": "stream",
2418 "text": [
2419 "<class 'pandas.core.frame.DataFrame'>\n",
2420 "RangeIndex: 1 entries, 0 to 0\n",
2421 "Data columns (total 2 columns):\n",
2422 "0 1 non-null object\n",
2423 "1 1 non-null object\n",
2424 "dtypes: object(2)\n",
2425 "memory usage: 96.0+ bytes\n",
2426 "None\n",
2427 "<class 'pandas.core.frame.DataFrame'>\n",
2428 "RangeIndex: 101 entries, 0 to 100\n",
2429 "Data columns (total 6 columns):\n",
2430 "Name 101 non-null object\n",
2431 "Symbol 51 non-null object\n",
2432 "Market Cap 47 non-null object\n",
2433 "Country 51 non-null object\n",
2434 "IPO Year 28 non-null object\n",
2435 "Subsector 51 non-null object\n",
2436 "dtypes: object(6)\n",
2437 "memory usage: 4.8+ KB\n",
2438 "None\n",
2439 "<class 'pandas.core.frame.DataFrame'>\n",
2440 "RangeIndex: 1 entries, 0 to 0\n",
2441 "Data columns (total 1 columns):\n",
2442 "0 1 non-null object\n",
2443 "dtypes: object(1)\n",
2444 "memory usage: 88.0+ bytes\n",
2445 "None\n",
2446 "<class 'pandas.core.frame.DataFrame'>\n",
2447 "RangeIndex: 1 entries, 0 to 0\n",
2448 "Data columns (total 1 columns):\n",
2449 "0 1 non-null object\n",
2450 "dtypes: object(1)\n",
2451 "memory usage: 88.0+ bytes\n",
2452 "None\n"
2453 ]
2454 }
2455 ],
2456 "source": [
2457 "for r in res:\n",
2458 " print(r.info())"
2459 ]
2460 },
2461 {
2462 "cell_type": "markdown",
2463 "metadata": {},
2464 "source": [
2465 "### Tiingo"
2466 ]
2467 },
2468 {
2469 "cell_type": "markdown",
2470 "metadata": {},
2471 "source": [
2472 "Requires [signing up](https://api.tiingo.com/) and storing API key in environment"
2473 ]
2474 },
2475 {
2476 "cell_type": "code",
2477 "execution_count": 26,
2478 "metadata": {
2479 "ExecuteTime": {
2480 "end_time": "2018-12-25T19:21:26.396222Z",
2481 "start_time": "2018-12-25T19:21:26.150734Z"
2482 }
2483 },
2484 "outputs": [],
2485 "source": [
2486 "df = web.get_data_tiingo('GOOG', api_key=os.getenv('TIINGO_API_KEY'))"
2487 ]
2488 },
2489 {
2490 "cell_type": "code",
2491 "execution_count": 27,
2492 "metadata": {
2493 "ExecuteTime": {
2494 "end_time": "2018-12-25T19:21:27.003018Z",
2495 "start_time": "2018-12-25T19:21:26.986133Z"
2496 }
2497 },
2498 "outputs": [
2499 {
2500 "name": "stdout",
2501 "output_type": "stream",
2502 "text": [
2503 "<class 'pandas.core.frame.DataFrame'>\n",
2504 "MultiIndex: 1244 entries, (GOOG, 2014-03-27 00:00:00) to (GOOG, 2019-03-06 00:00:00)\n",
2505 "Data columns (total 12 columns):\n",
2506 "adjClose 1244 non-null float64\n",
2507 "adjHigh 1244 non-null float64\n",
2508 "adjLow 1244 non-null float64\n",
2509 "adjOpen 1244 non-null float64\n",
2510 "adjVolume 1244 non-null int64\n",
2511 "close 1244 non-null float64\n",
2512 "divCash 1244 non-null float64\n",
2513 "high 1244 non-null float64\n",
2514 "low 1244 non-null float64\n",
2515 "open 1244 non-null float64\n",
2516 "splitFactor 1244 non-null float64\n",
2517 "volume 1244 non-null int64\n",
2518 "dtypes: float64(10), int64(2)\n",
2519 "memory usage: 130.1+ KB\n"
2520 ]
2521 }
2522 ],
2523 "source": [
2524 "df.info()"
2525 ]
2526 }
2527 ],
2528 "metadata": {
2529 "kernelspec": {
2530 "display_name": "Python 3",
2531 "language": "python",
2532 "name": "python3"
2533 },
2534 "language_info": {
2535 "codemirror_mode": {
2536 "name": "ipython",
2537 "version": 3
2538 },
2539 "file_extension": ".py",
2540 "mimetype": "text/x-python",
2541 "name": "python",
2542 "nbconvert_exporter": "python",
2543 "pygments_lexer": "ipython3",
2544 "version": "3.7.3"
2545 },
2546 "toc": {
2547 "base_numbering": 1,
2548 "nav_menu": {},
2549 "number_sections": true,
2550 "sideBar": true,
2551 "skip_h1_title": true,
2552 "title_cell": "Table of Contents",
2553 "title_sidebar": "Contents",
2554 "toc_cell": false,
2555 "toc_position": {},
2556 "toc_section_display": true,
2557 "toc_window_display": true
2558 }
2559 },
2560 "nbformat": 4,
2561 "nbformat_minor": 2
2562 }