ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
02_using_trained_vectors.ipynb
(28150B)
1 {
2 "cells": [
3 {
4 "cell_type": "markdown",
5 "metadata": {},
6 "source": [
7 "## Imports & Settings"
8 ]
9 },
10 {
11 "cell_type": "code",
12 "execution_count": 1,
13 "metadata": {
14 "ExecuteTime": {
15 "end_time": "2018-12-10T05:26:22.735641Z",
16 "start_time": "2018-12-10T05:26:22.732284Z"
17 }
18 },
19 "outputs": [],
20 "source": [
21 "from time import time\n",
22 "import warnings\n",
23 "from collections import Counter\n",
24 "from pathlib import Path\n",
25 "import pandas as pd\n",
26 "import numpy as np\n",
27 "\n",
28 "import matplotlib.pyplot as plt\n",
29 "\n",
30 "from gensim.models import Word2Vec, KeyedVectors\n",
31 "from gensim.scripts.glove2word2vec import glove2word2vec"
32 ]
33 },
34 {
35 "cell_type": "code",
36 "execution_count": 2,
37 "metadata": {
38 "ExecuteTime": {
39 "end_time": "2018-12-10T05:26:22.809782Z",
40 "start_time": "2018-12-10T05:26:22.807930Z"
41 }
42 },
43 "outputs": [],
44 "source": [
45 "warnings.filterwarnings('ignore')"
46 ]
47 },
48 {
49 "cell_type": "code",
50 "execution_count": 3,
51 "metadata": {
52 "ExecuteTime": {
53 "end_time": "2018-12-10T05:23:31.270587Z",
54 "start_time": "2018-12-10T05:23:31.261745Z"
55 }
56 },
57 "outputs": [],
58 "source": [
59 "analogies_path = Path('data', 'analogies', 'analogies-en.txt')"
60 ]
61 },
62 {
63 "cell_type": "markdown",
64 "metadata": {},
65 "source": [
66 "## Convert GloVe Vectors to gensim format"
67 ]
68 },
69 {
70 "cell_type": "markdown",
71 "metadata": {},
72 "source": [
73 "The various GloVe vectors are available [here](https://nlp.stanford.edu/projects/glove/). The [Wikipedia](http://nlp.stanford.edu/data/glove.6B.zip) version can be downloaded directly; unzip it and store the files in `data/glove`."
74 ]
75 },
76 {
77 "cell_type": "markdown",
78 "metadata": {},
79 "source": [
80 "### Wikipedia"
81 ]
82 },
83 {
84 "cell_type": "code",
85 "execution_count": 4,
86 "metadata": {
87 "ExecuteTime": {
88 "end_time": "2018-12-10T06:21:41.349764Z",
89 "start_time": "2018-12-10T06:21:41.347458Z"
90 }
91 },
92 "outputs": [],
93 "source": [
94 "glove_path = Path('data/glove')\n",
95 "glove_wiki_file= glove_path / 'glove.6B.300d.txt'\n",
96 "word2vec_wiki_file = glove_path / 'glove.wiki.gensim.txt'"
97 ]
98 },
99 {
100 "cell_type": "code",
101 "execution_count": null,
102 "metadata": {
103 "ExecuteTime": {
104 "end_time": "2018-12-10T06:21:44.300116Z",
105 "start_time": "2018-12-10T06:21:41.533781Z"
106 }
107 },
108 "outputs": [],
109 "source": [
110 "glove2word2vec(glove_input_file=glove_wiki_file, word2vec_output_file=word2vec_wiki_file)"
111 ]
112 },
113 {
114 "cell_type": "markdown",
115 "metadata": {},
116 "source": [
117 "### Twitter Data"
118 ]
119 },
120 {
121 "cell_type": "code",
122 "execution_count": 18,
123 "metadata": {
124 "ExecuteTime": {
125 "end_time": "2018-12-10T05:44:52.104643Z",
126 "start_time": "2018-12-10T05:44:52.096912Z"
127 }
128 },
129 "outputs": [],
130 "source": [
131 "glove_twitter_file= glove_path / 'glove.twitter.27B.200d.txt'\n",
132 "word2vec_twitter_file = glove_path / 'glove.twitter.gensim.txt'"
133 ]
134 },
135 {
136 "cell_type": "code",
137 "execution_count": 19,
138 "metadata": {
139 "ExecuteTime": {
140 "end_time": "2018-12-10T05:45:02.864556Z",
141 "start_time": "2018-12-10T05:44:59.034198Z"
142 }
143 },
144 "outputs": [
145 {
146 "data": {
147 "text/plain": [
148 "(1193517, 200)"
149 ]
150 },
151 "execution_count": 19,
152 "metadata": {},
153 "output_type": "execute_result"
154 }
155 ],
156 "source": [
157 "glove2word2vec(glove_input_file=glove_twitter_file, word2vec_output_file=word2vec_twitter_file)"
158 ]
159 },
160 {
161 "cell_type": "markdown",
162 "metadata": {},
163 "source": [
164 "### Common Crawl"
165 ]
166 },
167 {
168 "cell_type": "code",
169 "execution_count": 26,
170 "metadata": {
171 "ExecuteTime": {
172 "end_time": "2018-12-10T05:59:20.729441Z",
173 "start_time": "2018-12-10T05:59:20.721920Z"
174 }
175 },
176 "outputs": [],
177 "source": [
178 "glove_crawl_file= glove_path / 'glove.840B.300d.txt'\n",
179 "word2vec_crawl_file = glove_path / 'glove.crawl.gensim.txt'"
180 ]
181 },
182 {
183 "cell_type": "code",
184 "execution_count": 27,
185 "metadata": {
186 "ExecuteTime": {
187 "end_time": "2018-12-10T05:59:40.404114Z",
188 "start_time": "2018-12-10T05:59:28.731439Z"
189 }
190 },
191 "outputs": [
192 {
193 "data": {
194 "text/plain": [
195 "(2196018, 300)"
196 ]
197 },
198 "execution_count": 27,
199 "metadata": {},
200 "output_type": "execute_result"
201 }
202 ],
203 "source": [
204 "glove2word2vec(glove_input_file=glove_crawl_file, word2vec_output_file=word2vec_crawl_file)"
205 ]
206 },
207 {
208 "cell_type": "markdown",
209 "metadata": {},
210 "source": [
211 "## Evaluate embeddings"
212 ]
213 },
214 {
215 "cell_type": "code",
216 "execution_count": 37,
217 "metadata": {
218 "ExecuteTime": {
219 "end_time": "2018-12-10T06:21:48.973717Z",
220 "start_time": "2018-12-10T06:21:48.965153Z"
221 }
222 },
223 "outputs": [],
224 "source": [
225 "def eval_analogies(file_name, vocab=30000):\n",
226 " model = KeyedVectors.load_word2vec_format(file_name, binary=False)\n",
227 " accuracy = model.wv.accuracy(analogies_path,\n",
228 " restrict_vocab=vocab,\n",
229 " case_insensitive=True)\n",
230 " return (pd.DataFrame([[c['section'],\n",
231 " len(c['correct']),\n",
232 " len(c['incorrect'])] for c in accuracy],\n",
233 " columns=['category', 'correct', 'incorrect'])\n",
234 " .assign(samples=lambda x: x.correct.add(x.incorrect))\n",
235 " .assign(average=lambda x: x.correct.div(x.samples))\n",
236 " .drop(['correct', 'incorrect'], axis=1))"
237 ]
238 },
239 {
240 "cell_type": "code",
241 "execution_count": 40,
242 "metadata": {
243 "ExecuteTime": {
244 "end_time": "2018-12-10T06:33:06.540388Z",
245 "start_time": "2018-12-10T06:28:21.484660Z"
246 }
247 },
248 "outputs": [],
249 "source": [
250 "result = eval_analogies(word2vec_twitter_file, vocab=100000)"
251 ]
252 },
253 {
254 "cell_type": "markdown",
255 "metadata": {},
256 "source": [
257 "### Twitter result"
258 ]
259 },
260 {
261 "cell_type": "code",
262 "execution_count": 41,
263 "metadata": {
264 "ExecuteTime": {
265 "end_time": "2018-12-10T06:33:06.559308Z",
266 "start_time": "2018-12-10T06:33:06.553450Z"
267 }
268 },
269 "outputs": [
270 {
271 "data": {
272 "text/html": [
273 "<div>\n",
274 "<style scoped>\n",
275 " .dataframe tbody tr th:only-of-type {\n",
276 " vertical-align: middle;\n",
277 " }\n",
278 "\n",
279 " .dataframe tbody tr th {\n",
280 " vertical-align: top;\n",
281 " }\n",
282 "\n",
283 " .dataframe thead th {\n",
284 " text-align: right;\n",
285 " }\n",
286 "</style>\n",
287 "<table border=\"1\" class=\"dataframe\">\n",
288 " <thead>\n",
289 " <tr style=\"text-align: right;\">\n",
290 " <th></th>\n",
291 " <th>category</th>\n",
292 " <th>samples</th>\n",
293 " <th>average</th>\n",
294 " </tr>\n",
295 " </thead>\n",
296 " <tbody>\n",
297 " <tr>\n",
298 " <th>0</th>\n",
299 " <td>capital-common-countries</td>\n",
300 " <td>462</td>\n",
301 " <td>0.701299</td>\n",
302 " </tr>\n",
303 " <tr>\n",
304 " <th>1</th>\n",
305 " <td>capital-world</td>\n",
306 " <td>930</td>\n",
307 " <td>0.690323</td>\n",
308 " </tr>\n",
309 " <tr>\n",
310 " <th>2</th>\n",
311 " <td>city-in-state</td>\n",
312 " <td>3644</td>\n",
313 " <td>0.350714</td>\n",
314 " </tr>\n",
315 " <tr>\n",
316 " <th>3</th>\n",
317 " <td>currency</td>\n",
318 " <td>268</td>\n",
319 " <td>0.018657</td>\n",
320 " </tr>\n",
321 " <tr>\n",
322 " <th>4</th>\n",
323 " <td>family</td>\n",
324 " <td>342</td>\n",
325 " <td>0.824561</td>\n",
326 " </tr>\n",
327 " <tr>\n",
328 " <th>5</th>\n",
329 " <td>gram1-adjective-to-adverb</td>\n",
330 " <td>650</td>\n",
331 " <td>0.143077</td>\n",
332 " </tr>\n",
333 " <tr>\n",
334 " <th>6</th>\n",
335 " <td>gram2-opposite</td>\n",
336 " <td>342</td>\n",
337 " <td>0.365497</td>\n",
338 " </tr>\n",
339 " <tr>\n",
340 " <th>7</th>\n",
341 " <td>gram3-comparative</td>\n",
342 " <td>1260</td>\n",
343 " <td>0.757937</td>\n",
344 " </tr>\n",
345 " <tr>\n",
346 " <th>8</th>\n",
347 " <td>gram4-superlative</td>\n",
348 " <td>930</td>\n",
349 " <td>0.686022</td>\n",
350 " </tr>\n",
351 " <tr>\n",
352 " <th>9</th>\n",
353 " <td>gram5-present-participle</td>\n",
354 " <td>702</td>\n",
355 " <td>0.750712</td>\n",
356 " </tr>\n",
357 " <tr>\n",
358 " <th>10</th>\n",
359 " <td>gram6-nationality-adjective</td>\n",
360 " <td>870</td>\n",
361 " <td>0.750575</td>\n",
362 " </tr>\n",
363 " <tr>\n",
364 " <th>11</th>\n",
365 " <td>gram7-past-tense</td>\n",
366 " <td>1190</td>\n",
367 " <td>0.576471</td>\n",
368 " </tr>\n",
369 " <tr>\n",
370 " <th>12</th>\n",
371 " <td>gram8-plural</td>\n",
372 " <td>1122</td>\n",
373 " <td>0.811052</td>\n",
374 " </tr>\n",
375 " <tr>\n",
376 " <th>13</th>\n",
377 " <td>gram9-plural-verbs</td>\n",
378 " <td>600</td>\n",
379 " <td>0.655000</td>\n",
380 " </tr>\n",
381 " <tr>\n",
382 " <th>14</th>\n",
383 " <td>total</td>\n",
384 " <td>13312</td>\n",
385 " <td>0.564228</td>\n",
386 " </tr>\n",
387 " </tbody>\n",
388 "</table>\n",
389 "</div>"
390 ],
391 "text/plain": [
392 " category samples average\n",
393 "0 capital-common-countries 462 0.701299\n",
394 "1 capital-world 930 0.690323\n",
395 "2 city-in-state 3644 0.350714\n",
396 "3 currency 268 0.018657\n",
397 "4 family 342 0.824561\n",
398 "5 gram1-adjective-to-adverb 650 0.143077\n",
399 "6 gram2-opposite 342 0.365497\n",
400 "7 gram3-comparative 1260 0.757937\n",
401 "8 gram4-superlative 930 0.686022\n",
402 "9 gram5-present-participle 702 0.750712\n",
403 "10 gram6-nationality-adjective 870 0.750575\n",
404 "11 gram7-past-tense 1190 0.576471\n",
405 "12 gram8-plural 1122 0.811052\n",
406 "13 gram9-plural-verbs 600 0.655000\n",
407 "14 total 13312 0.564228"
408 ]
409 },
410 "execution_count": 41,
411 "metadata": {},
412 "output_type": "execute_result"
413 }
414 ],
415 "source": [
416 "result"
417 ]
418 },
419 {
420 "cell_type": "markdown",
421 "metadata": {},
422 "source": [
423 "### Wikipedia result"
424 ]
425 },
426 {
427 "cell_type": "code",
428 "execution_count": 39,
429 "metadata": {
430 "ExecuteTime": {
431 "end_time": "2018-12-10T06:28:21.483713Z",
432 "start_time": "2018-12-10T06:28:21.477881Z"
433 }
434 },
435 "outputs": [
436 {
437 "data": {
438 "text/html": [
439 "<div>\n",
440 "<style scoped>\n",
441 " .dataframe tbody tr th:only-of-type {\n",
442 " vertical-align: middle;\n",
443 " }\n",
444 "\n",
445 " .dataframe tbody tr th {\n",
446 " vertical-align: top;\n",
447 " }\n",
448 "\n",
449 " .dataframe thead th {\n",
450 " text-align: right;\n",
451 " }\n",
452 "</style>\n",
453 "<table border=\"1\" class=\"dataframe\">\n",
454 " <thead>\n",
455 " <tr style=\"text-align: right;\">\n",
456 " <th></th>\n",
457 " <th>category</th>\n",
458 " <th>samples</th>\n",
459 " <th>average</th>\n",
460 " </tr>\n",
461 " </thead>\n",
462 " <tbody>\n",
463 " <tr>\n",
464 " <th>0</th>\n",
465 " <td>capital-common-countries</td>\n",
466 " <td>506</td>\n",
467 " <td>0.948617</td>\n",
468 " </tr>\n",
469 " <tr>\n",
470 " <th>1</th>\n",
471 " <td>capital-world</td>\n",
472 " <td>8372</td>\n",
473 " <td>0.964644</td>\n",
474 " </tr>\n",
475 " <tr>\n",
476 " <th>2</th>\n",
477 " <td>city-in-state</td>\n",
478 " <td>4242</td>\n",
479 " <td>0.599953</td>\n",
480 " </tr>\n",
481 " <tr>\n",
482 " <th>3</th>\n",
483 " <td>currency</td>\n",
484 " <td>752</td>\n",
485 " <td>0.174202</td>\n",
486 " </tr>\n",
487 " <tr>\n",
488 " <th>4</th>\n",
489 " <td>family</td>\n",
490 " <td>506</td>\n",
491 " <td>0.881423</td>\n",
492 " </tr>\n",
493 " <tr>\n",
494 " <th>5</th>\n",
495 " <td>gram1-adjective-to-adverb</td>\n",
496 " <td>992</td>\n",
497 " <td>0.225806</td>\n",
498 " </tr>\n",
499 " <tr>\n",
500 " <th>6</th>\n",
501 " <td>gram2-opposite</td>\n",
502 " <td>756</td>\n",
503 " <td>0.285714</td>\n",
504 " </tr>\n",
505 " <tr>\n",
506 " <th>7</th>\n",
507 " <td>gram3-comparative</td>\n",
508 " <td>1332</td>\n",
509 " <td>0.882132</td>\n",
510 " </tr>\n",
511 " <tr>\n",
512 " <th>8</th>\n",
513 " <td>gram4-superlative</td>\n",
514 " <td>1056</td>\n",
515 " <td>0.746212</td>\n",
516 " </tr>\n",
517 " <tr>\n",
518 " <th>9</th>\n",
519 " <td>gram5-present-participle</td>\n",
520 " <td>1056</td>\n",
521 " <td>0.699811</td>\n",
522 " </tr>\n",
523 " <tr>\n",
524 " <th>10</th>\n",
525 " <td>gram6-nationality-adjective</td>\n",
526 " <td>1640</td>\n",
527 " <td>0.925000</td>\n",
528 " </tr>\n",
529 " <tr>\n",
530 " <th>11</th>\n",
531 " <td>gram7-past-tense</td>\n",
532 " <td>1560</td>\n",
533 " <td>0.611538</td>\n",
534 " </tr>\n",
535 " <tr>\n",
536 " <th>12</th>\n",
537 " <td>gram8-plural</td>\n",
538 " <td>1332</td>\n",
539 " <td>0.780781</td>\n",
540 " </tr>\n",
541 " <tr>\n",
542 " <th>13</th>\n",
543 " <td>gram9-plural-verbs</td>\n",
544 " <td>870</td>\n",
545 " <td>0.585057</td>\n",
546 " </tr>\n",
547 " <tr>\n",
548 " <th>14</th>\n",
549 " <td>total</td>\n",
550 " <td>24972</td>\n",
551 " <td>0.754445</td>\n",
552 " </tr>\n",
553 " </tbody>\n",
554 "</table>\n",
555 "</div>"
556 ],
557 "text/plain": [
558 " category samples average\n",
559 "0 capital-common-countries 506 0.948617\n",
560 "1 capital-world 8372 0.964644\n",
561 "2 city-in-state 4242 0.599953\n",
562 "3 currency 752 0.174202\n",
563 "4 family 506 0.881423\n",
564 "5 gram1-adjective-to-adverb 992 0.225806\n",
565 "6 gram2-opposite 756 0.285714\n",
566 "7 gram3-comparative 1332 0.882132\n",
567 "8 gram4-superlative 1056 0.746212\n",
568 "9 gram5-present-participle 1056 0.699811\n",
569 "10 gram6-nationality-adjective 1640 0.925000\n",
570 "11 gram7-past-tense 1560 0.611538\n",
571 "12 gram8-plural 1332 0.780781\n",
572 "13 gram9-plural-verbs 870 0.585057\n",
573 "14 total 24972 0.754445"
574 ]
575 },
576 "execution_count": 39,
577 "metadata": {},
578 "output_type": "execute_result"
579 }
580 ],
581 "source": [
582 "result"
583 ]
584 },
585 {
586 "cell_type": "markdown",
587 "metadata": {},
588 "source": [
589 "### Common Crawl result"
590 ]
591 },
592 {
593 "cell_type": "code",
594 "execution_count": 33,
595 "metadata": {
596 "ExecuteTime": {
597 "end_time": "2018-12-10T06:20:56.028002Z",
598 "start_time": "2018-12-10T06:20:56.021706Z"
599 }
600 },
601 "outputs": [
602 {
603 "data": {
604 "text/html": [
605 "<div>\n",
606 "<style scoped>\n",
607 " .dataframe tbody tr th:only-of-type {\n",
608 " vertical-align: middle;\n",
609 " }\n",
610 "\n",
611 " .dataframe tbody tr th {\n",
612 " vertical-align: top;\n",
613 " }\n",
614 "\n",
615 " .dataframe thead th {\n",
616 " text-align: right;\n",
617 " }\n",
618 "</style>\n",
619 "<table border=\"1\" class=\"dataframe\">\n",
620 " <thead>\n",
621 " <tr style=\"text-align: right;\">\n",
622 " <th></th>\n",
623 " <th>category</th>\n",
624 " <th>samples</th>\n",
625 " <th>average</th>\n",
626 " </tr>\n",
627 " </thead>\n",
628 " <tbody>\n",
629 " <tr>\n",
630 " <th>0</th>\n",
631 " <td>capital-common-countries</td>\n",
632 " <td>506</td>\n",
633 " <td>0.946640</td>\n",
634 " </tr>\n",
635 " <tr>\n",
636 " <th>1</th>\n",
637 " <td>capital-world</td>\n",
638 " <td>4290</td>\n",
639 " <td>0.917483</td>\n",
640 " </tr>\n",
641 " <tr>\n",
642 " <th>2</th>\n",
643 " <td>city-in-state</td>\n",
644 " <td>4242</td>\n",
645 " <td>0.706742</td>\n",
646 " </tr>\n",
647 " <tr>\n",
648 " <th>3</th>\n",
649 " <td>currency</td>\n",
650 " <td>206</td>\n",
651 " <td>0.184466</td>\n",
652 " </tr>\n",
653 " <tr>\n",
654 " <th>4</th>\n",
655 " <td>family</td>\n",
656 " <td>420</td>\n",
657 " <td>0.978571</td>\n",
658 " </tr>\n",
659 " <tr>\n",
660 " <th>5</th>\n",
661 " <td>gram1-adjective-to-adverb</td>\n",
662 " <td>992</td>\n",
663 " <td>0.388105</td>\n",
664 " </tr>\n",
665 " <tr>\n",
666 " <th>6</th>\n",
667 " <td>gram2-opposite</td>\n",
668 " <td>702</td>\n",
669 " <td>0.363248</td>\n",
670 " </tr>\n",
671 " <tr>\n",
672 " <th>7</th>\n",
673 " <td>gram3-comparative</td>\n",
674 " <td>1332</td>\n",
675 " <td>0.876877</td>\n",
676 " </tr>\n",
677 " <tr>\n",
678 " <th>8</th>\n",
679 " <td>gram4-superlative</td>\n",
680 " <td>1122</td>\n",
681 " <td>0.919786</td>\n",
682 " </tr>\n",
683 " <tr>\n",
684 " <th>9</th>\n",
685 " <td>gram5-present-participle</td>\n",
686 " <td>1056</td>\n",
687 " <td>0.827652</td>\n",
688 " </tr>\n",
689 " <tr>\n",
690 " <th>10</th>\n",
691 " <td>gram6-nationality-adjective</td>\n",
692 " <td>1406</td>\n",
693 " <td>0.948791</td>\n",
694 " </tr>\n",
695 " <tr>\n",
696 " <th>11</th>\n",
697 " <td>gram7-past-tense</td>\n",
698 " <td>1560</td>\n",
699 " <td>0.621154</td>\n",
700 " </tr>\n",
701 " <tr>\n",
702 " <th>12</th>\n",
703 " <td>gram8-plural</td>\n",
704 " <td>1332</td>\n",
705 " <td>0.864114</td>\n",
706 " </tr>\n",
707 " <tr>\n",
708 " <th>13</th>\n",
709 " <td>gram9-plural-verbs</td>\n",
710 " <td>870</td>\n",
711 " <td>0.672414</td>\n",
712 " </tr>\n",
713 " <tr>\n",
714 " <th>14</th>\n",
715 " <td>total</td>\n",
716 " <td>20036</td>\n",
717 " <td>0.779347</td>\n",
718 " </tr>\n",
719 " </tbody>\n",
720 "</table>\n",
721 "</div>"
722 ],
723 "text/plain": [
724 " category samples average\n",
725 "0 capital-common-countries 506 0.946640\n",
726 "1 capital-world 4290 0.917483\n",
727 "2 city-in-state 4242 0.706742\n",
728 "3 currency 206 0.184466\n",
729 "4 family 420 0.978571\n",
730 "5 gram1-adjective-to-adverb 992 0.388105\n",
731 "6 gram2-opposite 702 0.363248\n",
732 "7 gram3-comparative 1332 0.876877\n",
733 "8 gram4-superlative 1122 0.919786\n",
734 "9 gram5-present-participle 1056 0.827652\n",
735 "10 gram6-nationality-adjective 1406 0.948791\n",
736 "11 gram7-past-tense 1560 0.621154\n",
737 "12 gram8-plural 1332 0.864114\n",
738 "13 gram9-plural-verbs 870 0.672414\n",
739 "14 total 20036 0.779347"
740 ]
741 },
742 "execution_count": 33,
743 "metadata": {},
744 "output_type": "execute_result"
745 }
746 ],
747 "source": [
748 "result"
749 ]
750 },
751 {
752 "cell_type": "code",
753 "execution_count": 16,
754 "metadata": {
755 "ExecuteTime": {
756 "end_time": "2018-12-10T05:29:37.510823Z",
757 "start_time": "2018-12-10T05:29:37.498492Z"
758 }
759 },
760 "outputs": [
761 {
762 "data": {
763 "text/html": [
764 "<div>\n",
765 "<style scoped>\n",
766 " .dataframe tbody tr th:only-of-type {\n",
767 " vertical-align: middle;\n",
768 " }\n",
769 "\n",
770 " .dataframe tbody tr th {\n",
771 " vertical-align: top;\n",
772 " }\n",
773 "\n",
774 " .dataframe thead th {\n",
775 " text-align: right;\n",
776 " }\n",
777 "</style>\n",
778 "<table border=\"1\" class=\"dataframe\">\n",
779 " <thead>\n",
780 " <tr style=\"text-align: right;\">\n",
781 " <th></th>\n",
782 " <th>category</th>\n",
783 " <th>correct</th>\n",
784 " <th>incorrect</th>\n",
785 " <th>average</th>\n",
786 " </tr>\n",
787 " </thead>\n",
788 " <tbody>\n",
789 " <tr>\n",
790 " <th>0</th>\n",
791 " <td>capital-common-countries</td>\n",
792 " <td>482</td>\n",
793 " <td>24</td>\n",
794 " <td>0.952569</td>\n",
795 " </tr>\n",
796 " <tr>\n",
797 " <th>1</th>\n",
798 " <td>capital-world</td>\n",
799 " <td>6093</td>\n",
800 " <td>227</td>\n",
801 " <td>0.964082</td>\n",
802 " </tr>\n",
803 " <tr>\n",
804 " <th>2</th>\n",
805 " <td>city-in-state</td>\n",
806 " <td>2472</td>\n",
807 " <td>1646</td>\n",
808 " <td>0.600291</td>\n",
809 " </tr>\n",
810 " <tr>\n",
811 " <th>3</th>\n",
812 " <td>currency</td>\n",
813 " <td>112</td>\n",
814 " <td>390</td>\n",
815 " <td>0.223108</td>\n",
816 " </tr>\n",
817 " <tr>\n",
818 " <th>4</th>\n",
819 " <td>family</td>\n",
820 " <td>392</td>\n",
821 " <td>28</td>\n",
822 " <td>0.933333</td>\n",
823 " </tr>\n",
824 " <tr>\n",
825 " <th>5</th>\n",
826 " <td>gram1-adjective-to-adverb</td>\n",
827 " <td>228</td>\n",
828 " <td>764</td>\n",
829 " <td>0.229839</td>\n",
830 " </tr>\n",
831 " <tr>\n",
832 " <th>6</th>\n",
833 " <td>gram2-opposite</td>\n",
834 " <td>205</td>\n",
835 " <td>497</td>\n",
836 " <td>0.292023</td>\n",
837 " </tr>\n",
838 " <tr>\n",
839 " <th>7</th>\n",
840 " <td>gram3-comparative</td>\n",
841 " <td>1175</td>\n",
842 " <td>157</td>\n",
843 " <td>0.882132</td>\n",
844 " </tr>\n",
845 " <tr>\n",
846 " <th>8</th>\n",
847 " <td>gram4-superlative</td>\n",
848 " <td>737</td>\n",
849 " <td>193</td>\n",
850 " <td>0.792473</td>\n",
851 " </tr>\n",
852 " <tr>\n",
853 " <th>9</th>\n",
854 " <td>gram5-present-participle</td>\n",
855 " <td>686</td>\n",
856 " <td>306</td>\n",
857 " <td>0.691532</td>\n",
858 " </tr>\n",
859 " <tr>\n",
860 " <th>10</th>\n",
861 " <td>gram6-nationality-adjective</td>\n",
862 " <td>1445</td>\n",
863 " <td>37</td>\n",
864 " <td>0.975034</td>\n",
865 " </tr>\n",
866 " <tr>\n",
867 " <th>11</th>\n",
868 " <td>gram7-past-tense</td>\n",
869 " <td>954</td>\n",
870 " <td>606</td>\n",
871 " <td>0.611538</td>\n",
872 " </tr>\n",
873 " <tr>\n",
874 " <th>12</th>\n",
875 " <td>gram8-plural</td>\n",
876 " <td>1016</td>\n",
877 " <td>244</td>\n",
878 " <td>0.806349</td>\n",
879 " </tr>\n",
880 " <tr>\n",
881 " <th>13</th>\n",
882 " <td>gram9-plural-verbs</td>\n",
883 " <td>472</td>\n",
884 " <td>340</td>\n",
885 " <td>0.581281</td>\n",
886 " </tr>\n",
887 " <tr>\n",
888 " <th>14</th>\n",
889 " <td>total</td>\n",
890 " <td>16469</td>\n",
891 " <td>5459</td>\n",
892 " <td>0.751049</td>\n",
893 " </tr>\n",
894 " </tbody>\n",
895 "</table>\n",
896 "</div>"
897 ],
898 "text/plain": [
899 " category correct incorrect average\n",
900 "0 capital-common-countries 482 24 0.952569\n",
901 "1 capital-world 6093 227 0.964082\n",
902 "2 city-in-state 2472 1646 0.600291\n",
903 "3 currency 112 390 0.223108\n",
904 "4 family 392 28 0.933333\n",
905 "5 gram1-adjective-to-adverb 228 764 0.229839\n",
906 "6 gram2-opposite 205 497 0.292023\n",
907 "7 gram3-comparative 1175 157 0.882132\n",
908 "8 gram4-superlative 737 193 0.792473\n",
909 "9 gram5-present-participle 686 306 0.691532\n",
910 "10 gram6-nationality-adjective 1445 37 0.975034\n",
911 "11 gram7-past-tense 954 606 0.611538\n",
912 "12 gram8-plural 1016 244 0.806349\n",
913 "13 gram9-plural-verbs 472 340 0.581281\n",
914 "14 total 16469 5459 0.751049"
915 ]
916 },
917 "execution_count": 16,
918 "metadata": {},
919 "output_type": "execute_result"
920 }
921 ],
922 "source": [
923 "result"
924 ]
925 },
926 {
927 "cell_type": "code",
928 "execution_count": 17,
929 "metadata": {
930 "ExecuteTime": {
931 "end_time": "2018-12-10T05:29:55.829245Z",
932 "start_time": "2018-12-10T05:29:55.822131Z"
933 }
934 },
935 "outputs": [],
936 "source": [
937 "result.to_csv(glove_path / 'accuracy.csv', index=False)"
938 ]
939 },
940 {
941 "cell_type": "code",
942 "execution_count": null,
943 "metadata": {},
944 "outputs": [],
945 "source": []
946 }
947 ],
948 "metadata": {
949 "kernelspec": {
950 "display_name": "Python 3",
951 "language": "python",
952 "name": "python3"
953 },
954 "language_info": {
955 "codemirror_mode": {
956 "name": "ipython",
957 "version": 3
958 },
959 "file_extension": ".py",
960 "mimetype": "text/x-python",
961 "name": "python",
962 "nbconvert_exporter": "python",
963 "pygments_lexer": "ipython3",
964 "version": "3.6.8"
965 },
966 "toc": {
967 "base_numbering": 1,
968 "nav_menu": {},
969 "number_sections": true,
970 "sideBar": true,
971 "skip_h1_title": false,
972 "title_cell": "Table of Contents",
973 "title_sidebar": "Contents",
974 "toc_cell": false,
975 "toc_position": {},
976 "toc_section_display": true,
977 "toc_window_display": false
978 }
979 },
980 "nbformat": 4,
981 "nbformat_minor": 2
982 }