ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
02_using_trained_vectors.ipynb

(28150B)
      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "## Imports & Settings"
      8    ]
      9   },
     10   {
     11    "cell_type": "code",
     12    "execution_count": 1,
     13    "metadata": {
     14     "ExecuteTime": {
     15      "end_time": "2018-12-10T05:26:22.735641Z",
     16      "start_time": "2018-12-10T05:26:22.732284Z"
     17     }
     18    },
     19    "outputs": [],
     20    "source": [
     21     "from time import time\n",
     22     "import warnings\n",
     23     "from collections import Counter\n",
     24     "from pathlib import Path\n",
     25     "import pandas as pd\n",
     26     "import numpy as np\n",
     27     "\n",
     28     "import matplotlib.pyplot as plt\n",
     29     "\n",
     30     "from gensim.models import Word2Vec, KeyedVectors\n",
     31     "from gensim.scripts.glove2word2vec import glove2word2vec"
     32    ]
     33   },
     34   {
     35    "cell_type": "code",
     36    "execution_count": 2,
     37    "metadata": {
     38     "ExecuteTime": {
     39      "end_time": "2018-12-10T05:26:22.809782Z",
     40      "start_time": "2018-12-10T05:26:22.807930Z"
     41     }
     42    },
     43    "outputs": [],
     44    "source": [
     45     "warnings.filterwarnings('ignore')"
     46    ]
     47   },
     48   {
     49    "cell_type": "code",
     50    "execution_count": 3,
     51    "metadata": {
     52     "ExecuteTime": {
     53      "end_time": "2018-12-10T05:23:31.270587Z",
     54      "start_time": "2018-12-10T05:23:31.261745Z"
     55     }
     56    },
     57    "outputs": [],
     58    "source": [
     59     "analogies_path = Path('data', 'analogies', 'analogies-en.txt')"
     60    ]
     61   },
     62   {
     63    "cell_type": "markdown",
     64    "metadata": {},
     65    "source": [
     66     "## Convert GloVe Vectors to gensim format"
     67    ]
     68   },
     69   {
     70    "cell_type": "markdown",
     71    "metadata": {},
     72    "source": [
     73     "The various GloVe vectors are available [here](https://nlp.stanford.edu/projects/glove/). Download the [Wikipedia](http://nlp.stanford.edu/data/glove.6B.zip) version, unzip it, and store the extracted files in `data/glove`."
     74    ]
     75   },
     76   {
     77    "cell_type": "markdown",
     78    "metadata": {},
     79    "source": [
     80     "### Wikipedia"
     81    ]
     82   },
     83   {
     84    "cell_type": "code",
     85    "execution_count": 4,
     86    "metadata": {
     87     "ExecuteTime": {
     88      "end_time": "2018-12-10T06:21:41.349764Z",
     89      "start_time": "2018-12-10T06:21:41.347458Z"
     90     }
     91    },
     92    "outputs": [],
     93    "source": [
     94     "glove_path = Path('data/glove')\n",
     95     "glove_wiki_file= glove_path / 'glove.6B.300d.txt'\n",
     96     "word2vec_wiki_file = glove_path / 'glove.wiki.gensim.txt'"
     97    ]
     98   },
     99   {
    100    "cell_type": "code",
    101    "execution_count": null,
    102    "metadata": {
    103     "ExecuteTime": {
    104      "end_time": "2018-12-10T06:21:44.300116Z",
    105      "start_time": "2018-12-10T06:21:41.533781Z"
    106     }
    107    },
    108    "outputs": [],
    109    "source": [
    110     "glove2word2vec(glove_input_file=glove_wiki_file, word2vec_output_file=word2vec_wiki_file)"
    111    ]
    112   },
    113   {
    114    "cell_type": "markdown",
    115    "metadata": {},
    116    "source": [
    117     "### Twitter Data"
    118    ]
    119   },
    120   {
    121    "cell_type": "code",
    122    "execution_count": 18,
    123    "metadata": {
    124     "ExecuteTime": {
    125      "end_time": "2018-12-10T05:44:52.104643Z",
    126      "start_time": "2018-12-10T05:44:52.096912Z"
    127     }
    128    },
    129    "outputs": [],
    130    "source": [
    131     "glove_twitter_file= glove_path / 'glove.twitter.27B.200d.txt'\n",
    132     "word2vec_twitter_file = glove_path / 'glove.twitter.gensim.txt'"
    133    ]
    134   },
    135   {
    136    "cell_type": "code",
    137    "execution_count": 19,
    138    "metadata": {
    139     "ExecuteTime": {
    140      "end_time": "2018-12-10T05:45:02.864556Z",
    141      "start_time": "2018-12-10T05:44:59.034198Z"
    142     }
    143    },
    144    "outputs": [
    145     {
    146      "data": {
    147       "text/plain": [
    148        "(1193517, 200)"
    149       ]
    150      },
    151      "execution_count": 19,
    152      "metadata": {},
    153      "output_type": "execute_result"
    154     }
    155    ],
    156    "source": [
    157     "glove2word2vec(glove_input_file=glove_twitter_file, word2vec_output_file=word2vec_twitter_file)"
    158    ]
    159   },
    160   {
    161    "cell_type": "markdown",
    162    "metadata": {},
    163    "source": [
    164     "### Common Crawl"
    165    ]
    166   },
    167   {
    168    "cell_type": "code",
    169    "execution_count": 26,
    170    "metadata": {
    171     "ExecuteTime": {
    172      "end_time": "2018-12-10T05:59:20.729441Z",
    173      "start_time": "2018-12-10T05:59:20.721920Z"
    174     }
    175    },
    176    "outputs": [],
    177    "source": [
    178     "glove_crawl_file= glove_path / 'glove.840B.300d.txt'\n",
    179     "word2vec_crawl_file = glove_path / 'glove.crawl.gensim.txt'"
    180    ]
    181   },
    182   {
    183    "cell_type": "code",
    184    "execution_count": 27,
    185    "metadata": {
    186     "ExecuteTime": {
    187      "end_time": "2018-12-10T05:59:40.404114Z",
    188      "start_time": "2018-12-10T05:59:28.731439Z"
    189     }
    190    },
    191    "outputs": [
    192     {
    193      "data": {
    194       "text/plain": [
    195        "(2196018, 300)"
    196       ]
    197      },
    198      "execution_count": 27,
    199      "metadata": {},
    200      "output_type": "execute_result"
    201     }
    202    ],
    203    "source": [
    204     "glove2word2vec(glove_input_file=glove_crawl_file, word2vec_output_file=word2vec_crawl_file)"
    205    ]
    206   },
    207   {
    208    "cell_type": "markdown",
    209    "metadata": {},
    210    "source": [
    211     "## Evaluate embeddings"
    212    ]
    213   },
    214   {
    215    "cell_type": "code",
    216    "execution_count": 37,
    217    "metadata": {
    218     "ExecuteTime": {
    219      "end_time": "2018-12-10T06:21:48.973717Z",
    220      "start_time": "2018-12-10T06:21:48.965153Z"
    221     }
    222    },
    223    "outputs": [],
    224    "source": [
    225     "def eval_analogies(file_name, vocab=30000):\n",
    226     "    model = KeyedVectors.load_word2vec_format(file_name, binary=False)\n",
    227     "    accuracy = model.wv.accuracy(analogies_path,\n",
    228     "                                 restrict_vocab=vocab,\n",
    229     "                                 case_insensitive=True)\n",
    230     "    return (pd.DataFrame([[c['section'],\n",
    231     "                           len(c['correct']),\n",
    232     "                           len(c['incorrect'])] for c in accuracy],\n",
    233     "                         columns=['category', 'correct', 'incorrect'])\n",
    234     "            .assign(samples=lambda x: x.correct.add(x.incorrect))\n",
    235     "            .assign(average=lambda x: x.correct.div(x.samples))\n",
    236     "            .drop(['correct', 'incorrect'], axis=1))"
    237    ]
    238   },
    239   {
    240    "cell_type": "code",
    241    "execution_count": 40,
    242    "metadata": {
    243     "ExecuteTime": {
    244      "end_time": "2018-12-10T06:33:06.540388Z",
    245      "start_time": "2018-12-10T06:28:21.484660Z"
    246     }
    247    },
    248    "outputs": [],
    249    "source": [
    250     "result = eval_analogies(word2vec_twitter_file, vocab=100000)"
    251    ]
    252   },
    253   {
    254    "cell_type": "markdown",
    255    "metadata": {},
    256    "source": [
    257     "### Twitter result"
    258    ]
    259   },
    260   {
    261    "cell_type": "code",
    262    "execution_count": 41,
    263    "metadata": {
    264     "ExecuteTime": {
    265      "end_time": "2018-12-10T06:33:06.559308Z",
    266      "start_time": "2018-12-10T06:33:06.553450Z"
    267     }
    268    },
    269    "outputs": [
    270     {
    271      "data": {
    272       "text/html": [
    273        "<div>\n",
    274        "<style scoped>\n",
    275        "    .dataframe tbody tr th:only-of-type {\n",
    276        "        vertical-align: middle;\n",
    277        "    }\n",
    278        "\n",
    279        "    .dataframe tbody tr th {\n",
    280        "        vertical-align: top;\n",
    281        "    }\n",
    282        "\n",
    283        "    .dataframe thead th {\n",
    284        "        text-align: right;\n",
    285        "    }\n",
    286        "</style>\n",
    287        "<table border=\"1\" class=\"dataframe\">\n",
    288        "  <thead>\n",
    289        "    <tr style=\"text-align: right;\">\n",
    290        "      <th></th>\n",
    291        "      <th>category</th>\n",
    292        "      <th>samples</th>\n",
    293        "      <th>average</th>\n",
    294        "    </tr>\n",
    295        "  </thead>\n",
    296        "  <tbody>\n",
    297        "    <tr>\n",
    298        "      <th>0</th>\n",
    299        "      <td>capital-common-countries</td>\n",
    300        "      <td>462</td>\n",
    301        "      <td>0.701299</td>\n",
    302        "    </tr>\n",
    303        "    <tr>\n",
    304        "      <th>1</th>\n",
    305        "      <td>capital-world</td>\n",
    306        "      <td>930</td>\n",
    307        "      <td>0.690323</td>\n",
    308        "    </tr>\n",
    309        "    <tr>\n",
    310        "      <th>2</th>\n",
    311        "      <td>city-in-state</td>\n",
    312        "      <td>3644</td>\n",
    313        "      <td>0.350714</td>\n",
    314        "    </tr>\n",
    315        "    <tr>\n",
    316        "      <th>3</th>\n",
    317        "      <td>currency</td>\n",
    318        "      <td>268</td>\n",
    319        "      <td>0.018657</td>\n",
    320        "    </tr>\n",
    321        "    <tr>\n",
    322        "      <th>4</th>\n",
    323        "      <td>family</td>\n",
    324        "      <td>342</td>\n",
    325        "      <td>0.824561</td>\n",
    326        "    </tr>\n",
    327        "    <tr>\n",
    328        "      <th>5</th>\n",
    329        "      <td>gram1-adjective-to-adverb</td>\n",
    330        "      <td>650</td>\n",
    331        "      <td>0.143077</td>\n",
    332        "    </tr>\n",
    333        "    <tr>\n",
    334        "      <th>6</th>\n",
    335        "      <td>gram2-opposite</td>\n",
    336        "      <td>342</td>\n",
    337        "      <td>0.365497</td>\n",
    338        "    </tr>\n",
    339        "    <tr>\n",
    340        "      <th>7</th>\n",
    341        "      <td>gram3-comparative</td>\n",
    342        "      <td>1260</td>\n",
    343        "      <td>0.757937</td>\n",
    344        "    </tr>\n",
    345        "    <tr>\n",
    346        "      <th>8</th>\n",
    347        "      <td>gram4-superlative</td>\n",
    348        "      <td>930</td>\n",
    349        "      <td>0.686022</td>\n",
    350        "    </tr>\n",
    351        "    <tr>\n",
    352        "      <th>9</th>\n",
    353        "      <td>gram5-present-participle</td>\n",
    354        "      <td>702</td>\n",
    355        "      <td>0.750712</td>\n",
    356        "    </tr>\n",
    357        "    <tr>\n",
    358        "      <th>10</th>\n",
    359        "      <td>gram6-nationality-adjective</td>\n",
    360        "      <td>870</td>\n",
    361        "      <td>0.750575</td>\n",
    362        "    </tr>\n",
    363        "    <tr>\n",
    364        "      <th>11</th>\n",
    365        "      <td>gram7-past-tense</td>\n",
    366        "      <td>1190</td>\n",
    367        "      <td>0.576471</td>\n",
    368        "    </tr>\n",
    369        "    <tr>\n",
    370        "      <th>12</th>\n",
    371        "      <td>gram8-plural</td>\n",
    372        "      <td>1122</td>\n",
    373        "      <td>0.811052</td>\n",
    374        "    </tr>\n",
    375        "    <tr>\n",
    376        "      <th>13</th>\n",
    377        "      <td>gram9-plural-verbs</td>\n",
    378        "      <td>600</td>\n",
    379        "      <td>0.655000</td>\n",
    380        "    </tr>\n",
    381        "    <tr>\n",
    382        "      <th>14</th>\n",
    383        "      <td>total</td>\n",
    384        "      <td>13312</td>\n",
    385        "      <td>0.564228</td>\n",
    386        "    </tr>\n",
    387        "  </tbody>\n",
    388        "</table>\n",
    389        "</div>"
    390       ],
    391       "text/plain": [
    392        "                       category  samples   average\n",
    393        "0      capital-common-countries      462  0.701299\n",
    394        "1                 capital-world      930  0.690323\n",
    395        "2                 city-in-state     3644  0.350714\n",
    396        "3                      currency      268  0.018657\n",
    397        "4                        family      342  0.824561\n",
    398        "5     gram1-adjective-to-adverb      650  0.143077\n",
    399        "6                gram2-opposite      342  0.365497\n",
    400        "7             gram3-comparative     1260  0.757937\n",
    401        "8             gram4-superlative      930  0.686022\n",
    402        "9      gram5-present-participle      702  0.750712\n",
    403        "10  gram6-nationality-adjective      870  0.750575\n",
    404        "11             gram7-past-tense     1190  0.576471\n",
    405        "12                 gram8-plural     1122  0.811052\n",
    406        "13           gram9-plural-verbs      600  0.655000\n",
    407        "14                        total    13312  0.564228"
    408       ]
    409      },
    410      "execution_count": 41,
    411      "metadata": {},
    412      "output_type": "execute_result"
    413     }
    414    ],
    415    "source": [
    416     "result"
    417    ]
    418   },
    419   {
    420    "cell_type": "markdown",
    421    "metadata": {},
    422    "source": [
    423     "### Wikipedia result"
    424    ]
    425   },
    426   {
    427    "cell_type": "code",
    428    "execution_count": 39,
    429    "metadata": {
    430     "ExecuteTime": {
    431      "end_time": "2018-12-10T06:28:21.483713Z",
    432      "start_time": "2018-12-10T06:28:21.477881Z"
    433     }
    434    },
    435    "outputs": [
    436     {
    437      "data": {
    438       "text/html": [
    439        "<div>\n",
    440        "<style scoped>\n",
    441        "    .dataframe tbody tr th:only-of-type {\n",
    442        "        vertical-align: middle;\n",
    443        "    }\n",
    444        "\n",
    445        "    .dataframe tbody tr th {\n",
    446        "        vertical-align: top;\n",
    447        "    }\n",
    448        "\n",
    449        "    .dataframe thead th {\n",
    450        "        text-align: right;\n",
    451        "    }\n",
    452        "</style>\n",
    453        "<table border=\"1\" class=\"dataframe\">\n",
    454        "  <thead>\n",
    455        "    <tr style=\"text-align: right;\">\n",
    456        "      <th></th>\n",
    457        "      <th>category</th>\n",
    458        "      <th>samples</th>\n",
    459        "      <th>average</th>\n",
    460        "    </tr>\n",
    461        "  </thead>\n",
    462        "  <tbody>\n",
    463        "    <tr>\n",
    464        "      <th>0</th>\n",
    465        "      <td>capital-common-countries</td>\n",
    466        "      <td>506</td>\n",
    467        "      <td>0.948617</td>\n",
    468        "    </tr>\n",
    469        "    <tr>\n",
    470        "      <th>1</th>\n",
    471        "      <td>capital-world</td>\n",
    472        "      <td>8372</td>\n",
    473        "      <td>0.964644</td>\n",
    474        "    </tr>\n",
    475        "    <tr>\n",
    476        "      <th>2</th>\n",
    477        "      <td>city-in-state</td>\n",
    478        "      <td>4242</td>\n",
    479        "      <td>0.599953</td>\n",
    480        "    </tr>\n",
    481        "    <tr>\n",
    482        "      <th>3</th>\n",
    483        "      <td>currency</td>\n",
    484        "      <td>752</td>\n",
    485        "      <td>0.174202</td>\n",
    486        "    </tr>\n",
    487        "    <tr>\n",
    488        "      <th>4</th>\n",
    489        "      <td>family</td>\n",
    490        "      <td>506</td>\n",
    491        "      <td>0.881423</td>\n",
    492        "    </tr>\n",
    493        "    <tr>\n",
    494        "      <th>5</th>\n",
    495        "      <td>gram1-adjective-to-adverb</td>\n",
    496        "      <td>992</td>\n",
    497        "      <td>0.225806</td>\n",
    498        "    </tr>\n",
    499        "    <tr>\n",
    500        "      <th>6</th>\n",
    501        "      <td>gram2-opposite</td>\n",
    502        "      <td>756</td>\n",
    503        "      <td>0.285714</td>\n",
    504        "    </tr>\n",
    505        "    <tr>\n",
    506        "      <th>7</th>\n",
    507        "      <td>gram3-comparative</td>\n",
    508        "      <td>1332</td>\n",
    509        "      <td>0.882132</td>\n",
    510        "    </tr>\n",
    511        "    <tr>\n",
    512        "      <th>8</th>\n",
    513        "      <td>gram4-superlative</td>\n",
    514        "      <td>1056</td>\n",
    515        "      <td>0.746212</td>\n",
    516        "    </tr>\n",
    517        "    <tr>\n",
    518        "      <th>9</th>\n",
    519        "      <td>gram5-present-participle</td>\n",
    520        "      <td>1056</td>\n",
    521        "      <td>0.699811</td>\n",
    522        "    </tr>\n",
    523        "    <tr>\n",
    524        "      <th>10</th>\n",
    525        "      <td>gram6-nationality-adjective</td>\n",
    526        "      <td>1640</td>\n",
    527        "      <td>0.925000</td>\n",
    528        "    </tr>\n",
    529        "    <tr>\n",
    530        "      <th>11</th>\n",
    531        "      <td>gram7-past-tense</td>\n",
    532        "      <td>1560</td>\n",
    533        "      <td>0.611538</td>\n",
    534        "    </tr>\n",
    535        "    <tr>\n",
    536        "      <th>12</th>\n",
    537        "      <td>gram8-plural</td>\n",
    538        "      <td>1332</td>\n",
    539        "      <td>0.780781</td>\n",
    540        "    </tr>\n",
    541        "    <tr>\n",
    542        "      <th>13</th>\n",
    543        "      <td>gram9-plural-verbs</td>\n",
    544        "      <td>870</td>\n",
    545        "      <td>0.585057</td>\n",
    546        "    </tr>\n",
    547        "    <tr>\n",
    548        "      <th>14</th>\n",
    549        "      <td>total</td>\n",
    550        "      <td>24972</td>\n",
    551        "      <td>0.754445</td>\n",
    552        "    </tr>\n",
    553        "  </tbody>\n",
    554        "</table>\n",
    555        "</div>"
    556       ],
    557       "text/plain": [
    558        "                       category  samples   average\n",
    559        "0      capital-common-countries      506  0.948617\n",
    560        "1                 capital-world     8372  0.964644\n",
    561        "2                 city-in-state     4242  0.599953\n",
    562        "3                      currency      752  0.174202\n",
    563        "4                        family      506  0.881423\n",
    564        "5     gram1-adjective-to-adverb      992  0.225806\n",
    565        "6                gram2-opposite      756  0.285714\n",
    566        "7             gram3-comparative     1332  0.882132\n",
    567        "8             gram4-superlative     1056  0.746212\n",
    568        "9      gram5-present-participle     1056  0.699811\n",
    569        "10  gram6-nationality-adjective     1640  0.925000\n",
    570        "11             gram7-past-tense     1560  0.611538\n",
    571        "12                 gram8-plural     1332  0.780781\n",
    572        "13           gram9-plural-verbs      870  0.585057\n",
    573        "14                        total    24972  0.754445"
    574       ]
    575      },
    576      "execution_count": 39,
    577      "metadata": {},
    578      "output_type": "execute_result"
    579     }
    580    ],
    581    "source": [
    582     "result"
    583    ]
    584   },
    585   {
    586    "cell_type": "markdown",
    587    "metadata": {},
    588    "source": [
    589     "### Common Crawl result"
    590    ]
    591   },
    592   {
    593    "cell_type": "code",
    594    "execution_count": 33,
    595    "metadata": {
    596     "ExecuteTime": {
    597      "end_time": "2018-12-10T06:20:56.028002Z",
    598      "start_time": "2018-12-10T06:20:56.021706Z"
    599     }
    600    },
    601    "outputs": [
    602     {
    603      "data": {
    604       "text/html": [
    605        "<div>\n",
    606        "<style scoped>\n",
    607        "    .dataframe tbody tr th:only-of-type {\n",
    608        "        vertical-align: middle;\n",
    609        "    }\n",
    610        "\n",
    611        "    .dataframe tbody tr th {\n",
    612        "        vertical-align: top;\n",
    613        "    }\n",
    614        "\n",
    615        "    .dataframe thead th {\n",
    616        "        text-align: right;\n",
    617        "    }\n",
    618        "</style>\n",
    619        "<table border=\"1\" class=\"dataframe\">\n",
    620        "  <thead>\n",
    621        "    <tr style=\"text-align: right;\">\n",
    622        "      <th></th>\n",
    623        "      <th>category</th>\n",
    624        "      <th>samples</th>\n",
    625        "      <th>average</th>\n",
    626        "    </tr>\n",
    627        "  </thead>\n",
    628        "  <tbody>\n",
    629        "    <tr>\n",
    630        "      <th>0</th>\n",
    631        "      <td>capital-common-countries</td>\n",
    632        "      <td>506</td>\n",
    633        "      <td>0.946640</td>\n",
    634        "    </tr>\n",
    635        "    <tr>\n",
    636        "      <th>1</th>\n",
    637        "      <td>capital-world</td>\n",
    638        "      <td>4290</td>\n",
    639        "      <td>0.917483</td>\n",
    640        "    </tr>\n",
    641        "    <tr>\n",
    642        "      <th>2</th>\n",
    643        "      <td>city-in-state</td>\n",
    644        "      <td>4242</td>\n",
    645        "      <td>0.706742</td>\n",
    646        "    </tr>\n",
    647        "    <tr>\n",
    648        "      <th>3</th>\n",
    649        "      <td>currency</td>\n",
    650        "      <td>206</td>\n",
    651        "      <td>0.184466</td>\n",
    652        "    </tr>\n",
    653        "    <tr>\n",
    654        "      <th>4</th>\n",
    655        "      <td>family</td>\n",
    656        "      <td>420</td>\n",
    657        "      <td>0.978571</td>\n",
    658        "    </tr>\n",
    659        "    <tr>\n",
    660        "      <th>5</th>\n",
    661        "      <td>gram1-adjective-to-adverb</td>\n",
    662        "      <td>992</td>\n",
    663        "      <td>0.388105</td>\n",
    664        "    </tr>\n",
    665        "    <tr>\n",
    666        "      <th>6</th>\n",
    667        "      <td>gram2-opposite</td>\n",
    668        "      <td>702</td>\n",
    669        "      <td>0.363248</td>\n",
    670        "    </tr>\n",
    671        "    <tr>\n",
    672        "      <th>7</th>\n",
    673        "      <td>gram3-comparative</td>\n",
    674        "      <td>1332</td>\n",
    675        "      <td>0.876877</td>\n",
    676        "    </tr>\n",
    677        "    <tr>\n",
    678        "      <th>8</th>\n",
    679        "      <td>gram4-superlative</td>\n",
    680        "      <td>1122</td>\n",
    681        "      <td>0.919786</td>\n",
    682        "    </tr>\n",
    683        "    <tr>\n",
    684        "      <th>9</th>\n",
    685        "      <td>gram5-present-participle</td>\n",
    686        "      <td>1056</td>\n",
    687        "      <td>0.827652</td>\n",
    688        "    </tr>\n",
    689        "    <tr>\n",
    690        "      <th>10</th>\n",
    691        "      <td>gram6-nationality-adjective</td>\n",
    692        "      <td>1406</td>\n",
    693        "      <td>0.948791</td>\n",
    694        "    </tr>\n",
    695        "    <tr>\n",
    696        "      <th>11</th>\n",
    697        "      <td>gram7-past-tense</td>\n",
    698        "      <td>1560</td>\n",
    699        "      <td>0.621154</td>\n",
    700        "    </tr>\n",
    701        "    <tr>\n",
    702        "      <th>12</th>\n",
    703        "      <td>gram8-plural</td>\n",
    704        "      <td>1332</td>\n",
    705        "      <td>0.864114</td>\n",
    706        "    </tr>\n",
    707        "    <tr>\n",
    708        "      <th>13</th>\n",
    709        "      <td>gram9-plural-verbs</td>\n",
    710        "      <td>870</td>\n",
    711        "      <td>0.672414</td>\n",
    712        "    </tr>\n",
    713        "    <tr>\n",
    714        "      <th>14</th>\n",
    715        "      <td>total</td>\n",
    716        "      <td>20036</td>\n",
    717        "      <td>0.779347</td>\n",
    718        "    </tr>\n",
    719        "  </tbody>\n",
    720        "</table>\n",
    721        "</div>"
    722       ],
    723       "text/plain": [
    724        "                       category  samples   average\n",
    725        "0      capital-common-countries      506  0.946640\n",
    726        "1                 capital-world     4290  0.917483\n",
    727        "2                 city-in-state     4242  0.706742\n",
    728        "3                      currency      206  0.184466\n",
    729        "4                        family      420  0.978571\n",
    730        "5     gram1-adjective-to-adverb      992  0.388105\n",
    731        "6                gram2-opposite      702  0.363248\n",
    732        "7             gram3-comparative     1332  0.876877\n",
    733        "8             gram4-superlative     1122  0.919786\n",
    734        "9      gram5-present-participle     1056  0.827652\n",
    735        "10  gram6-nationality-adjective     1406  0.948791\n",
    736        "11             gram7-past-tense     1560  0.621154\n",
    737        "12                 gram8-plural     1332  0.864114\n",
    738        "13           gram9-plural-verbs      870  0.672414\n",
    739        "14                        total    20036  0.779347"
    740       ]
    741      },
    742      "execution_count": 33,
    743      "metadata": {},
    744      "output_type": "execute_result"
    745     }
    746    ],
    747    "source": [
    748     "result"
    749    ]
    750   },
    751   {
    752    "cell_type": "code",
    753    "execution_count": 16,
    754    "metadata": {
    755     "ExecuteTime": {
    756      "end_time": "2018-12-10T05:29:37.510823Z",
    757      "start_time": "2018-12-10T05:29:37.498492Z"
    758     }
    759    },
    760    "outputs": [
    761     {
    762      "data": {
    763       "text/html": [
    764        "<div>\n",
    765        "<style scoped>\n",
    766        "    .dataframe tbody tr th:only-of-type {\n",
    767        "        vertical-align: middle;\n",
    768        "    }\n",
    769        "\n",
    770        "    .dataframe tbody tr th {\n",
    771        "        vertical-align: top;\n",
    772        "    }\n",
    773        "\n",
    774        "    .dataframe thead th {\n",
    775        "        text-align: right;\n",
    776        "    }\n",
    777        "</style>\n",
    778        "<table border=\"1\" class=\"dataframe\">\n",
    779        "  <thead>\n",
    780        "    <tr style=\"text-align: right;\">\n",
    781        "      <th></th>\n",
    782        "      <th>category</th>\n",
    783        "      <th>correct</th>\n",
    784        "      <th>incorrect</th>\n",
    785        "      <th>average</th>\n",
    786        "    </tr>\n",
    787        "  </thead>\n",
    788        "  <tbody>\n",
    789        "    <tr>\n",
    790        "      <th>0</th>\n",
    791        "      <td>capital-common-countries</td>\n",
    792        "      <td>482</td>\n",
    793        "      <td>24</td>\n",
    794        "      <td>0.952569</td>\n",
    795        "    </tr>\n",
    796        "    <tr>\n",
    797        "      <th>1</th>\n",
    798        "      <td>capital-world</td>\n",
    799        "      <td>6093</td>\n",
    800        "      <td>227</td>\n",
    801        "      <td>0.964082</td>\n",
    802        "    </tr>\n",
    803        "    <tr>\n",
    804        "      <th>2</th>\n",
    805        "      <td>city-in-state</td>\n",
    806        "      <td>2472</td>\n",
    807        "      <td>1646</td>\n",
    808        "      <td>0.600291</td>\n",
    809        "    </tr>\n",
    810        "    <tr>\n",
    811        "      <th>3</th>\n",
    812        "      <td>currency</td>\n",
    813        "      <td>112</td>\n",
    814        "      <td>390</td>\n",
    815        "      <td>0.223108</td>\n",
    816        "    </tr>\n",
    817        "    <tr>\n",
    818        "      <th>4</th>\n",
    819        "      <td>family</td>\n",
    820        "      <td>392</td>\n",
    821        "      <td>28</td>\n",
    822        "      <td>0.933333</td>\n",
    823        "    </tr>\n",
    824        "    <tr>\n",
    825        "      <th>5</th>\n",
    826        "      <td>gram1-adjective-to-adverb</td>\n",
    827        "      <td>228</td>\n",
    828        "      <td>764</td>\n",
    829        "      <td>0.229839</td>\n",
    830        "    </tr>\n",
    831        "    <tr>\n",
    832        "      <th>6</th>\n",
    833        "      <td>gram2-opposite</td>\n",
    834        "      <td>205</td>\n",
    835        "      <td>497</td>\n",
    836        "      <td>0.292023</td>\n",
    837        "    </tr>\n",
    838        "    <tr>\n",
    839        "      <th>7</th>\n",
    840        "      <td>gram3-comparative</td>\n",
    841        "      <td>1175</td>\n",
    842        "      <td>157</td>\n",
    843        "      <td>0.882132</td>\n",
    844        "    </tr>\n",
    845        "    <tr>\n",
    846        "      <th>8</th>\n",
    847        "      <td>gram4-superlative</td>\n",
    848        "      <td>737</td>\n",
    849        "      <td>193</td>\n",
    850        "      <td>0.792473</td>\n",
    851        "    </tr>\n",
    852        "    <tr>\n",
    853        "      <th>9</th>\n",
    854        "      <td>gram5-present-participle</td>\n",
    855        "      <td>686</td>\n",
    856        "      <td>306</td>\n",
    857        "      <td>0.691532</td>\n",
    858        "    </tr>\n",
    859        "    <tr>\n",
    860        "      <th>10</th>\n",
    861        "      <td>gram6-nationality-adjective</td>\n",
    862        "      <td>1445</td>\n",
    863        "      <td>37</td>\n",
    864        "      <td>0.975034</td>\n",
    865        "    </tr>\n",
    866        "    <tr>\n",
    867        "      <th>11</th>\n",
    868        "      <td>gram7-past-tense</td>\n",
    869        "      <td>954</td>\n",
    870        "      <td>606</td>\n",
    871        "      <td>0.611538</td>\n",
    872        "    </tr>\n",
    873        "    <tr>\n",
    874        "      <th>12</th>\n",
    875        "      <td>gram8-plural</td>\n",
    876        "      <td>1016</td>\n",
    877        "      <td>244</td>\n",
    878        "      <td>0.806349</td>\n",
    879        "    </tr>\n",
    880        "    <tr>\n",
    881        "      <th>13</th>\n",
    882        "      <td>gram9-plural-verbs</td>\n",
    883        "      <td>472</td>\n",
    884        "      <td>340</td>\n",
    885        "      <td>0.581281</td>\n",
    886        "    </tr>\n",
    887        "    <tr>\n",
    888        "      <th>14</th>\n",
    889        "      <td>total</td>\n",
    890        "      <td>16469</td>\n",
    891        "      <td>5459</td>\n",
    892        "      <td>0.751049</td>\n",
    893        "    </tr>\n",
    894        "  </tbody>\n",
    895        "</table>\n",
    896        "</div>"
    897       ],
    898       "text/plain": [
    899        "                       category  correct  incorrect   average\n",
    900        "0      capital-common-countries      482         24  0.952569\n",
    901        "1                 capital-world     6093        227  0.964082\n",
    902        "2                 city-in-state     2472       1646  0.600291\n",
    903        "3                      currency      112        390  0.223108\n",
    904        "4                        family      392         28  0.933333\n",
    905        "5     gram1-adjective-to-adverb      228        764  0.229839\n",
    906        "6                gram2-opposite      205        497  0.292023\n",
    907        "7             gram3-comparative     1175        157  0.882132\n",
    908        "8             gram4-superlative      737        193  0.792473\n",
    909        "9      gram5-present-participle      686        306  0.691532\n",
    910        "10  gram6-nationality-adjective     1445         37  0.975034\n",
    911        "11             gram7-past-tense      954        606  0.611538\n",
    912        "12                 gram8-plural     1016        244  0.806349\n",
    913        "13           gram9-plural-verbs      472        340  0.581281\n",
    914        "14                        total    16469       5459  0.751049"
    915       ]
    916      },
    917      "execution_count": 16,
    918      "metadata": {},
    919      "output_type": "execute_result"
    920     }
    921    ],
    922    "source": [
    923     "result"
    924    ]
    925   },
    926   {
    927    "cell_type": "code",
    928    "execution_count": 17,
    929    "metadata": {
    930     "ExecuteTime": {
    931      "end_time": "2018-12-10T05:29:55.829245Z",
    932      "start_time": "2018-12-10T05:29:55.822131Z"
    933     }
    934    },
    935    "outputs": [],
    936    "source": [
    937     "result.to_csv(glove_path / 'accuracy.csv', index=False)"
    938    ]
    939   },
    940   {
    941    "cell_type": "code",
    942    "execution_count": null,
    943    "metadata": {},
    944    "outputs": [],
    945    "source": []
    946   }
    947  ],
    948  "metadata": {
    949   "kernelspec": {
    950    "display_name": "Python 3",
    951    "language": "python",
    952    "name": "python3"
    953   },
    954   "language_info": {
    955    "codemirror_mode": {
    956     "name": "ipython",
    957     "version": 3
    958    },
    959    "file_extension": ".py",
    960    "mimetype": "text/x-python",
    961    "name": "python",
    962    "nbconvert_exporter": "python",
    963    "pygments_lexer": "ipython3",
    964    "version": "3.6.8"
    965   },
    966   "toc": {
    967    "base_numbering": 1,
    968    "nav_menu": {},
    969    "number_sections": true,
    970    "sideBar": true,
    971    "skip_h1_title": false,
    972    "title_cell": "Table of Contents",
    973    "title_sidebar": "Contents",
    974    "toc_cell": false,
    975    "toc_position": {},
    976    "toc_section_display": true,
    977    "toc_window_display": false
    978   }
    979  },
    980  "nbformat": 4,
    981  "nbformat_minor": 2
    982 }