ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

05_lda_with_gensim.ipynb

(64387B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {
      6     "slideshow": {
      7      "slide_type": "slide"
      8     }
      9    },
     10    "source": [
     11     "# Topic Modeling: Latent Dirichlet Allocation with gensim"
     12    ]
     13   },
     14   {
     15    "cell_type": "markdown",
     16    "metadata": {},
     17    "source": [
     18     "Gensim is a specialized NLP library with a fast LDA implementation and many additional features. We will also use it in the next chapter on word vectors (see the notebook lda_with_gensim for details)."
     19    ]
     20   },
     21   {
     22    "cell_type": "markdown",
     23    "metadata": {
     24     "slideshow": {
     25      "slide_type": "slide"
     26     }
     27    },
     28    "source": [
     29     "## Imports & Settings"
     30    ]
     31   },
     32   {
     33    "cell_type": "code",
     34    "execution_count": 1,
     35    "metadata": {
     36     "ExecuteTime": {
     37      "end_time": "2018-11-17T22:51:16.888440Z",
     38      "start_time": "2018-11-17T22:51:16.589557Z"
     39     },
     40     "slideshow": {
     41      "slide_type": "fragment"
     42     }
     43    },
     44    "outputs": [
     45     {
     46      "name": "stderr",
     47      "output_type": "stream",
     48      "text": [
     49       "/home/stefan/.pyenv/versions/miniconda3-latest/envs/ml4t/lib/python3.6/site-packages/scipy/sparse/sparsetools.py:21: DeprecationWarning: `scipy.sparse.sparsetools` is deprecated!\n",
     50       "scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.\n",
     51       "  _deprecated()\n"
     52      ]
     53     }
     54    ],
     55    "source": [
     56     "import warnings\n",
     57     "from collections import OrderedDict\n",
     58     "from pathlib import Path\n",
     59     "\n",
     60     "import numpy as np\n",
     61     "import pandas as pd\n",
     62     "\n",
     63     "# Visualization\n",
     64     "from ipywidgets import interact, FloatSlider\n",
     65     "import matplotlib.pyplot as plt\n",
     66     "from matplotlib.ticker import FuncFormatter\n",
     67     "import seaborn as sns\n",
     68     "\n",
     69     "import pyLDAvis\n",
     70     "from pyLDAvis.sklearn import prepare\n",
     71     "\n",
     72     "from wordcloud import WordCloud\n",
     73     "from termcolor import colored\n",
     74     "\n",
     75     "# spacy for language processing\n",
     76     "import spacy\n",
     77     "\n",
     78     "# sklearn for feature extraction & modeling\n",
     79     "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer\n",
     80     "from sklearn.decomposition import LatentDirichletAllocation, TruncatedSVD, NMF\n",
     81     "from sklearn.model_selection import train_test_split\n",
     82     "from sklearn.externals import joblib\n",
     83     "\n",
     84     "# gensim for alternative models\n",
     85     "from gensim.models import LdaModel, LdaMulticore\n",
     86     "from gensim.corpora import Dictionary\n",
     87     "from gensim.matutils import Sparse2Corpus"
     88    ]
     89   },
     90   {
     91    "cell_type": "code",
     92    "execution_count": 2,
     93    "metadata": {
     94     "ExecuteTime": {
     95      "end_time": "2018-05-01T04:27:58.207682Z",
     96      "start_time": "2018-05-01T04:27:58.198244Z"
     97     }
     98    },
     99    "outputs": [],
    100    "source": [
    101     "%matplotlib inline\n",
    102     "plt.style.use('ggplot')\n",
    103     "plt.rcParams['figure.figsize'] = (14.0, 8.7)\n",
    104     "pyLDAvis.enable_notebook()\n",
    105     "warnings.filterwarnings('ignore')\n",
    106     "pd.options.display.float_format = '{:,.2f}'.format"
    107    ]
    108   },
    109   {
    110    "cell_type": "markdown",
    111    "metadata": {
    112     "slideshow": {
    113      "slide_type": "skip"
    114     }
    115    },
    116    "source": [
    117     "## Load BBC data"
    118    ]
    119   },
    120   {
    121    "cell_type": "code",
    122    "execution_count": 3,
    123    "metadata": {},
    124    "outputs": [],
    125    "source": [
    126     "# change to your data path if necessary\n",
    127     "DATA_DIR = Path('../data')"
    128    ]
    129   },
    130   {
    131    "cell_type": "code",
    132    "execution_count": 4,
    133    "metadata": {
    134     "ExecuteTime": {
    135      "end_time": "2018-11-30T16:00:39.606772Z",
    136      "start_time": "2018-11-30T16:00:39.503364Z"
    137     },
    138     "slideshow": {
    139      "slide_type": "skip"
    140     }
    141    },
    142    "outputs": [],
    143    "source": [
    144     "path = DATA_DIR / 'bbc'\n",
    145     "files = path.glob('**/*.txt')\n",
    146     "doc_list = []\n",
    147     "for i, file in enumerate(files):\n",
    148     "    with open(str(file), encoding='latin1') as f:\n",
    149     "        topic = file.parts[-2]\n",
    150     "        lines = f.readlines()\n",
    151     "        heading = lines[0].strip()\n",
    152     "        body = ' '.join([l.strip() for l in lines[1:]])\n",
    153     "        doc_list.append([topic.capitalize(), heading, body])"
    154    ]
    155   },
    156   {
    157    "cell_type": "markdown",
    158    "metadata": {
    159     "slideshow": {
    160      "slide_type": "skip"
    161     }
    162    },
    163    "source": [
    164     "### Convert to DataFrame"
    165    ]
    166   },
    167   {
    168    "cell_type": "code",
    169    "execution_count": 5,
    170    "metadata": {
    171     "ExecuteTime": {
    172      "end_time": "2018-05-01T04:27:59.007837Z",
    173      "start_time": "2018-05-01T04:27:58.992529Z"
    174     },
    175     "slideshow": {
    176      "slide_type": "skip"
    177     }
    178    },
    179    "outputs": [
    180     {
    181      "name": "stdout",
    182      "output_type": "stream",
    183      "text": [
    184       "<class 'pandas.core.frame.DataFrame'>\n",
    185       "RangeIndex: 2225 entries, 0 to 2224\n",
    186       "Data columns (total 3 columns):\n",
    187       "topic      2225 non-null object\n",
    188       "heading    2225 non-null object\n",
    189       "article    2225 non-null object\n",
    190       "dtypes: object(3)\n",
    191       "memory usage: 52.2+ KB\n"
    192      ]
    193     }
    194    ],
    195    "source": [
    196     "docs = pd.DataFrame(doc_list, columns=['topic', 'heading', 'article'])\n",
    197     "docs.info()"
    198    ]
    199   },
    200   {
    201    "cell_type": "markdown",
    202    "metadata": {
    203     "slideshow": {
    204      "slide_type": "slide"
    205     }
    206    },
    207    "source": [
    208     "## Create Train & Test Sets"
    209    ]
    210   },
    211   {
    212    "cell_type": "code",
    213    "execution_count": 6,
    214    "metadata": {
    215     "ExecuteTime": {
    216      "end_time": "2018-05-01T04:38:58.366229Z",
    217      "start_time": "2018-05-01T04:38:58.356918Z"
    218     },
    219     "slideshow": {
    220      "slide_type": "fragment"
    221     }
    222    },
    223    "outputs": [],
    224    "source": [
    225     "train_docs, test_docs = train_test_split(docs, \n",
    226     "                                         stratify=docs.topic, \n",
    227     "                                         test_size=50, \n",
    228     "                                         random_state=42)"
    229    ]
    230   },
    231   {
    232    "cell_type": "code",
    233    "execution_count": 7,
    234    "metadata": {
    235     "ExecuteTime": {
    236      "end_time": "2018-05-01T04:38:58.372958Z",
    237      "start_time": "2018-05-01T04:38:58.368455Z"
    238     },
    239     "slideshow": {
    240      "slide_type": "fragment"
    241     }
    242    },
    243    "outputs": [
    244     {
    245      "data": {
    246       "text/plain": [
    247        "((2175, 3), (50, 3))"
    248       ]
    249      },
    250      "execution_count": 7,
    251      "metadata": {},
    252      "output_type": "execute_result"
    253     }
    254    ],
    255    "source": [
    256     "train_docs.shape, test_docs.shape"
    257    ]
    258   },
    259   {
    260    "cell_type": "code",
    261    "execution_count": 8,
    262    "metadata": {
    263     "ExecuteTime": {
    264      "end_time": "2018-05-01T04:38:58.381455Z",
    265      "start_time": "2018-05-01T04:38:58.374872Z"
    266     },
    267     "slideshow": {
    268      "slide_type": "fragment"
    269     }
    270    },
    271    "outputs": [
    272     {
    273      "data": {
    274       "text/plain": [
    275        "Sport            12\n",
    276        "Business         11\n",
    277        "Tech              9\n",
    278        "Entertainment     9\n",
    279        "Politics          9\n",
    280        "Name: topic, dtype: int64"
    281       ]
    282      },
    283      "execution_count": 8,
    284      "metadata": {},
    285      "output_type": "execute_result"
    286     }
    287    ],
    288    "source": [
    289     "pd.Series(test_docs.topic).value_counts()"
    290    ]
    291   },
    292   {
    293    "cell_type": "markdown",
    294    "metadata": {
    295     "slideshow": {
    296      "slide_type": "slide"
    297     }
    298    },
    299    "source": [
    300     "### Vectorize train & test sets"
    301    ]
    302   },
    303   {
    304    "cell_type": "code",
    305    "execution_count": 9,
    306    "metadata": {
    307     "ExecuteTime": {
    308      "end_time": "2018-05-01T04:38:59.033549Z",
    309      "start_time": "2018-05-01T04:38:58.383604Z"
    310     },
    311     "slideshow": {
    312      "slide_type": "fragment"
    313     }
    314    },
    315    "outputs": [
    316     {
    317      "data": {
    318       "text/plain": [
    319        "<2175x2000 sparse matrix of type '<class 'numpy.int64'>'\n",
    320        "\twith 178572 stored elements in Compressed Sparse Row format>"
    321       ]
    322      },
    323      "execution_count": 9,
    324      "metadata": {},
    325      "output_type": "execute_result"
    326     }
    327    ],
    328    "source": [
    329     "vectorizer = CountVectorizer(max_df=.2, \n",
    330     "                             min_df=3, \n",
    331     "                             stop_words='english', \n",
    332     "                             max_features=2000)\n",
    333     "\n",
    334     "train_dtm = vectorizer.fit_transform(train_docs.article)\n",
    335     "words = vectorizer.get_feature_names()\n",
    336     "train_dtm"
    337    ]
    338   },
    339   {
    340    "cell_type": "code",
    341    "execution_count": 10,
    342    "metadata": {
    343     "ExecuteTime": {
    344      "end_time": "2018-05-01T04:38:59.052875Z",
    345      "start_time": "2018-05-01T04:38:59.035152Z"
    346     },
    347     "scrolled": true,
    348     "slideshow": {
    349      "slide_type": "fragment"
    350     }
    351    },
    352    "outputs": [
    353     {
    354      "data": {
    355       "text/plain": [
    356        "<50x2000 sparse matrix of type '<class 'numpy.int64'>'\n",
    357        "\twith 4160 stored elements in Compressed Sparse Row format>"
    358       ]
    359      },
    360      "execution_count": 10,
    361      "metadata": {},
    362      "output_type": "execute_result"
    363     }
    364    ],
    365    "source": [
    366     "test_dtm = vectorizer.transform(test_docs.article)\n",
    367     "test_dtm"
    368    ]
    369   },
    370   {
    371    "cell_type": "markdown",
    372    "metadata": {
    373     "slideshow": {
    374      "slide_type": "slide"
    375     }
    376    },
    377    "source": [
    378     "## LDA with gensim"
    379    ]
    380   },
    381   {
    382    "cell_type": "markdown",
    383    "metadata": {
    384     "slideshow": {
    385      "slide_type": "fragment"
    386     }
    387    },
    388    "source": [
    389     "### Using `CountVectorizer` Input"
    390    ]
    391   },
    392   {
    393    "cell_type": "code",
    394    "execution_count": 11,
    395    "metadata": {
    396     "ExecuteTime": {
    397      "end_time": "2018-05-01T04:50:23.337553Z",
    398      "start_time": "2018-05-01T04:50:23.017269Z"
    399     },
    400     "slideshow": {
    401      "slide_type": "fragment"
    402     }
    403    },
    404    "outputs": [],
    405    "source": [
    406     "max_df = .2\n",
    407     "min_df = 3\n",
    408     "max_features = 2000\n",
    409     "\n",
    410     "# used by sklearn: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/feature_extraction/stop_words.py\n",
    411     "stop_words = pd.read_csv('http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words', \n",
    412     "                         header=None, \n",
    413     "                         squeeze=True).tolist()"
    414    ]
    415   },
    416   {
    417    "cell_type": "code",
    418    "execution_count": 12,
    419    "metadata": {
    420     "ExecuteTime": {
    421      "end_time": "2018-05-01T04:50:24.064327Z",
    422      "start_time": "2018-05-01T04:50:23.340576Z"
    423     },
    424     "slideshow": {
    425      "slide_type": "fragment"
    426     }
    427    },
    428    "outputs": [],
    429    "source": [
    430     "vectorizer = CountVectorizer(max_df=max_df, \n",
    431     "                             min_df=min_df, \n",
    432     "                             stop_words='english', \n",
    433     "                             max_features=max_features)\n",
    434     "\n",
    435     "train_dtm = vectorizer.fit_transform(train_docs.article)\n",
    436     "test_dtm = vectorizer.transform(test_docs.article)"
    437    ]
    438   },
    439   {
    440    "cell_type": "markdown",
    441    "metadata": {},
    442    "source": [
    443     "### Convert sklearn DTM to gensim data structures"
    444    ]
    445   },
    446   {
    447    "cell_type": "markdown",
    448    "metadata": {},
    449    "source": [
    450     "Gensim facilitates the conversion of the DTM produced by sklearn into gensim data structures as follows:"
    451    ]
    452   },
    453   {
    454    "cell_type": "code",
    455    "execution_count": 15,
    456    "metadata": {
    457     "ExecuteTime": {
    458      "end_time": "2018-05-01T04:50:24.070987Z",
    459      "start_time": "2018-05-01T04:50:24.066420Z"
    460     },
    461     "slideshow": {
    462      "slide_type": "slide"
    463     }
    464    },
    465    "outputs": [],
    466    "source": [
    467     "train_corpus = Sparse2Corpus(train_dtm, documents_columns=False)\n",
    468     "test_corpus = Sparse2Corpus(test_dtm, documents_columns=False)\n",
    469     "id2word = pd.Series(vectorizer.get_feature_names()).to_dict()"
    470    ]
    471   },
    472   {
    473    "cell_type": "markdown",
    474    "metadata": {
    475     "slideshow": {
    476      "slide_type": "slide"
    477     }
    478    },
    479    "source": [
    480     "### Train Model & Review Results"
    481    ]
    482   },
    483   {
    484    "cell_type": "code",
    485    "execution_count": 20,
    486    "metadata": {},
    487    "outputs": [],
    488    "source": [
    489     "LdaModel(corpus=train_corpus, \n",
    490     "         num_topics=100, \n",
    491     "         id2word=None, \n",
    492     "         distributed=False, \n",
    493     "         chunksize=2000,                   # Number of documents to be used in each training chunk.\n",
    494     "         passes=1,                         # Number of passes through the corpus during training\n",
    495     "         update_every=1,                   # Number of docs to be iterated through for each update\n",
    496     "         alpha='symmetric', \n",
    497     "         eta=None,                         # a-priori belief on word probability\n",
    498     "         decay=0.5,                        # percentage of previous lambda forgotten when new document is examined\n",
    499     "         offset=1.0,                       # controls slow down of the first steps the first few iterations.\n",
    500     "         eval_every=10,                    # estimate log perplexity\n",
    501     "         iterations=50,                    # Maximum number of iterations through the corpus\n",
    502     "         gamma_threshold=0.001,            # Minimum change in the value of the gamma parameters to continue iterating\n",
    503     "         minimum_probability=0.01,         # Topics with a probability lower than this threshold will be filtered out\n",
    504     "         random_state=None, \n",
    505     "         ns_conf=None, \n",
    506     "         minimum_phi_value=0.01,           # if `per_word_topics` is True, represents lower bound on term probabilities\n",
    507     "         per_word_topics=False,            #  If True, compute a list of most likely topics for each word with phi values multiplied by word count\n",
    508     "         callbacks=None);"
    509    ]
    510   },
    511   {
    512    "cell_type": "code",
    513    "execution_count": 16,
    514    "metadata": {},
    515    "outputs": [],
    516    "source": [
    517     "num_topics = 5\n",
    518     "topic_labels = ['Topic {}'.format(i) for i in range(1, num_topics+1)]"
    519    ]
    520   },
    521   {
    522    "cell_type": "code",
    523    "execution_count": 17,
    524    "metadata": {
    525     "ExecuteTime": {
    526      "end_time": "2018-05-01T05:07:27.311042Z",
    527      "start_time": "2018-05-01T05:05:23.051642Z"
    528     },
    529     "slideshow": {
    530      "slide_type": "fragment"
    531     }
    532    },
    533    "outputs": [],
    534    "source": [
    535     "lda_gensim = LdaModel(corpus=train_corpus,\n",
    536     "                      num_topics=num_topics,\n",
    537     "                      id2word=id2word)"
    538    ]
    539   },
    540   {
    541    "cell_type": "code",
    542    "execution_count": 18,
    543    "metadata": {
    544     "ExecuteTime": {
    545      "end_time": "2018-05-01T05:04:28.529642Z",
    546      "start_time": "2018-05-01T05:02:33.896Z"
    547     },
    548     "slideshow": {
    549      "slide_type": "fragment"
    550     }
    551    },
    552    "outputs": [
    553     {
    554      "data": {
    555       "text/plain": [
    556        "(0,\n",
    557        " '0.008*\"search\" + 0.006*\"net\" + 0.006*\"mail\" + 0.005*\"yahoo\" + 0.005*\"labour\" + 0.005*\"web\" + 0.005*\"tax\" + 0.004*\"says\" + 0.004*\"information\" + 0.004*\"oil\"')"
    558       ]
    559      },
    560      "execution_count": 18,
    561      "metadata": {},
    562      "output_type": "execute_result"
    563     }
    564    ],
    565    "source": [
    566     "topics = lda_gensim.print_topics()\n",
    567     "topics[0]"
    568    ]
    569   },
    570   {
    571    "cell_type": "markdown",
    572    "metadata": {
    573     "slideshow": {
    574      "slide_type": "slide"
    575     }
    576    },
    577    "source": [
    578     "### Evaluate Topic Coherence\n",
    579     "\n",
    580     "Topic Coherence measures whether the words in a topic tend to co-occur. \n",
    581     "\n",
    582     "- It adds up a score for each distinct pair of top ranked words. \n",
    583     "- The score is the log of the probability that a document containing at least one instance of the higher-ranked word also contains at least one instance of the lower-ranked word.\n",
    584     "\n",
    585     "Large negative values indicate words that don't co-occur often; values closer to zero indicate that words tend to co-occur more often."
    586    ]
    587   },
    588   {
    589    "cell_type": "code",
    590    "execution_count": 21,
    591    "metadata": {
    592     "ExecuteTime": {
    593      "end_time": "2018-05-01T05:04:28.530886Z",
    594      "start_time": "2018-05-01T05:02:34.504Z"
    595     },
    596     "slideshow": {
    597      "slide_type": "fragment"
    598     }
    599    },
    600    "outputs": [],
    601    "source": [
    602     "coherence = lda_gensim.top_topics(corpus=train_corpus, coherence='u_mass')"
    603    ]
    604   },
    605   {
    606    "cell_type": "markdown",
    607    "metadata": {},
    608    "source": [
    609     "Gensim's topic coherence evaluation returns the coherence score for each topic and shows the most important words per topic: "
    610    ]
    611   },
    612   {
    613    "cell_type": "code",
    614    "execution_count": 22,
    615    "metadata": {
    616     "ExecuteTime": {
    617      "end_time": "2018-05-01T05:04:28.531995Z",
    618      "start_time": "2018-05-01T05:02:36.466Z"
    619     },
    620     "slideshow": {
    621      "slide_type": "slide"
    622     }
    623    },
    624    "outputs": [
    625     {
    626      "name": "stdout",
    627      "output_type": "stream",
    628      "text": [
    629       "  Topic 1         Topic 2           Topic 3           Topic 4         Topic 5            \n",
    630       "     prob    term    prob      term    prob      term    prob    term    prob        term\n",
    631       "0   0.70%   games   0.56%    united   0.97%    labour   0.78%  search   0.70%     digital\n",
    632       "1   0.55%    game   0.52%        eu   0.81%     blair   0.62%     net   0.69%        wage\n",
    633       "2   0.49%    2004   0.38%       aid   0.63%     party   0.59%    mail   0.60%     minimum\n",
    634       "3   0.47%  market   0.38%  airlines   0.62%      film   0.51%   yahoo   0.58%    software\n",
    635       "4   0.46%  prices   0.38%     state   0.53%  minister   0.51%  labour   0.55%  technology\n"
    636      ]
    637     },
    638     {
    639      "data": {
    640       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAz4AAAIWCAYAAACWQ1g3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3W9snfV99/GPYzcg4hJin5I0S1ImByrYlqZpkrWMFhrcJ/tDcw8G7saeoBUyqla0MCWhC3S4KWYQZRNlE7Qp6iiCDZZQNLFKsVBaAa0gTKFbEaCkGhU0JDjGyVz+NIl9P5hu32N2GsfHuU78y+v1yNc5l/37YX8T8c51neOm4eHh4QAAABRsWqM3AAAAcLwJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeC2T8UV27NiRe++9N0NDQ7n44ouzcuXKdz1/8ODBfP3rX89Pf/rTvPe97811112XM888czKWBgAAOKq6r/gMDQ1l06ZNufHGG7Nx48Y8+eSTeeWVV951zuOPP54ZM2bkzjvvzO/93u/l/vvvr3dZAACAcas7fHbu3Jk5c+Zk9uzZaWlpyfnnn59nnnnmXeds3749F110UZLkox/9aP7jP/4jfm8qAABQlbrDp7+/P+3t7SPH7e3t6e/vP+I5zc3NOe200/Jf//Vf9S4NAAAwLnW/xmesKzdNTU3HfM7/09vbm97e3iRJT09PvdsDAACoP3za29uzb9++keN9+/Zl1qxZY57T3t6ew4cP580330xra+uYX6+zszOdnZ0jxz//+c/r3eJxcfizlzR6C1NS8zcebfQWpiTzNjHmbWLM28SYt4kxbxNj3ibOzE3MiTxzc+fOHdd5dd/q1tHRkd27d2fv3r05dOhQnnrqqSxduvRd53zkIx/Jtm3bkiQ/+tGP8hu/8RtHvOIDAAAw2eq+4tPc3Jyrrroq69evz9DQUD75yU9m/vz5+cd//Md0dHRk6dKlWbFiRb7+9a/n85//fFpbW3PddddNxt4BAADGZVJ+j8+SJUuyZMmSdz12xRVXjHw8ffr0fOlLX5qMpQAAAI5Z3be6AQAAnOiEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAULyWej55cHAwGzduzOuvv573ve99+eIXv5jW1tZR511xxRVZsGBBkqRWq2X16tX1LAsAAHBM6gqfRx55JL/1W7+VlStX5pFHHskjjzySK6+8ctR506dPz+23317PUgAAABNW161uzzzzTC688MIkyYUXXphnnnlmUjYFAAAwmeq64rN///7MmjUrSTJr1qwcOHBgzPMOHjyYNWvWpLm5OZ/+9KezfPnyepYFAAA4JkcNn+7u7gwMDIx6vKura9yL/N3f/V3a2tqyZ8+e3HLLLVmwYEHmzJkz5rm9vb3p7e1NkvT09KRWq417nSrtafQGpqgT9ed5ojNvE2PeJsa8TYx5mxjzNjHmbeLM3MSUMHNHDZ9169Yd8bmZM2fmjTfeyKxZs/LGG2/k9NNPH/O8tra2JMns2bNz3nnn5T//8z+PGD6dnZ3p7OwcOe7r6zvaFplC/DypknmjSuaNKpk3qnYiz9zcuXPHdV5dr/FZunRpvv/9
7ydJvv/972fZsmWjzhkcHMzBgweTJAcOHMiLL76YefPm1bMsAADAManrNT4rV67Mxo0b8/jjj6dWq+VLX/pSkmTXrl3ZunVrVq1alVdffTX33HNPpk2blqGhoaxcuVL4AAAAlaorfN773vfmpptuGvV4R0dHOjo6kiQf/OAHs2HDhnqWAQAAqEtdt7oBAABMBcIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAoXks9n/zDH/4wDz30UF599dV87WtfS0dHx5jn7dixI/fee2+GhoZy8cUXZ+XKlfUsCwAAcEzquuIzf/783HDDDTn33HOPeM7Q0FA2bdqUG2+8MRs3bsyTTz6ZV155pZ5lAQAAjkldV3zmzZt31HN27tyZOXPmZPbs2UmS888/P88888y4PhcAAGAyHPfX+PT396e9vX3kuL29Pf39/cd7WQAAgBFHveLT3d2dgYGBUY93dXVl2bJlR11geHh41GNNTU1HPL+3tze9vb1Jkp6entRqtaOu0Qh7Gr2BKepE/Xme6MzbxJi3iTFvE2PeJsa8TYx5mzgzNzElzNxRw2fdunV1LdDe3p59+/aNHO/bty+zZs064vmdnZ3p7OwcOe7r66trfU4sfp5UybxRJfNGlcwbVTuRZ27u3LnjOu+43+rW0dGR3bt3Z+/evTl06FCeeuqpLF269HgvCwAAMKKu8Hn66aezatWqvPTSS+np6cn69euT/Pfrem699dYkSXNzc6666qqsX78+X/ziF/Oxj30s8+fPr3/nAAAA41TXu7otX748y5cvH/V4W1tb1q5dO3K8ZMmSLFmypJ6lAAAAJuy43+oGAADQaMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAoXks9n/zDH/4wDz30UF599dV87WtfS0dHx5jnfe5zn8upp56aadOmpbm5OT09PfUsCwAAcEzqCp/58+fnhhtuyD333HPUc2+++eacfvrp9SwHAAAwIXWFz7x58yZrHwAAAMdNXeFzLNavX58k+dSnPpXOzs6qlgUAADh6+HR3d2dgYGDU411dXVm2bNm4Funu7k5bW1v279+fr371q5k7d27OO++8Mc/t7e1Nb29vkqSnpye1Wm1ca1RtT6M3MEWdqD/PE515mxjzNjHmbWLM28SYt4kxbxNn5iamhJk7avisW7eu7kXa2tqSJDNnzsyyZcuyc+fOI4ZPZ2fnu64I9fX11b0+Jw4/T6pk3qiSeaNK5o2qncgzN3fu3HGdd9zfzvrtt9/OW2+9NfLxj3/84yxYsOB4LwsAADCirtf4PP300/nWt76VAwcOpKenJ2ed
dVa+/OUvp7+/P3fffXfWrl2b/fv354477kiSHD58OBdccEEWL148KZsHAAAYj7rCZ/ny5Vm+fPmox9va2rJ27dokyezZs3P77bfXswwAAEBdjvutbgAAAI0mfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAoXkujNzBVNX/j0UZv4YhqtVr6+voavQ0AADhhuOIDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABSvrnd1u++++/Lss8+mpaUls2fPzrXXXpsZM2aMOm/Hjh259957MzQ0lIsvvjgrV66sZ1kAAIBjUtcVn0WLFmXDhg2544478v73vz9btmwZdc7Q0FA2bdqUG2+8MRs3bsyTTz6ZV155pZ5lAQAAjkld4fOhD30ozc3NSZJzzjkn/f39o87ZuXNn5syZk9mzZ6elpSXnn39+nnnmmXqWBQAAOCaT9hqfxx9/PIsXLx71eH9/f9rb20eO29vbxwwkAACA4+Wor/Hp7u7OwMDAqMe7urqybNmyJMnmzZvT3Nycj3/846POGx4eHvVYU1PTEdfr7e1Nb29vkqSnpye1Wu1oW+R/aWlp8X0rzJ5Gb2CK8udgYszbxJi3iTFvE2PeJs7MTUwJM3fU8Fm3bt2vfH7btm159tlnc9NNN40ZNO3t7dm3b9/I8b59+zJr1qwjfr3Ozs50dnaOHPf19R1ti/wvtVrN9w3i7w+qZd6oknmjaifyzM2dO3dc59V1q9uOHTvy3e9+N6tXr84pp5wy5jkdHR3ZvXt39u7dm0OHDuWpp57K0qVL61kWAADgmNT1dtabNm3KoUOH0t3dnSQ5++yzc/XVV6e/vz9333131q5dm+bm5lx11VVZv359hoaG8slPfjLz58+flM0DAACMR13hc+edd475eFtbW9auXTtyvGTJkixZsqSepQAAACZs0t7VDQAA4EQlfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOK11PPJ9913X5599tm0tLRk9uzZufbaazNjxoxR533uc5/LqaeemmnTpqW5uTk9PT31LAsAAHBM6gqfRYsW5Y//+I/T3Nyc73znO9myZUuuvPLKMc+9+eabc/rpp9ezHAAAwITUdavbhz70oTQ3NydJzjnnnPT390/KpgAAACZTXVd8/qfHH388559//hGfX79+fZLkU5/6VDo7OydrWQAAgKM6avh0d3dnYGBg1ONdXV1ZtmxZkmTz5s1pbm7Oxz/+8SN+jba2tuzfvz9f/epXM3fu3Jx33nljntvb25ve3t4kSU9PT2q12rj/Y/hvLS0tvm+F2dPoDUxR/hxMjHmbGPM2MeZtYszbxJm5iSlh5o4aPuvWrfuVz2/bti3PPvtsbrrp
pjQ1NY15TltbW5Jk5syZWbZsWXbu3HnE8Ons7HzXFaG+vr6jbZH/pVar+b5B/P1BtcwbVTJvVO1Enrm5c+eO67y6XuOzY8eOfPe7383q1atzyimnjHnO22+/nbfeemvk4x//+MdZsGBBPcsCAAAck7pe47Np06YcOnQo3d3dSZKzzz47V199dfr7+3P33Xdn7dq12b9/f+64444kyeHDh3PBBRdk8eLF9e8cAABgnOoKnzvvvHPMx9va2rJ27dokyezZs3P77bfXswyc9Jq/8Wijt3BEbq0EAKaCum51AwAAmAqEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPFaGr0BAE4szd94tNFbOKJarZa+vr5GbwOAKcgVHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOIJHwAAoHjCBwAAKJ7wAQAAiid8AACA4gkfAACgeMIHAAAonvABAACKJ3wAAIDiCR8AAKB4wgcAACie8AEAAIonfAAAgOK11PsFHnzwwWzfvj1NTU2ZOXNmrr322rS1tY06b9u2bdm8eXOS5A//8A9z0UUX1bs0AADAuNQdPpdcckm6urqSJI899lgefvjhXH311e86Z3BwMA8//HB6enqSJGvWrMnSpUvT2tpa7/IAAABHVfetbqeddtrIx++8806amppGnbNjx44sWrQora2taW1tzaJFi7Jjx456lwYAABiXuq/4JMkDDzyQH/zgBznttNNy8803j3q+v78/7e3tI8dtbW3p7++fjKUBAACOalzh093dnYGBgVGPd3V1ZdmyZfnMZz6Tz3zmM9myZUu+973v5fLLLz/q1xzrylCS9Pb2pre3N0nS09OTWq02ni3yP7S0tPi+URnzRpXMW3n2NHoDU5Q/BxNn5iamhJkbV/isW7duXF/sggsuSE9Pz6jwaWtry/PPPz9y3N/fn/POO2/Mr9HZ2ZnOzs6R476+vnGtzf9Xq9V836iMeaNK5g3+mz8HVO1Enrm5c+eO67y6X+Oze/fukY+3b98+5sKLFy/Oc889l8HBwQwODua5557L4sWL610aAABgXOp+jc/999+f3bt3p6mpKbVabeQd3Xbt2pWtW7dm1apVaW1tzaWXXpq1a9cmSS677DLv6AYAAFSm7vC54YYbxny8o6MjHR0dI8crVqzIihUr6l0OAADgmNV9qxsAAMCJTvgAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABSvpdEbAABOXs3feLTRWziiWq2Wvr6+Rm8DmCSu+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUr6WeT37wwQezffv2NDU1ZebMmbn22mvT1tY26rwrrrgiCxYsSJLUarWsXr26nmUBAACOSV3hc8kll6SrqytJ8thjj+Xhhx/O1VdfPeq86dOn5/bbb69nKQAAgAmr61a30047beTjd955J01NTXVvCAAAYLLVdcUnSR544IH84Ac/yGmnnZabb755zHMOHjyYNWvWpLm5
OZ/+9KezfPnyepcFAAAYt6bh4eHhX3VCd3d3BgYGRj3e1dWVZcuWjRxv2bIlBw8ezOWXXz7q3P7+/rS1tWXPnj255ZZbsm7dusyZM2fM9Xp7e9Pb25sk6enpyS9/+ctj+g8iaWlpyaFDhxq9DU4S5o0qmTeqZN7KtOf/nN/oLUxJs7c81egtHNH06dPHdd5Rw2e8Xn/99fT09GTDhg2/8ry77rorH/nIR/LRj350XF/35z//+WRs76RSq9XS19fX6G1wkjBvVMm8USXzVqbDn72k0VuYkpq/8Wijt3BEc+fOHdd5db3GZ/fu3SMfb9++fcxFBwcHc/DgwSTJgQMH8uKLL2bevHn1LAsAAHBM6nqNz/3335/du3enqakptVpt5B3ddu3ala1bt2bVqlV59dVXc88992TatGkZGhrKypUrhQ8AAFCpSbvV7Xhxq9uxc2meKpk3qmTeqJJ5K5Nb3SbmpL/VDQAAYCoQPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUr6XRGwAAgKo0f+PRRm/hiGq1Wvr6+hq9jWK54gMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFE/4AAAAxRM+AABA8YQPAABQPOEDAAAUT/gAAADFEz4AAEDxhA8AAFA84QMAABRP+AAAAMUTPgAAQPGEDwAAUDzhAwAAFG/SwufRRx/N5ZdfngMHDoz5/LZt2/KFL3whX/jCF7Jt27bJWhYAAOCoWibji/T19eXf//3fU6vVxnx+cHAwDz/8cHp6epIka9asydKlS9Pa2joZywMAAPxKk3LF59vf/nb+5E/+JE1NTWM+v2PHjixatCitra1pbW3NokWLsmPHjslYGgAA4KjqDp/t27enra0tZ5111hHP6e/vT3t7+8hxW1tb+vv7610aAABgXMZ1q1t3d3cGBgZGPd7V1ZUtW7bkL//yL4954SNdHert7U1vb2+SpKen54i3z3FkLS0tvm9UxrxRJfNGlcwbVTNzx1fT8PDw8EQ/+Wc/+1luueWWnHLKKUmSffv2ZdasWbn11ltzxhlnjJz3xBNP5Pnnn8/VV1+dJLnnnnty3nnn5YILLqhz+wAAAEdX161uCxYsyDe/+c3cddddueuuu9Le3p7bbrvtXdGTJIsXL85zzz2XwcHBDA4O5rnnnsvixYvr2jhHtmbNmkZvgZOIeaNK5o0qmTeqZuaOr0l5V7ex7Nq1K1u3bs2qVavS2tqaSy+9NGvXrk2SXHbZZd7RDQAAqMykhs9dd9018nFHR0c6OjpGjlesWJEVK1ZM5nIAAADjMmm/wJQTR2dnZ6O3wEnEvFEl80aVzBtVM3PHV11vbgAAADAVuOIDAAAUT/gAAADFEz6F2r17d6O3QKGGhoZGPTY4ONiAnXAy+H93Yx8+fDgvv/xy3nzzzQbviJPF1q1bG70FTiLvvPNOXn755bz11luN3krRjtvbWdNYt9xyS/7+7/++0dugIM8//3zuvPPOvPPOOzn77LPz2c9+duS3S3d3d+e2225r8A4pyfbt23P33Xenqakp11xzTTZv3pyWlpa89tprueaaa7JkyZJGb5GCPPbYY6Me++d//uccPHgwSfK7v/u7VW+Jwn3rW9/KVVddlSR56aWXsnHjxrzvfe/L3r17s2rVKr/v8jgRPlPYt7/97TEfHx4e9q+iTLr77rsva9asyYIFC/LUU0+lu7s7n//857Nw4cJ4jxQm2z/90z/lr//6r/POO+9k9erVWb9+febNm5e9e/dm48aNwodJ9cADD+TDH/5wfu3Xfm3k77Oh
oaEcOHCgwTujVC+++OLIxw888ECuv/76LFy4MK+99lr+9m//VvgcJ8JnCuvt7c2VV16Z97znPaOea2nxo2VyHTp0KB/4wAeSJL/zO7+T+fPnZ8OGDfnTP/3TNDU1NXh3lGjWrFlJklqtlnnz5iVJzjzzzDFvt4R6bNiwIf/wD/+QoaGhXHrppZk+fXqeeOKJdHV1NXprnATefPPNLFy4MEkyZ84cf8cdR/7veApbuHBhfv3Xfz3nnHPOqOceeuihBuyIkk2bNi0DAwM544wzkiQLFizIunXr0tPTk9dff73Bu6M0w8PDGRoayrRp03LNNdeMPD40NJRDhw41cGeU6Mwzz8wNN9yQH/3oR+nu7s4f/MEfNHpLFO7VV1/N6tWrMzw8nD179uQXv/hFZsyY4e+448zv8ZnCDhw4kOnTp+fUU09t9FY4CezYsSNnnHFGzjrrrHc9Pjg4mH/913/NH/3RHzVmYxTppZdeyllnnZXp06e/6/G9e/fm+eefz0UXXdSYjVG8t99+Ow8++GB27dqV7u7uRm+HQr322mvvOq7VamlpacmBAwfyk5/8JB/72McatLOyCR8AAKB43s4aAAAonvABAACKJ3wAAIDiCZ8CrF+/Pr/4xS9GjgcHB3Prrbc2cEeUzLxRJfNGlcwbVTNz1RI+Bdi/f39mzJgxctza2po33nijgTuiZOaNKpk3qmTeqJqZq5bwKUBTU1P27ds3ctzX19fA3VA680aVzBtVMm9UzcxVy9tZF+Df/u3f8s1vfjO/+Zu/mST5yU9+kj/7sz/Lhz/84QbvjBKZN6pk3qiSeaNqZq5awqcQAwMDeemll5IkH/zgBzNz5swG74iSmTeqZN6oknmjamauOsJnCtu9e3fe//735+WXXx7z+Q984AMV74iSmTeqZN6oknmjamauMVoavQEm7pFHHsmf//mfZ9OmTaOea2pqyl/91V81YFeUyrxRJfNGlcwbVTNzjeGKDwAAUDxXfApw8ODBbN26NS+88EKamppy7rnn5uKLL8573vOeRm+NApk3qmTeqJJ5o2pmrlqu+BTgb/7mb9LS0pJPfOITSZInnngiv/zlL3Pdddc1eGeUyLxRJfNGlcwbVTNz1XLFpwCvvPJK7rjjjpHjRYsW5S+edYMcAAACZklEQVT+4i8auCNKZt6oknmjSuaNqpm5avkFpgU466yzsnPnzpHjn/70pzn77LMbuCNKZt6oknmjSuaNqpm5arnVrQDXX399XnnllZx55plJkr1792b+/PmZNm1ampqacttttzV4h5TEvFEl80aVzBtVM3PVEj4FeO21137l83PmzKloJ5wMzBtVMm9UybxRNTNXLeFTiJ/97Gd54YUXkiTnnntu5s+f3+AdUTLzRpXMG1Uyb1TNzFWn+Stf+cpXGr0J6vO9730v3/nOdzJr1qy8+eab2bx5c5Jk4cKFDd4ZJTJvVMm8USXzRtXMXMWGmfKuv/764bfeemvk+K233hq+/vrrG7gjSmbeqJJ5o0rmjaqZuWp5V7cCDA8Pp7m5eeS4ubk5w+5g5Dgxb1TJvFEl80bVzFy1/B6fKezw4cNpbm7OJz7xiXz5y1/Ob//2bydJnn766Vx44YUN3h2lMW9UybxRJfNG1cxcY3hzgyls9erVI29zuHPnzrzwwgsZHh7Oueee695QJp15o0rmjSqZN6pm5hrDFZ8p7H8268KFC/1B4bgyb1TJvFEl80bVzFxjCJ8p7MCBA/mXf/mXIz7/+7//+xXuhtKZN6pk3qiSeaNqZq4xhM8UNjQ0lLffftuL4KiEeaNK5o0qmTeqZuYaQ/hMYbNmzcpll13W6G1wkjBvVMm8USXzRtXMXGN4O+spzL8SUCXzRpXMG1Uyb1TNzDWGd3WbwgYHB9Pa2trobXCSMG9UybxRJfNG1cxcYwgfAACgeG51AwAAiid8AACA4gkfAACgeMIHAAAonvABAACK938BA24pB5lkfPoAAAAASUVORK5C
YII=\n",
    641       "text/plain": [
    642        "<Figure size 1008x626.4 with 1 Axes>"
    643       ]
    644      },
    645      "metadata": {},
    646      "output_type": "display_data"
    647     }
    648    ],
    649    "source": [
    650     "topic_coherence = []\n",
    651     "topic_words = pd.DataFrame()\n",
    652     "for t in range(len(coherence)):\n",
    653     "    label = topic_labels[t]\n",
    654     "    topic_coherence.append(coherence[t][1])\n",
    655     "    df = pd.DataFrame(coherence[t][0], columns=[(label, 'prob'), (label, 'term')])\n",
    656     "    df[(label, 'prob')] = df[(label, 'prob')].apply(lambda x: '{:.2%}'.format(x))\n",
    657     "    topic_words = pd.concat([topic_words, df], axis=1)\n",
    658     "                      \n",
    659     "topic_words.columns = pd.MultiIndex.from_tuples(topic_words.columns)\n",
    660     "pd.set_option('expand_frame_repr', False)\n",
    661     "topic_words.head().to_csv('topic_words.csv', index=False)\n",
    662     "print(topic_words.head())\n",
    663     "\n",
    664     "pd.Series(topic_coherence, index=topic_labels).plot.bar();"
    665    ]
    666   },
    667   {
    668    "cell_type": "markdown",
    669    "metadata": {
    670     "slideshow": {
    671      "slide_type": "slide"
    672     }
    673    },
    674    "source": [
    675     "### Using `gensim` `Dictionary` "
    676    ]
    677   },
    678   {
    679    "cell_type": "code",
    680    "execution_count": 23,
    681    "metadata": {
    682     "ExecuteTime": {
    683      "end_time": "2018-05-01T05:04:28.532936Z",
    684      "start_time": "2018-05-01T05:02:39.320Z"
    685     },
    686     "slideshow": {
    687      "slide_type": "fragment"
    688     }
    689    },
    690    "outputs": [],
    691    "source": [
    692     "docs = [d.split() for d in train_docs.article.tolist()]\n",
    693     "docs = [[t for t in doc if t not in stop_words] for doc in docs]"
    694    ]
    695   },
    696   {
    697    "cell_type": "code",
    698    "execution_count": 24,
    699    "metadata": {
    700     "ExecuteTime": {
    701      "end_time": "2018-05-01T05:04:28.533820Z",
    702      "start_time": "2018-05-01T05:02:39.496Z"
    703     },
    704     "slideshow": {
    705      "slide_type": "fragment"
    706     }
    707    },
    708    "outputs": [],
    709    "source": [
    710     "dictionary = Dictionary(docs)\n",
    711     "dictionary.filter_extremes(no_below=min_df, no_above=max_df, keep_n=max_features)"
    712    ]
    713   },
    714   {
    715    "cell_type": "code",
    716    "execution_count": 25,
    717    "metadata": {
    718     "ExecuteTime": {
    719      "end_time": "2018-05-01T05:04:28.534812Z",
    720      "start_time": "2018-05-01T05:02:39.648Z"
    721     },
    722     "slideshow": {
    723      "slide_type": "fragment"
    724     }
    725    },
    726    "outputs": [],
    727    "source": [
    728     "corpus = [dictionary.doc2bow(doc) for doc in docs]"
    729    ]
    730   },
    731   {
    732    "cell_type": "code",
    733    "execution_count": 26,
    734    "metadata": {
    735     "ExecuteTime": {
    736      "end_time": "2018-05-01T05:04:28.535717Z",
    737      "start_time": "2018-05-01T05:02:39.825Z"
    738     },
    739     "slideshow": {
    740      "slide_type": "fragment"
    741     }
    742    },
    743    "outputs": [
    744     {
    745      "name": "stdout",
    746      "output_type": "stream",
    747      "text": [
    748       "Number of unique tokens: 2000\n",
    749       "Number of documents: 2175\n"
    750      ]
    751     }
    752    ],
    753    "source": [
    754     "print('Number of unique tokens: %d' % len(dictionary))\n",
    755     "print('Number of documents: %d' % len(corpus))"
    756    ]
    757   },
    758   {
    759    "cell_type": "code",
    760    "execution_count": 27,
    761    "metadata": {
    762     "ExecuteTime": {
    763      "end_time": "2018-05-01T05:04:28.536760Z",
    764      "start_time": "2018-05-01T05:02:42.816Z"
    765     },
    766     "slideshow": {
    767      "slide_type": "slide"
    768     }
    769    },
    770    "outputs": [],
    771    "source": [
    772     "num_topics = 5\n",
    773     "chunksize = 500\n",
    774     "passes = 20\n",
    775     "iterations = 400\n",
    776     "eval_every = None # Don't evaluate model perplexity, takes too much time.\n",
    777     "\n",
    778     "temp = dictionary[0]  # This is only to \"load\" the dictionary.\n",
    779     "id2word = dictionary.id2token"
    780    ]
    781   },
    782   {
    783    "cell_type": "code",
    784    "execution_count": 30,
    785    "metadata": {
    786     "ExecuteTime": {
    787      "end_time": "2018-05-01T05:04:28.537677Z",
    788      "start_time": "2018-05-01T05:02:45.832Z"
    789     },
    790     "slideshow": {
    791      "slide_type": "fragment"
    792     }
    793    },
    794    "outputs": [],
    795    "source": [
    796     "model = LdaModel(corpus=corpus,\n",
    797     "                 id2word=id2word,\n",
    798     "                 chunksize=chunksize,\n",
    799     "                 alpha='auto',\n",
    800     "                 eta='auto',\n",
    801     "                 iterations=iterations,\n",
    802     "                 num_topics=num_topics,\n",
    803     "                 passes=passes, \n",
    804     "                 eval_every=eval_every)"
    805    ]
    806   },
    807   {
    808    "cell_type": "code",
    809    "execution_count": 31,
    810    "metadata": {
    811     "ExecuteTime": {
    812      "end_time": "2018-05-01T05:04:28.538730Z",
    813      "start_time": "2018-05-01T05:02:46.967Z"
    814     },
    815     "slideshow": {
    816      "slide_type": "slide"
    817     }
    818    },
    819    "outputs": [
    820     {
    821      "data": {
    822       "text/plain": [
    823        "[(0,\n",
    824        "  '0.007*\"company\" + 0.007*\"growth\" + 0.006*\"market\" + 0.006*\"economic\" + 0.006*\"oil\" + 0.006*\"sales\" + 0.005*\"firm\" + 0.005*\"rise\" + 0.005*\"economy\" + 0.005*\"prices\"'),\n",
    825        " (1,\n",
    826        "  '0.010*\"technology\" + 0.009*\"mobile\" + 0.008*\"use\" + 0.008*\"digital\" + 0.007*\"music\" + 0.007*\"games\" + 0.006*\"users\" + 0.006*\"used\" + 0.006*\"software\" + 0.006*\"net\"'),\n",
    827        " (2,\n",
    828        "  '0.012*\"Labour\" + 0.011*\"government\" + 0.009*\"Blair\" + 0.007*\"election\" + 0.006*\"public\" + 0.006*\"party\" + 0.006*\"Brown\" + 0.005*\"say\" + 0.005*\"Howard\" + 0.005*\"minister\"'),\n",
    829        " (3,\n",
    830        "  '0.009*\"game\" + 0.008*\"win\" + 0.008*\"England\" + 0.007*\"good\" + 0.006*\"think\" + 0.006*\"play\" + 0.005*\"players\" + 0.005*\"got\" + 0.005*\"And\" + 0.005*\"it\\'s\"'),\n",
    831        " (4,\n",
    832        "  '0.024*\"best\" + 0.021*\"film\" + 0.012*\"won\" + 0.009*\"music\" + 0.008*\"British\" + 0.008*\"TV\" + 0.007*\"including\" + 0.007*\"director\" + 0.007*\"UK\" + 0.007*\"star\"')]"
    833       ]
    834      },
    835      "execution_count": 31,
    836      "metadata": {},
    837      "output_type": "execute_result"
    838     }
    839    ],
    840    "source": [
    841     "model.show_topics()"
    842    ]
    843   },
    844   {
    845    "cell_type": "markdown",
    846    "metadata": {},
    847    "source": [
    848     "### Evaluating Topic Assignments on the Test Set"
    849    ]
    850   },
    851   {
    852    "cell_type": "code",
    853    "execution_count": 32,
    854    "metadata": {
    855     "ExecuteTime": {
    856      "end_time": "2018-05-01T05:04:28.539924Z",
    857      "start_time": "2018-05-01T05:02:50.153Z"
    858     },
    859     "slideshow": {
    860      "slide_type": "slide"
    861     }
    862    },
    863    "outputs": [],
    864    "source": [
    865     "docs_test = [d.split() for d in test_docs.article.tolist()]\n",
    866     "docs_test = [[t for t in doc if t not in stop_words] for doc in docs_test]\n",
    867     "\n",
    868     "test_dictionary = Dictionary(docs_test)\n",
    869     "test_dictionary.filter_extremes(no_below=min_df, no_above=max_df, keep_n=max_features)\n",
    870     "test_corpus = [dictionary.doc2bow(doc) for doc in docs_test]"
    871    ]
    872   },
    873   {
    874    "cell_type": "code",
    875    "execution_count": 33,
    876    "metadata": {
    877     "ExecuteTime": {
    878      "end_time": "2018-05-01T05:04:28.541193Z",
    879      "start_time": "2018-05-01T05:02:50.336Z"
    880     },
    881     "slideshow": {
    882      "slide_type": "slide"
    883     }
    884    },
    885    "outputs": [
    886     {
    887      "data": {
    888       "text/html": [
    889        "<div>\n",
    890        "<style scoped>\n",
    891        "    .dataframe tbody tr th:only-of-type {\n",
    892        "        vertical-align: middle;\n",
    893        "    }\n",
    894        "\n",
    895        "    .dataframe tbody tr th {\n",
    896        "        vertical-align: top;\n",
    897        "    }\n",
    898        "\n",
    899        "    .dataframe thead th {\n",
    900        "        text-align: right;\n",
    901        "    }\n",
    902        "</style>\n",
    903        "<table border=\"1\" class=\"dataframe\">\n",
    904        "  <thead>\n",
    905        "    <tr style=\"text-align: right;\">\n",
    906        "      <th></th>\n",
    907        "      <th>0</th>\n",
    908        "      <th>1</th>\n",
    909        "      <th>2</th>\n",
    910        "      <th>3</th>\n",
    911        "      <th>4</th>\n",
    912        "    </tr>\n",
    913        "  </thead>\n",
    914        "  <tbody>\n",
    915        "    <tr>\n",
    916        "      <th>0</th>\n",
    917        "      <td>0.11</td>\n",
    918        "      <td>0.07</td>\n",
    919        "      <td>0.09</td>\n",
    920        "      <td>2.81</td>\n",
    921        "      <td>67.32</td>\n",
    922        "    </tr>\n",
    923        "    <tr>\n",
    924        "      <th>1</th>\n",
    925        "      <td>6.82</td>\n",
    926        "      <td>60.50</td>\n",
    927        "      <td>27.93</td>\n",
    928        "      <td>0.10</td>\n",
    929        "      <td>0.05</td>\n",
    930        "    </tr>\n",
    931        "    <tr>\n",
    932        "      <th>2</th>\n",
    933        "      <td>0.11</td>\n",
    934        "      <td>32.94</td>\n",
    935        "      <td>0.09</td>\n",
    936        "      <td>51.46</td>\n",
    937        "      <td>6.79</td>\n",
    938        "    </tr>\n",
    939        "    <tr>\n",
    940        "      <th>3</th>\n",
    941        "      <td>61.13</td>\n",
    942        "      <td>0.07</td>\n",
    943        "      <td>32.06</td>\n",
    944        "      <td>0.10</td>\n",
    945        "      <td>0.05</td>\n",
    946        "    </tr>\n",
    947        "    <tr>\n",
    948        "      <th>4</th>\n",
    949        "      <td>0.11</td>\n",
    950        "      <td>0.07</td>\n",
    951        "      <td>0.09</td>\n",
    952        "      <td>115.79</td>\n",
    953        "      <td>4.33</td>\n",
    954        "    </tr>\n",
    955        "    <tr>\n",
    956        "      <th>5</th>\n",
    957        "      <td>63.55</td>\n",
    958        "      <td>0.07</td>\n",
    959        "      <td>32.64</td>\n",
    960        "      <td>0.10</td>\n",
    961        "      <td>0.05</td>\n",
    962        "    </tr>\n",
    963        "    <tr>\n",
    964        "      <th>6</th>\n",
    965        "      <td>42.69</td>\n",
    966        "      <td>0.07</td>\n",
    967        "      <td>0.09</td>\n",
    968        "      <td>2.51</td>\n",
    969        "      <td>0.05</td>\n",
    970        "    </tr>\n",
    971        "    <tr>\n",
    972        "      <th>7</th>\n",
    973        "      <td>0.11</td>\n",
    974        "      <td>0.07</td>\n",
    975        "      <td>26.56</td>\n",
    976        "      <td>22.62</td>\n",
    977        "      <td>0.05</td>\n",
    978        "    </tr>\n",
    979        "    <tr>\n",
    980        "      <th>8</th>\n",
    981        "      <td>103.20</td>\n",
    982        "      <td>0.07</td>\n",
    983        "      <td>26.73</td>\n",
    984        "      <td>0.10</td>\n",
    985        "      <td>6.29</td>\n",
    986        "    </tr>\n",
    987        "    <tr>\n",
    988        "      <th>9</th>\n",
    989        "      <td>54.08</td>\n",
    990        "      <td>0.07</td>\n",
    991        "      <td>0.09</td>\n",
    992        "      <td>0.10</td>\n",
    993        "      <td>7.07</td>\n",
    994        "    </tr>\n",
    995        "  </tbody>\n",
    996        "</table>\n",
    997        "</div>"
    998       ],
    999       "text/plain": [
   1000        "       0     1     2      3     4\n",
   1001        "0   0.11  0.07  0.09   2.81 67.32\n",
   1002        "1   6.82 60.50 27.93   0.10  0.05\n",
   1003        "2   0.11 32.94  0.09  51.46  6.79\n",
   1004        "3  61.13  0.07 32.06   0.10  0.05\n",
   1005        "4   0.11  0.07  0.09 115.79  4.33\n",
   1006        "5  63.55  0.07 32.64   0.10  0.05\n",
   1007        "6  42.69  0.07  0.09   2.51  0.05\n",
   1008        "7   0.11  0.07 26.56  22.62  0.05\n",
   1009        "8 103.20  0.07 26.73   0.10  6.29\n",
   1010        "9  54.08  0.07  0.09   0.10  7.07"
   1011       ]
   1012      },
   1013      "execution_count": 33,
   1014      "metadata": {},
   1015      "output_type": "execute_result"
   1016     }
   1017    ],
   1018    "source": [
   1019     "gamma, _ = model.inference(test_corpus)\n",
   1020     "topic_scores = pd.DataFrame(gamma)\n",
   1021     "topic_scores.head(10)"
   1022    ]
   1023   },
   1024   {
   1025    "cell_type": "code",
   1026    "execution_count": 34,
   1027    "metadata": {
   1028     "ExecuteTime": {
   1029      "end_time": "2018-05-01T05:04:28.542544Z",
   1030      "start_time": "2018-05-01T05:02:50.479Z"
   1031     },
   1032     "slideshow": {
   1033      "slide_type": "slide"
   1034     }
   1035    },
   1036    "outputs": [
   1037     {
   1038      "data": {
   1039       "text/html": [
   1040        "<div>\n",
   1041        "<style scoped>\n",
   1042        "    .dataframe tbody tr th:only-of-type {\n",
   1043        "        vertical-align: middle;\n",
   1044        "    }\n",
   1045        "\n",
   1046        "    .dataframe tbody tr th {\n",
   1047        "        vertical-align: top;\n",
   1048        "    }\n",
   1049        "\n",
   1050        "    .dataframe thead th {\n",
   1051        "        text-align: right;\n",
   1052        "    }\n",
   1053        "</style>\n",
   1054        "<table border=\"1\" class=\"dataframe\">\n",
   1055        "  <thead>\n",
   1056        "    <tr style=\"text-align: right;\">\n",
   1057        "      <th></th>\n",
   1058        "      <th>0</th>\n",
   1059        "      <th>1</th>\n",
   1060        "      <th>2</th>\n",
   1061        "      <th>3</th>\n",
   1062        "      <th>4</th>\n",
   1063        "    </tr>\n",
   1064        "  </thead>\n",
   1065        "  <tbody>\n",
   1066        "    <tr>\n",
   1067        "      <th>0</th>\n",
   1068        "      <td>0.00</td>\n",
   1069        "      <td>0.00</td>\n",
   1070        "      <td>0.00</td>\n",
   1071        "      <td>0.04</td>\n",
   1072        "      <td>0.96</td>\n",
   1073        "    </tr>\n",
   1074        "    <tr>\n",
   1075        "      <th>1</th>\n",
   1076        "      <td>0.07</td>\n",
   1077        "      <td>0.63</td>\n",
   1078        "      <td>0.29</td>\n",
   1079        "      <td>0.00</td>\n",
   1080        "      <td>0.00</td>\n",
   1081        "    </tr>\n",
   1082        "    <tr>\n",
   1083        "      <th>2</th>\n",
   1084        "      <td>0.00</td>\n",
   1085        "      <td>0.36</td>\n",
   1086        "      <td>0.00</td>\n",
   1087        "      <td>0.56</td>\n",
   1088        "      <td>0.07</td>\n",
   1089        "    </tr>\n",
   1090        "    <tr>\n",
   1091        "      <th>3</th>\n",
   1092        "      <td>0.65</td>\n",
   1093        "      <td>0.00</td>\n",
   1094        "      <td>0.34</td>\n",
   1095        "      <td>0.00</td>\n",
   1096        "      <td>0.00</td>\n",
   1097        "    </tr>\n",
   1098        "    <tr>\n",
   1099        "      <th>4</th>\n",
   1100        "      <td>0.00</td>\n",
   1101        "      <td>0.00</td>\n",
   1102        "      <td>0.00</td>\n",
   1103        "      <td>0.96</td>\n",
   1104        "      <td>0.04</td>\n",
   1105        "    </tr>\n",
   1106        "  </tbody>\n",
   1107        "</table>\n",
   1108        "</div>"
   1109       ],
   1110       "text/plain": [
   1111        "     0    1    2    3    4\n",
   1112        "0 0.00 0.00 0.00 0.04 0.96\n",
   1113        "1 0.07 0.63 0.29 0.00 0.00\n",
   1114        "2 0.00 0.36 0.00 0.56 0.07\n",
   1115        "3 0.65 0.00 0.34 0.00 0.00\n",
   1116        "4 0.00 0.00 0.00 0.96 0.04"
   1117       ]
   1118      },
   1119      "execution_count": 34,
   1120      "metadata": {},
   1121      "output_type": "execute_result"
   1122     }
   1123    ],
   1124    "source": [
   1125     "topic_probabilities = topic_scores.div(topic_scores.sum(axis=1), axis=0)\n",
   1126     "topic_probabilities.head()"
   1127    ]
   1128   },
   1129   {
   1130    "cell_type": "code",
   1131    "execution_count": 35,
   1132    "metadata": {
   1133     "ExecuteTime": {
   1134      "end_time": "2018-05-01T05:04:28.544253Z",
   1135      "start_time": "2018-05-01T05:02:50.631Z"
   1136     },
   1137     "slideshow": {
   1138      "slide_type": "slide"
   1139     }
   1140    },
   1141    "outputs": [
   1142     {
   1143      "data": {
   1144       "text/plain": [
   1145        "0    4\n",
   1146        "1    1\n",
   1147        "2    3\n",
   1148        "3    0\n",
   1149        "4    3\n",
   1150        "dtype: int64"
   1151       ]
   1152      },
   1153      "execution_count": 35,
   1154      "metadata": {},
   1155      "output_type": "execute_result"
   1156     }
   1157    ],
   1158    "source": [
   1159     "topic_probabilities.idxmax(axis=1).head()"
   1160    ]
   1161   },
   1162   {
   1163    "cell_type": "code",
   1164    "execution_count": 36,
   1165    "metadata": {
   1166     "ExecuteTime": {
   1167      "end_time": "2018-05-01T05:04:28.545304Z",
   1168      "start_time": "2018-05-01T05:02:52.185Z"
   1169     },
   1170     "slideshow": {
   1171      "slide_type": "slide"
   1172     }
   1173    },
   1174    "outputs": [
   1175     {
   1176      "data": {
   1177       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvMAAAIMCAYAAAB4wSMbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl8VPW5x/HvmZCQhLBkAZWtGIFCBBQIslgQJW6IolhQEG+Aq0Vwqdgq4Eat1YIWlSUsBQGvdUUFlYJcYxWuwrUgoKxhjRUChZCEkBIIs9w/vB2JREkw53cy53zevuZ1k5PMPM+Qa33y5ZnfWKFQKCQAAAAAEcfndAMAAAAAzg7DPAAAABChGOYBAACACMUwDwAAAEQohnkAAAAgQjHMAwAAABGKYR4AAACIUAzzAAAAQIRimAcAAAAiVC2nG6iMuOumOt0CHLBrzi+dbgEGJTVq7HQLcEDBwTynWwBgo8aNa+7/tsd1vMfWxy9dP93Wx/83knkAAAAgQkVEMg8AAABUK8sdmbY7ngUAAADgQSTzAAAA8B7LcrqDakEyDwAAAEQoknkAAAB4DzvzAAAAAJxEMg8AAADvYWceAAAAgJNI5gEAAOA9LtmZZ5gHAACA97BmAwAAAMBJJPMAAADwHpes2bjjWQAAAAAeRDIPAAAA72FnHgAAAICTSOYBAADgPezMAwAAAHASyTwAAAC8h515AAAAAE4imQcAAID3sDMPAAAAwEkk8wAAAPAel+zMM8wDAADAe1izAQAAAOAkknkAAAB4D8k8AAAAACeRzAMAAMB7fO54ASzJPAAAABChSOYBAADgPezMAwAAAHASyTwAAAC8xyVvGkUyDwAAAEQoknkAAAB4j0t25hnmAQAA4D2s2QAAAABwEsk8AAAAvMclazbueBYAAACAB5HMAwAAwHvYmQcAAADgJJJ5AAAAeA878wAAAACcRDJfg8z6dR9de8n5OlRUqvS7X5EkJSbU1svjrtXPGtXT1weLNXTiMhWVnHC4UwDV4fFHx2vlik+UlJSsd95d4nQ7AOAt7Myjur2cvVX9H3+33LXfDkzXJ19+o/a/+i998uU3+u3Azg51B6C69b9xgGbOnut0GwAAB8yYMUN33HGHfvOb34SvlZSU6Mknn9R9992nJ598UiUlJWd8HIb5GuSzzXkqOHq83LV+3VL1l+ytkqS/ZG/V9d0ucKI1ADbonN5F9erXd7oNAPAmy2fv7Qx69+6thx9+uNy1xYsXq3379po6darat2+vxYsXn/FxjA3zS5cu1bFjxxQKhTRz5kyNHTtWX375panyEatRg3gdKDwmSTpQeEwNG8Q53BEAAAB+qrS0NCUkJJS7tmbNGl122WWSpMsuu0xr1qw54+MY25n/+OOP1bdvX23YsEHFxcUaNWqUZs6cqYsuuqjC78/OzlZ2dvb/f9bYVJsAAADwApt35svPslJGRoYyMjJ+9D5HjhxRYmKiJCkxMVHFxcVnrGNsmA+FQpKk9evX6/LLL1eLFi3C1ypy6hOect1UIz3WRAeLjuncxG/T+XMT43WoqNTplgAAACKfzUdTVmZ4rw7G1mxSU1P1hz/8QevXr9dFF12k0tJSWS55FbGd/vr5bg3NaCtJGprRVkv+d7fDHQEAAMAO9evXV2FhoSSpsLBQ9erVO+N9jCXzd911l3Jzc3XOOeeodu3aKikp0ejRo02VjwgvPXS1erZvqpR6sdr50gg9+cr/6k8Lv9Bfxl2rzCsv1DeHjuq2Py51uk0A1WTsbx/Q2jV/V1FRoa68opdG3X2vBtw80Om2AMAbauCbRqWnp2vFihW68cYbtWLFCnXp0uWM97FCP7brUo22bdumFi1aKDY2VitXrtSePXvUt29fNWzY8Iz3jfPwmo2X7ZrzS6dbgEFJjXhtjBcVHMxzugUANmrcuOb+b3vc9TNsffzS9388
tH7hhRe0ZcsWHT16VPXr19egQYPUpUsXPf/888rPz1dKSooeeOCB014k+33Gkvm5c+fq2WefVW5urt577z1dccUVmj59up544glTLQAAAADfcnjd+/7776/w+uOPP16lxzH29wtRUVGyLEtr165V37591bdvXx0/fvzMdwQAAABQIWPDfGxsrBYtWqSVK1eqU6dOCgaD8vv9psoDAAAA33H4TaOqi7FKY8aMUXR0tEaNGqUGDRqooKBAN9xwg6nyAAAAgOsYG+YbNGigrl276uTJk5KkunXr6pJLLjFVHgAAAPiOZdl7M8TYMJ+dna3nnntOc+bMkSQVFBTo2WefNVUeAAAAcB1jw/zy5cv15JNPKi4uTpJ03nnn6ciRI6bKAwAAAN9xyc68saMpo6OjVavWd+UCgQDvAAsAAABnuGQONTbMp6Wl6Z133lFZWZm++uorLV++XJ07dzZVHgAAAHAdY38HMGTIENWrV0/NmzfXhx9+qI4dO+rWW281VR4AAAAIsyzL1pspxpJ5n8+njIwMZWRkmCoJAAAAuJqxYX7btm1auHCh8vPzFQgEFAqFZFmWpk+fbqoFAAAAQJJc89pNY8P8rFmzlJmZqdTUVPl85l7hCwAAALiVsWE+Pj5eHTt2NFUOAAAA+GHuCObNDfMXXnihXn75ZXXt2rXcEZWpqammWgAAAABcxdgwv3PnTknS7t27y12fMGGCqRYAAAAASezMVxlDOwAAAFC9bB/mV65cqV69emnJkiUVfr1fv352twAAAACUQzJfSSdOnJAklZaW2l0KAAAA8BTbh/krr7xSkjRw4EC7SwEAAACV4pZk3tiB73/5y1907Ngx+f1+/f73v9d//ud/auXKlabKAwAAAGGWZdl6M8XYMP/ll18qPj5e69atU1JSkqZMmaL333/fVHkAAADAdYydZhMIBCRJ69at0y9+8QslJCSYKg0AAACU544tG3PJfOfOnXX//fdr9+7dateunYqLixUdHW2qPAAAAOA6xpL52267Tf3791d8fLx8Pp9q166thx56yFR5AAAAIMwtL4A1NsyvWLGiwuuXXXaZqRYAAAAAVzE2zO/atSv8cVlZmTZt2qTzzz+fYR4AAADGkcxX0YgRI8p9fuzYMU2bNs1UeQAAAMB1jA3z3xcTE6MDBw44VR4AAAAeRjJfRRMnTgz/oYVCIe3du1fdu3c3VR4AAABwHWPD/A033BD+2OfzqWHDhkpOTjZVHgAAAAgjma+itLS08MfFxcWqW7euqdIAAABAee6Y5e0f5rdv365XX31VCQkJuvnmmzV9+nQVFxcrFArpnnvu0cUXX2x3CwAAAIAr2T7Mz5s3T4MHD9axY8f0+9//XuPHj1fr1q21b98+TZkyhWEeAAAAxrllzcZnd4FAIKCLLrpI3bt3V4MGDdS6dWtJUpMmTewuDQAAALia7cm8z/fd7wsxMTHlvuaW34gAAAAQWdwyh9o+zOfm5iozM1OhUEhlZWXKzMyU9O3xlCdPnrS7PAAAAOBatg/zb7zxht0lAAAAgCpxSzJv+848AAAAAHsYO2ceAAAAqDHcEcyTzAMAAACRimQeAAAAnuOWnXmGeQAAYESnCaucbgGGHZjzS6dbcL2IGOZ38f8IgOsVHMxzugUAgIeQzAMAAAARyi3DPC+ABQAAACIUyTwAAAA8h2QeAAAAgKNI5gEAAOA97gjmSeYBAACASEUyDwAAAM9hZx4AAACAo0jmAQAA4Dkk8wAAAAAcRTIPAAAAzyGZBwAAAOAoknkAAAB4jzuCeYZ5AAAAeA9rNgAAAAAcRTIPAAAAzyGZBwAAAOAoknkAAAB4Dsk8AAAAAEeRzAMAAMBzSOYBAAAAOIpkHgAAAN7jjmCeZB4AAACIVCTzAAAA8By37MwzzAMAAMBz3DLMs2YDAAAARCiSeQAAAHiOS4J5knkAAAAgUpHMAwAAwHPYmQcAAADgKJJ5AAAAeI5LgnmSeQAAACBSkcwD
AADAc9iZBwAAAOAoknkAAAB4jkuCeZJ5AAAAIFKRzAMAAMBzfD53RPMM8wAAAPAc1mwAAAAAOIpkHgAAAJ7D0ZQAAAAAHEUyDwAAAM9xSTDPMA8AAGCXO/q01NCe58uypL+s3KM5H+10uiW4DMM8AACADdo0rqehPc/XtU//TWX+oF779S+UvfGA9hwscbo1yPmd+SVLluhvf/ubLMtSs2bNNHr0aMXExFT5cdiZBwAAsEGr8+rqi90FKi0LKBAMafX2fPXt2NjptlADFBQUaNmyZZo4caImT56sYDCoVatWndVjGRvmp02bVqlrAAAAbrBtX7G6tU5RYp0YxcVEqU/7c9U4Kd7ptvD/LMuy9XYmwWBQZWVlCgQCKisrU2Ji4lk9D2NrNnv37i33eTAY1O7du3/w+7Ozs5WdnS1JSk9PV48ePWztDwAAoDrtOHBU0z/I0RtjeupfJ/zavLdI/kDI6bZgyKmzrCRlZGQoIyNDkpSUlKTrr79eo0aNUkxMjC666CJddNFFZ1XH9mF+0aJFWrRokcrKypSZmSlJCoVCqlWrVvgJVeTUJ5yXl2d3mwAAANXutU9z9dqnuZKk8Te10/7CY842hDC7V+ZPnWW/r6SkRGvWrFFWVpbi4+P13HPPaeXKlerVq1eV69g+zN9000266aab9Oqrr2rIkCF2lwMAAKgxUurWVv7RE2qSFKe+HRur38SPnW4J/8/JF8Bu3LhRjRo1Ur169SRJXbt21fbt22vmMP9vQ4YMUUFBgQ4dOqRAIBC+npaWZqoFAAAAo+aO6q6kOjE6GQhq/KsbdOTYSadbQg2QkpKiHTt26MSJE4qJidHGjRt1wQUXnNVjGRvmX3nlFa1atUpNmzYN/yZkWRbDPAAAcK0bn/nE6RbwA5w8mbJVq1bq1q2bxo4dq6ioKLVo0eJH189/jLFh/u9//7teeOEFRUdHmyoJAAAA1EiDBg3SoEGDfvLjGBvmzznnHAUCAYZ5AAAAOM7pN42qLsaG+ZiYGD344INq3769atX6ruyIESNMtQAAAAC4irFhPj09Xenp6abKAQAAAD/IJcG8uWG+d+/eKisrU35+vho35q2MAQAAgJ/KZ6rQ2rVr9eCDD+qpp56SJOXm5mrSpEmmygMAAABhlmXZejPF2DC/cOFC/fGPf1SdOnUkSS1atNDBgwdNlQcAAABcx9iaTVRUlOLj48tdc8uriAEAABBZ3DKGGhvmmzVrpk8//VTBYFD79+/XsmXL1Lp1a1PlAQAAANcxtmYzYsQIffPNN4qOjtaUKVMUFxenYcOGmSoPAAAAhLllZ95YMl+7dm0NHjxYgwcPNlUSAAAAqBBrNlW0a9cuLVq0SIcOHVIgEAhf/9Of/mSqBQAAAMBVjA3zU6dO1e23367mzZvzwlcAAAA4yi3zqLFhvl69erwDLAAAAFCNjA3zgwYN0qxZs9SuXTtFR0eHr3ft2tVUCwAAAIAkduar7OOPP1ZeXp78fr98vu8O0WGYBwAAAM6OsWH+66+/1uTJk02VAwAAAH6QW3bmjZ0z36pVK+3du9dUOQAAAMD1jCXzOTk5WrFihRo1aqTo6GiFQiFZlsXRlAAAADDOJcG8uWH+4YcfNlUKAAAA8ARjw3zDhg0VDAZVVFSkYDBoqiwAAABwGrfszBsb5pctW6a33npL9evXD//hsWYDAAAAJzDMV9HSpUv1wgsvqG7duqZKAgAAAK5mbJhPSUlRfHy8qXIAAADAD3JJMG9umG/UqJF+97vfqVOnTuXeAbZfv36mWgAAAABcxWgyn5KSIr/fL7/fb6osAAAAcBp25qto4MCBpkoBAAAAnmD7ML9gwQINGzZMEydOrPA3oLFjx9rdAgAAAFCOS4J5+4f5Xr16SZJuuOEGu0sBAAAAnmL7MJ+amipJSktLs7sUAAAAUCnszFfR/v379eqrr2rv3r06efJk+Pr06dNNtQAAAAC4is9UoRkzZuiqq65SVFSUJkyYoF69
eoVXcAAAAACTLMvemynGhvmysjK1b99eoVBIDRs21KBBg7Rp0yZT5QEAAADXMbZmExMTo2AwqPPOO08ffPCBkpKSdOTIEVPlAQAAgDCfS3bmjSXzmZmZKisr0/Dhw7V7926tXLlS99xzj6nyAAAAQJhb1myMJfOHDh1Sy5YtFRsbq9GjR0uSVq9erVatWplqAQAAAHAVY8n84sWLK3UNAAAAsJtlWbbeTLE9mV+/fr3Wr1+vgoICzZs3L3y9tLRUPp+x3yUAAAAA17F9mE9MTFRqaqrWrl0bfgMpSYqLi1NmZqbd5QEAAIDT+Nzx+lf7h/kWLVqoefPm+uqrr9S7d2+7ywEAAACeYeQFsD6fT0ePHpXf71etWsZecwsAAABUyOReu52MTdYNGzbUY489ps6dOys2NjZ8vV+/fqZaAAAAAFzF2DCfmJioxMREhUIhlZaWmioLIEJ0mrDK6RbggHVP9HC6BQAe5ZJg3twwP3DgQEnS8ePHyyXzAADAG/jlDah+xs6G3L59u8aMGaMxY8ZIknJzczV37lxT5QEAAIAwy+Z/TDE2zC9YsECPPPKI6tatK+nbU262bt1qqjwAAAAQ5rPsvRl7HuZKSSkpKeWL86ZRAAAAwFkztjOfnJysnJwcWZYlv9+vpUuXqkmTJqbKAwAAAGFuOZrSWDR+5513avny5SooKNBdd92l3Nxc3XHHHabKAwAAAK5jLJnPy8vTfffdV+7atm3b1KZNG1MtAAAAAJLcczSlsWR+/vz5lboGAAAAoHJsT+a3b9+unJwcFRcXa8mSJeHrx44dUzAYtLs8AAAAcBqfS6J524d5v9+v48ePKxAIlHvn1/j4eD3wwAN2lwcAAABcy/ZhPi0tTWlpaerdu7caNmxodzkAAADgjFwSzJt7AezJkyc1e/ZsHTp0SIFAIHx9woQJploAAAAAXMXYMP/888/ryiuvVJ8+fXizKAAAADjKLefMGxvmfT6frrrqKlPlAAAAANczFpF37txZy5cvV2FhoUpKSsI3AAAAwDTLsvdmirFkfsWKFZKk9957L3zNsixNnz7dVAsAAACAJI6mrLKsrCxTpQAAAABPsH3N5t133w1/vHr16nJfe/XVV+0uDwAAAJzGsvlmiu3D/KpVq8IfL168uNzXvvzyS7vLAwAAAK5l+5pNKBSq8OOKPgcAAABMcMvRlLYn86f+QX3/D80tf4gAAACAE2xP5nNzc5WZmalQKKSysjJlZmZK+jaVP3nypN3lAQAAgNP4XJIp2z7Mv/HGG3aXAAAAADzJ2NGUAAAAQE3hlnVvY+8ACwAAAKB6kcwDAADAc1wSzJPMAwAAAJGKZB4AAACe45adeYZ5AAAAeI5bjqZkzQYAAACIUCTzAAAA8By3rNmQzAMAAAARimQeAAAAnuOOXJ5kHgAAAIhYJPMAAADwHJ/XdubnzZunnJycctdycnK0YMGC6u4JAAAAQCVUepj/7LPPdMEFF5S7lpqaqk8//bTamwIAAADsZFn23kyp9DBvWZaCwWC5a8FgUKFQqNqbAgAAAHBmlR7m27Rpo9dffz080AeDQS1cuFBt2rSxrTkAAADADpZl2XozpdIvgB0+fLgmTpyokSNHKiUlRfn5+UpMTNTYsWPt7A8AAACodi55/Wvlh/nk5GRNmjRJO3fu1OHDh5WcnKyWLVvK5+N0SwAAAMAJVTqa0ufzqXXr1nb1AgAAABjhlqMpf3SYHzNmjJ5//nlJ0qhRo37w+2bOnFm9XQGAB9zRp6WG9jxfliX9ZeUezflop9MtAQAizI8O8yNHjgx/fO+999reDAB4RZvG9TS05/m69um/qcwf1Gu//oWyNx7QnoMlTrcGAJ5QE4L5f/3rX5o1a5a++eYbWZalUaNGVXkL5keH+VNPqklLSzu7LgEAp2l1Xl19sbtApWUBSdLq7fnq27GxspZvd7gzAIAp8+fP18UXX6zf/OY38vv9OnHiRJUfo9I7836/X2+//bY+
++wzFRYWKjExUT169NCAAQMUExNzxvuvXr1aF198seLi4vT2229rz549GjBggFJTU6vcNABEum37ijXupnZKrBOj4ycD6tP+XH35daHTbQGAZ5g8PrIix44d09atW3X33XdLkmrVqqVatar0ctZv71fZb5wzZ47y8vI0fPhwNWzYUIcOHdLixYs1d+5cjR49+oz3f/vtt9W9e3dt27ZNX375pa6//nrNnTtXTz/9dIXfn52drezsbElSenq6evToUdlWAaDG23HgqKZ/kKM3xvTUv074tXlvkfwB3oQPANzi1FlWkjIyMpSRkRH+/ODBg6pXr55mzJihr7/+WqmpqRo2bJhiY2OrVKfSw/yaNWs0bdo01alTR5LUtGlTtWrVqtK79P8+wnLdunW66qqr1KVLFy1cuPAHv//UJ5yXl1fZNgEgYrz2aa5e+zRXkjT+pnbaX3jM2YYAwEPsPlz9+8P79wUCAe3Zs0cjRoxQq1atNH/+fC1evFi33nprlepU+nk0aNDgtD2esrIyJSYmVur+SUlJ+vOf/6zVq1erY8eOOnnypEIhUigA3pVSt7YkqUlSnPp2bKxFf//G4Y4AAKYkJycrOTlZrVq1kiR169ZNe/bsqfLjVDqZ79Wrl55++mldc801Sk5O1uHDh7V8+XL16tVLmzZtCn9fu3btKrz/mDFjtGHDBl1//fWqU6eOCgsLNXTo0Co3DABuMXdUdyXVidHJQFDjX92gI8dOOt0SAHiG0zvzDRo0UHJysvLy8tS4cWNt3LhRTZs2rfLjVHqY//DDDyVJixYtOu36v79mWZamT59e4f2//vprdejQQXFxcZKk2NhYxcfHV7lhAHCLG5/5xOkWAAAOGjFihKZOnSq/369GjRpV6nWo31fpYT4rK6vKD36quXPnatKkSeHPa9eufdo1AAAAwARfDThnvkWLFpo4ceJPeowqnX8TCASUk5OjgoICJScnq3Xr1oqKiqrUfUOhULm/zvD5fAoEAlXrFgAAAKgGNWGYrw6VHub37dunSZMmqaysLLwzHx0drbFjx1Zqv+ecc87R0qVLddVVV0mS/vu//1uNGjU6+84BAAAAj6v0MD937lxlZGTo+uuvDyfs7733nl588UVNmDDhjPe/8847NX/+fL3zzjuyLEvt2rXTyJEjz75zAAAA4Cw5/QLY6lLpYT43N1ePPfZYuSd+3XXXnfaC2B9Sv3593X///VXvEAAAAECFKj3MJyUlacuWLeWOnty6desZz5l/99131b9/f82bN6/Cr48YMaKyLQAAAADVwnM784MHD9akSZPUuXNnpaSkKD8/X+vWrTvjO8A2adJEkpSamvrTOgUAAABQTqWH+by8PD3zzDNatWqVCgsL1axZMw0aNEjr1q370fulp6dL+vYoyu7du5f72urVq8+iZQAAAOCnccnKvHyV/ca3335b5513nm6++Wbdcccduvnmm9W4cWO9/fbblbr/4sWLK3UNAAAAQOWcMZnftGmTJCkYDIY//rd//vOf4Xd0/SHr16/X+vXrVVBQUG5vvrS0VD5fpX+XAAAAAKqNzyXR/BmH+ZkzZ0qSysrKwh9L3x7nU79+/TO+gDUxMVGpqalau3Ztub35uLg4ZWZmnm3fAAAAgOedcZjPysqSJE2fPl333HNPlQu0aNFCLVq0UM+ePSv9brEAAACAndyyH1LpF8CezSAvSc8995weeOABPfTQQxUezv+nP/3prB4XAAAAOFsu2bKp/DB/toYPHy5JGjdunN2lAAAAAE+xfZj/95tKNWzY0O5SAAAAQKV45gWwP9V//Md/lFuvCYVCsiwr/H9feuklu1sAAAAAXMn2Yf6//uu/7C4BAAAAVIlLgnn7h/lT5ebmatu2bZKktm3b6mc/+5nJ8gAAAICrGDuVZ+nSpZo2bZqOHDmiI0eOaOrUqVq2bJmp8gAAAECYz7L3ZoqxZP5vf/ubnnrqKcXGxkqS+vfvr0cffVTXXnutqRYA
AAAAVzE2zIdCIfl83/1FgM/nUygUMlUeAAAACOM0myq6/PLL9cgjj6hLly6SpDVr1uiKK64wVR4AAABwHWPDfL9+/ZSWlhZ+Aezo0aN1/vnnmyoPAAAAhLkkmLd/mC8rK9OHH36oAwcOqHnz5rr66qsVFRVld1kAAADA9Wwf5rOyshQVFaW2bdtq/fr12rdvn4YNG2Z3WQAAAOAHmTxxxk62D/N79+7V5MmTJUlXXHGFHn74YbtLAgAAAD/KkjumedvPma9V67vfF1ivAQAAAKqP7cl8bm6uMjMzJX17PGVZWZkyMzMVCoVkWZZeeuklu1sAAAAAymHNppLeeOMNu0sAAAAAnmTsaEoAAACgpnBLMm/7zjwAAAAAe5DMAwAAwHMsl7xrFMk8AAAAEKFI5gEAAOA57MwDAAAAcBTJPAAAADzHJSvzDPMAAADwHp9LpnnWbAAAAIAIRTIPAAAAz+EFsAAAAAAcRTIPAAAAz3HJyjzJPAAAABCpSOYB1Ai5M3/pdAswLLHLPbrguk+cbgOG7frrw063AEiSfHJHNE8yDwAAAEQoknkAAAB4DjvzAAAAABxFMg8AAADP4Zx5AAAAAI4imQcAAIDn+FyyNM8wDwAAAM9xySzPmg0AAAAQqUjmAQAA4DluWbMhmQcAAAAiFMk8AAAAPMclwTzJPAAAABCpSOYBAADgOW5JtN3yPAAAAADPIZkHAACA51guWZonmQcAAAAiFMk8AAAAPMcduTzDPAAAADyIN40CAAAA4CiSeQAAAHiOO3J5knkAAAAgYpHMAwAAwHNcsjJPMg8AAABEKpJ5AAAAeA5vGgUAAADAUSTzAAAA8By3JNpueR4AAACA55DMAwAAwHPYmQcAAADgKJJ5AAAAeI47cnmGeQAAAHgQazYAAAAAHEUyDwAAAM9xS6LtlucBAAAAeA7JPAAAADyHnXkAAAAAjiKZBwAAgOe4I5cnmQcAAAAiFsk8AAAAPMclK/Mk8wAAAECkIpkHAACA5/hcsjVPMg8AAABEKJJ5AAAAeA478wCAn+TxR8erd8/uGtC/n9OtwCazJtymrz/6o9YufDh8bUBGR33x1iP61xdT1SmtuYPdAd5m2fyPKQzzAOCQ/jcO0MzZc51uAzZ6+f3/Vf+AWPynAAAUz0lEQVS7s8pd27wrT7f+Zo4+XbfLoa4A1BTBYFAPPfSQJk6ceNaPYWyY37ZtW6WuAYBXdE7vonr16zvdBmz02bpdKjhyrNy1nD3/1I6vDzrUEYB/syx7b5WxdOlSNWnS5Cc9D2PD/Pz58yt1DQAAAHC7w4cPa926derTp89PehzbXwC7fft25eTkqLi4WEuWLAlfP3bsmILB4A/eLzs7W9nZ2ZKk9PR09ejRw+5WAQAA4BF2H0156iwrSRkZGcrIyAh/vmDBAg0dOlSlpaU/qY7tw7zf79fx48cVCATKNRsfH68HHnjgB+936hPOy8uzu00AAACg2nx/eD/VF198ofr16ys1NVWbN2/+SXVsH+bT0tLUpk0b/eMf/9DAgQPtLgcAAACckZNHU+bk5Gjt2rVav369ysrKVFpaqqlTp+q+++6r8mMZOWfe5/OppKTERCkAiBhjf/uA1q75u4qKCnXlFb006u57NeBmQg83eemPw9SzcyulNEjQzg+e1JOzlqrwyL/03NiBSklM0DtT79JXOft0w/dOvAHgbkOGDNGQIUMkSZs3b9b7779/VoO8ZPBNo84//3xNmjRJ3bt3V+3atcPXu3btaqoFAKhRJv3pOadbgM0yxy+o8Pp7H39lthEAp3HLm0YZG+ZLSkpUt25dbdq0qdx1hnkAAAB41YUXXqgLL7zwrO9vbJgfPXq0qVIAAADAjzL5Lq12MjbMHz58WPPmzVNOTo4sy9LPf/5zDR8+XMnJyaZaAAAAACRJPnfM8ubeNGrGjBlKT0/X7NmzNWvWLKWnp2vGjBmmygMAAACuY2yYLy4u1uWXX66oqChFRUWpd+/e
Ki4uNlUeAAAACLNs/scUY8N8vXr1tHLlSgWDQQWDQa1cuVJ169Y1VR4AAABwHWM786NGjdKLL76ol156SZL085//XKNGjTJVHgAAAAjjaMoqSklJ0dixY02VAwAAAFzP2DD/z3/+U/Pnz9eOHTtkWZZat26tzMxMnXPOOaZaAAAAACS552hKYzvzU6dOVY8ePfTnP/9Zs2fPVrdu3TRlyhRT5QEAAADXMTbMh0Ih9erVK3yaTa9evWS5ZVkJAAAAEcVn2XszxdiazYUXXqjFixerR48esixLq1atUseOHVVSUiJJSkhIMNUKAAAA4ArGhvlVq1ZJkrKzsyV9m9RL0scffyzLsjR9+nRTrQAAAMDj3LIzb/swv3PnTqWkpCgrK0uS9Mknn+jzzz9Xw4YNNWjQIBJ5AAAA4CzZvjM/Z84c1ar17e8MW7Zs0WuvvabLLrtM8fHxmj17tt3lAQAAgNNYlr03U2wf5oPBYDh9X7Vqlfr06aNu3brp1ltv1YEDB+wuDwAAAJzGsvlmipFhPhAISJI2bdqkdu3alfsaAAAAgLNj+878pZdeqt/97neqW7euYmJi1LZtW0nSgQMHFB8fb3d5AAAA4DQ+lxyRbvswP2DAALVr105FRUXq0KFD+Gz5YDCo4cOH210eAAAAcC0jR1O2bt36tGuNGzc2URoAAAA4jTtyeYPvAAsAAACgehl70ygAAACgxnBJNE8yDwAAAEQoknkAAAB4juWSaJ5kHgAAAIhQJPMAAADwHJccM88wDwAAAO9xySzPmg0AAAAQqUjmAQAA4D0uieZJ5gEAAIAIRTIPAAAAz+FoSgAAAACOIpkHAACA57jlaEqSeQAAACBCkcwDAADAc1wSzJPMAwAAAJGKZB4AAADe45JonmQeAAAAiFAk8wAAAPAct5wzzzAPAAAAz+FoSgAAAACOIpkHAACA57gkmGeYR8107eytTrcA47bq88f6ON0EDCpcM93pFuCAgoN5TrcAuArDPIAag//Ie0tSo8ZOtwDAy1wSzbMzDwAAAEQoknkAAAB4jluOpiSZBwAAACIUyTwAAAA8h3PmAQAAADiKZB4AAACe45JgnmEeAAAAHuSSaZ41GwAAACBCkcwDAADAcziaEgAAAICjSOYBAADgORxNCQAAAMBRJPMAAADwHJcE8yTzAAAAQKQimQcAAID3uCSaJ5kHAAAAIhTJPAAAADyHc+YBAAAAOIpkHgAAAJ7jlnPmGeYBAADgOS6Z5VmzAQAAACIVyTwAAAC8xyXRPMk8AAAAEKFI5gEAAOA5HE0JAAAAwFEk8wAAAPActxxNSTIPAAAARCiSeQAAAHiOS4J5knkAAAAgUpHMAwAAwHtcEs0zzAMAAMBzOJoSAAAAgKNI5gEAAOA5HE0JAAAAwFEk8wAAAPAclwTzJPMAAABApCKZBwAAgPe4JJonmQcAAAAiFMk8AAAAPIdz5gEAAAA4imQeAAAAnuOWc+YZ5oEaYmj3ZhrQqbFCIWnHwRI9vniryvxBp9sCUI0ef3S8Vq74RElJyXrn3SVOtwPABRjmgRqgUd3aGtK1mW6a/r864Q/qmYHtdE27c/Tehv1OtwagGvW/cYAGDxmqR8aPdboVwPOcDubz8/OVlZWloqIiWZaljIwM9e3bt8qPY2yYD4VCKi4uViAQCF9LSkoyVR6o8aJ8lmpH++QPhhQXHaVDR0843RKAatY5vYv27dvrdBsA5PyaTVRUlG6//XalpqaqtLRU48aNU4cOHdS0adMqPY6RYX758uV68803lZCQIJ/vu9fcPv/88ybKAzXewaMn9NKqf2j5mEt13B/U6l0FWr2rwOm2AACATRITE5WYmChJiouLU5MmTVRQUFAzh/klS5bo+eefV7169UyUAyJO3dhauvznKer7wiodPe7Xs4Pa67oO5+qvXx1wujUAAFzK6UWb7xw8eFB79uxRy5Ytq3xfI8N8cnKyEhISqnSf7OxsZWdnS5LS09PVo0cP
O1oDaoRuqUnaV3RchcdOSpI+2npQFzWrzzAPAECEOnWWlaSMjAxlZGSc9n3Hjx/X5MmTNWzYMMXHx1e5jq3D/NKlSyVJ5557rp544gl17txZtWp9V/LHlvxPfcJ5eXl2tgk47sCR4+rQtJ5io306fjKorqlJ2pJX7HRbAAC4lt078z80vJ/K7/dr8uTJ6tmzp7p27XpWdWwd5ouLvx1GGjRooAYNGujYsWN2lgMi1sZ9xfpwy0G9PvISBYIhbTtwVG+t3ed0WwCq2djfPqC1a/6uoqJCXXlFL426+14NuHmg020BcEAoFNKsWbPUpEkT9evX76wfxwqFQqFq7MsWJPPec+3srU63AAcsG9nW6RZgUFKjxk63AAcUHOS/6V7SuHHN/fc8r6jM1sdv3CDmR7++bds2Pf7442revLms//9rgsGDB6tTp05VqmNkZ/6pp57S/fffrzp16kiSSkpKNG3aNI0fP95EeQAAAKBGadOmjd58882f/DhGhvmioqLwIC9JCQkJKiwsNFEaAAAAOI3T58xXF9+Zv6Uaivh8Onz4cPjz/Px8E2UBAAAAVzOSzN9yyy167LHH1K5dO0nS5s2bdccdd5goDQAAAJzGqkHnzP8URob5Tp066emnn9b27dslSbfddpvq169vojQAAADgWkbWbKRv0/i9e/fqkksu0cmTJ7V7925TpQEAAIDyLJtvhhgZ5l988UVt3rxZ//M//yNJio2N1Zw5c0yUBgAAAE7jklnezDC/fft2/epXv1J0dLSkb0+z8fv9JkoDAAAArmVkZz4qKkrBYDB8IP7Ro0fDHwMAAACmuWUUtXWYDwQCioqK0tVXX63JkyeruLhYb775plavXq1f/vKXdpYGAAAAXM/WYf7hhx/WpEmTdNlllyk1NVUbN25UKBTSmDFj1Lx5cztLAwAAAD+IoykrIRQKhT9u1qyZmjVrZmc5AAAAwFNsHeaLi4u1ZMmSH/x6v3797CwPAAAAVMwdwby9w3wwGNTx48fLJfQAAAAAqoetw3xiYiIvdAUAAECN45Jg3t5z5knkAQAAAPvYmsw//vjjdj48AAAAcFY4Z74SEhIS7Hx4AAAA4Ky45WhKW9dsAAAAANjH1mQeAAAAqIncsmZDMg8AAABEKIZ5AAAAIEIxzAMAAAARip15AAAAeA478wAAAAAcRTIPAAAAz+GceQAAAACOIpkHAACA57AzDwAAAMBRJPMAAADwHJcE8wzzAAAA8CCXTPOs2QAAAAARimQeAAAAnsPRlAAAAAAcRTIPAAAAz+FoSgAAAACOIpkHAACA57gkmCeZBwAAACIVyTwAAAC8xyXRPMk8AAAAEKFI5gEAAOA5bjlnnmEeAAAAnsPRlAAAAAAcZYVCoZDTTeCHZWdnKyMjw+k2YBA/c+/hZ+49/My9h5857EIyX8NlZ2c73QIM42fuPfzMvYefuffwM4ddGOYBAACACMUwDwAAAEQohvkajv067+Fn7j38zL2Hn7n38DOHXXgBLAAAABChSOYBAACACMUwDwAAAEQo3gG2htqwYYPmz5+vYDCoPn366MYbb3S6JdhsxowZWrdunerXr6/Jkyc73Q5slp+fr6ysLBUVFcmyLGVkZKhv375OtwUblZWVacKECfL7/QoEAurWrZsGDRrkdFswIBgMaty4cUpKStK4ceOcbgcuwzBfAwWDQb344ot69NFHlZycrPHjxys9PV1NmzZ1ujXYqHfv3rrmmmuUlZXldCswICoqSrfffrtSU1NVWlqqcePGqUOHDvx77mLR0dGaMGGCYmNj5ff79fjjj+viiy9W69atnW4NNlu6dKmaNGmi0tJSp1uBC7FmUwPt3LlT5557rs455xzVqlVLPXr00Jo1a5xuCzZLS0tTQkKC023AkMTERKWmpkqS4uLi1KRJExUUFDjcFexkWZZiY2MlSYFAQIFAQJZlOdwV7Hb48GGtW7dOffr0cboVuBTJfA1UUFCg5OTk8OfJycna
sWOHgx0BsNPBgwe1Z88etWzZ0ulWYLNgMKixY8fqwIEDuvrqq9WqVSunW4LNFixYoKFDh5LKwzYk8zVQRaeFkt4A7nT8+HFNnjxZw4YNU3x8vNPtwGY+n0/PPvusZs2apV27dukf//iH0y3BRl988YXq168f/ls4wA4k8zVQcnKyDh8+HP788OHDSkxMdLAjAHbw+/2aPHmyevbsqa5duzrdDgyqU6eO0tLStGHDBjVv3tzpdmCTnJwcrV27VuvXr1dZWZlKS0s1depU3XfffU63BhdhmK+BLrjgAu3fv18HDx5UUlKSVq1axb/4gMuEQiHNmjVLTZo0Ub9+/ZxuBwYUFxcrKipKderUUVlZmTZu3Kj+/fs73RZsNGTIEA0ZMkSStHnzZr3//vv89xzVjmG+BoqKitKIESP01FNPKRgM6vLLL1ezZs2cbgs2e+GFF7RlyxYdPXpUd911lwYNGqQrrrjC6bZgk5ycHK1cuVLNmzfXgw8+KEkaPHiwOnXq5HBnsEthYaGysrIUDAYVCoXUvXt3de7c2em2AEQ4K1TRgjYAAACAGo8XwAIAAAARimEeAAAAiFAM8wAAAECEYpgHAAAAIhTDPAAAABChGOYBwKCsrCy9/vrrkqStW7fq17/+tZG6gwYN0oEDB4zUAgCYwzAPAA5p27atpkyZcsbv++STT/TYY48Z6AgAEGkY5gHgLAUCAadbAAB4HO8ACwDfc/fddysjI0MrV65UUVGRunTpojvuuEM7duzQtGnTdM011+ivf/2rOnTooHvvvVdffPGFXn/9dR06dEhNmzbVnXfeqZ/97GeSpD179mjWrFnav3+/OnbsKMuywnU2b96sadOmadasWZKk/Px8LViwQFu3blUoFNKll16qq6++WnPmzJHf79ftt9+uqKgoLViwQCdPntRrr72m1atXy+/3q0uXLho2bJhiYmIkSe+9956WLFkiy7J0yy23mP9DBAAYQTIPABX49NNP9cgjj2jatGnav3+/3nnnHUlSUVGRSkpKNGPGDI0cOVK7d+/WzJkz9atf/Urz5s1TRkaGnnnmGZ08eVJ+v1/PPvusevbsqXnz5ql79+76/PPPK6wXDAY1adIkpaSkKCsrS7NmzdKll14a/uWgdevWevnll7VgwQJJ0iuvvKL9+/fr2Wef1dSpU1VQUKC33npLkrRhwwa9//77evTRRzVlyhRt3LjRyJ8ZAMA8hnkAqMDVV1+tlJQUJSQk6KabbtJnn30mSbIsS4MGDVJ0dLRiYmL00UcfKSMjQ61atZLP51Pv3r1Vq1Yt7dixQ9u3b1cgENB1112nWrVqqVu3brrgggsqrLdz504VFBTo9ttvV2xsrGJiYtSmTZsKvzcUCumjjz5SZmamEhISFBcXpwEDBoR7XLVqlXr37q3mzZsrNjZWAwcOtOcPCQDgONZsAKACKSkp4Y8bNmyogoICSVK9evXCqyzSt6sxK1as0AcffBC+5vf7VVBQIMuylJSUVG615tTHPVV+fr4aNmyoqKioM/ZWXFysEydOaNy4ceFroVBIwWBQklRYWKjU1NRy/QMA3IlhHgAqkJ+fX+7jpKQkSSo3mEtScnKyBgwYoAEDBpz2GFu2bFFBQYFCoVD4focPH9a555572vempKQoPz9fgUDgjAN93bp1FRMTo+eeey7c16kSExN1+PDhCp8LAMBdWLMBgAosX75chw8fVklJiRYtWqTu3btX+H19+vTRhx9+qB07digUCun48eNat26dSktL1bp1a/l8Pi1btkyBQECff/65du7cWeHjtGzZUomJiXrllVd0/PhxlZWVadu2bZKkBg0aqKCgQH6/X5Lk8/nUp08fLViwQEeOHJEkFRQUaMOGDZKk7t2765NPPtHevXt14sQJLVy4sLr/eAAANQTJPABU4Be/+IX+8Ic/qLCwUOnp6br55psrHMQvuOACjRw5UvPmzdP+/fvDu+5t27ZVrVq19Nvf/lazZ8/W66+/
ro4dO+qSSy6psJ7P59PYsWM1b948jR49WpZl6dJLL1WbNm3Url278AthfT6fXnzxRd12221666239Mgjj+jo0aNKSkrSlVdeqYsvvlgdO3bUddddpyeeeEI+n0+33HKLPv30U7v/yAAADrBCoVDI6SYAoCa5++67NXLkSHXo0MHpVgAA+FGs2QAAAAARimEeAAAAiFCs2QAAAAARimQeAAAAiFAM8wAAAECEYpgHAAAAIhTDPAAAABChGOYBAACACPV/0GU+Uwzx6bIAAAAASUVORK5CYII=\n",
   1178       "text/plain": [
   1179        "<Figure size 1008x626.4 with 2 Axes>"
   1180       ]
   1181      },
   1182      "metadata": {},
   1183      "output_type": "display_data"
   1184     }
   1185    ],
   1186    "source": [
   1187     "# Pair each test document's labelled topic with the column of topic_probabilities holding its row-wise maximum.\npredictions = test_docs.topic.to_frame('topic').assign(predicted=topic_probabilities.idxmax(axis=1).values)\n",
   1188     "# fill_value=0: a (topic, predicted) pair that never occurs is a zero count, not NaN; this also keeps the table integer-typed.\nheatmap_data = predictions.groupby('topic').predicted.value_counts().unstack(fill_value=0)\n",
   1189     "# fmt='d' prints whole-number counts (the default '.2g' would show 100+ as 1e+02).\nsns.heatmap(heatmap_data, annot=True, fmt='d', cmap='Blues');"
   1190    ]
   1191   },
   1192   {
   1193    "cell_type": "markdown",
   1194    "metadata": {},
   1195    "source": [
   1196     "## Resources\n",
   1197     "\n",
   1198     "- pyLDAvis: \n",
   1199     "    - [Talk by the pyLDAvis author](https://speakerdeck.com/bmabey/visualizing-topic-models) and [paper by the original LDAvis authors](http://www.aclweb.org/anthology/W14-3110)\n",
   1200     "    - [Documentation](http://pyldavis.readthedocs.io/en/latest/index.html)\n",
   1201     "- LDA:\n",
   1202     "    - [David Blei Homepage @ Columbia](http://www.cs.columbia.edu/~blei/)\n",
   1203     "    - [Introductory Paper](http://www.cs.columbia.edu/~blei/papers/Blei2012.pdf) and [more technical review paper](http://www.cs.columbia.edu/~blei/papers/BleiLafferty2009.pdf)\n",
   1204     "    - [Blei Lab @ GitHub](https://github.com/Blei-Lab)\n",
   1205     "    \n",
   1206     "- Topic Coherence:\n",
   1207     "    - [Exploring Topic Coherence over many models and many topics](https://www.aclweb.org/anthology/D/D12/D12-1087.pdf)\n",
   1208     "    - [Paper on various Methods](http://www.aclweb.org/anthology/N10-1012)\n",
   1209     "    - [Blog Post - Overview](http://qpleple.com/topic-coherence-to-evaluate-topic-models/)\n"
   1210    ]
   1211   }
   1212  ],
   1213  "metadata": {
   1214   "celltoolbar": "Slideshow",
   1215   "hide_input": false,
   1216   "kernelspec": {
   1217    "display_name": "Python 3",
   1218    "language": "python",
   1219    "name": "python3"
   1220   },
   1221   "language_info": {
   1222    "codemirror_mode": {
   1223     "name": "ipython",
   1224     "version": 3
   1225    },
   1226    "file_extension": ".py",
   1227    "mimetype": "text/x-python",
   1228    "name": "python",
   1229    "nbconvert_exporter": "python",
   1230    "pygments_lexer": "ipython3",
   1231    "version": "3.6.8"
   1232   },
   1233   "name": "_merged",
   1234   "toc": {
   1235    "base_numbering": 1,
   1236    "nav_menu": {},
   1237    "number_sections": true,
   1238    "sideBar": true,
   1239    "skip_h1_title": true,
   1240    "title_cell": "Table of Contents",
   1241    "title_sidebar": "Contents",
   1242    "toc_cell": false,
   1243    "toc_position": {
   1244     "height": "203.153px",
   1245     "left": "69.9915px",
   1246     "right": "1064px",
   1247     "top": "66.3352px",
   1248     "width": "302px"
   1249    },
   1250    "toc_section_display": true,
   1251    "toc_window_display": true
   1252   }
   1253  },
   1254  "nbformat": 4,
   1255  "nbformat_minor": 2
   1256 }