ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

yelp_sentiment.ipynb

(80149B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "code",
      5    "execution_count": 1,
      6    "metadata": {
      7     "ExecuteTime": {
      8      "end_time": "2018-12-28T02:16:30.922402Z",
      9      "start_time": "2018-12-28T02:16:30.638781Z"
     10     }
     11    },
     12    "outputs": [
     13     {
     14      "name": "stderr",
     15      "output_type": "stream",
     16      "text": [
     17       "[nltk_data] Downloading package stopwords to /home/stefan/nltk_data...\n",
     18       "[nltk_data]   Package stopwords is already up-to-date!\n"
     19      ]
     20     },
     21     {
     22      "data": {
     23       "text/plain": [
     24        "True"
     25       ]
     26      },
     27      "execution_count": 1,
     28      "metadata": {},
     29      "output_type": "execute_result"
     30     }
     31    ],
     32    "source": [
     33     "import nltk\n",
     34     "nltk.download('stopwords')"
     35    ]
     36   },
     37   {
     38    "cell_type": "code",
     39    "execution_count": 2,
     40    "metadata": {
     41     "ExecuteTime": {
     42      "end_time": "2018-12-28T02:16:31.728129Z",
     43      "start_time": "2018-12-28T02:16:30.925942Z"
     44     }
     45    },
     46    "outputs": [],
     47    "source": [
     48     "from pathlib import Path\n",
     49     "import numpy as np\n",
     50     "import pandas as pd\n",
     51     "from gensim.models import Doc2Vec\n",
     52     "from gensim.models.doc2vec import TaggedDocument\n",
     53     "import logging\n",
     54     "import warnings\n",
     55     "from random import shuffle\n",
     56     "import lightgbm as lgb\n",
     57     "from sklearn.model_selection import train_test_split\n",
     58     "from nltk import RegexpTokenizer\n",
     59     "from nltk.corpus import stopwords\n",
     60     "from sklearn.linear_model import LogisticRegression\n",
     61     "from sklearn.ensemble import RandomForestClassifier\n",
     62     "from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score\n",
     63     "from sklearn.utils import class_weight\n",
     64     "import seaborn as sns\n", "import umap"
     65    ]
     66   },
     67   {
     68    "cell_type": "code",
     69    "execution_count": 3,
     70    "metadata": {
     71     "ExecuteTime": {
     72      "end_time": "2018-12-28T02:16:35.063506Z",
     73      "start_time": "2018-12-28T02:16:35.061306Z"
     74     }
     75    },
     76    "outputs": [],
     77    "source": [
     78     "warnings.filterwarnings('ignore')\n",
     79     "pd.set_option('display.expand_frame_repr', False)\n",
     80     "np.random.seed(42)"
     81    ]
     82   },
     83   {
     84    "cell_type": "code",
     85    "execution_count": 4,
     86    "metadata": {
     87     "ExecuteTime": {
     88      "end_time": "2018-12-28T02:16:35.246590Z",
     89      "start_time": "2018-12-28T02:16:35.238924Z"
     90     }
     91    },
     92    "outputs": [],
     93    "source": [
     94     "logging.basicConfig(\n",
     95     "        filename='doc2vec.log',\n",
     96     "        level=logging.DEBUG,\n",
     97     "        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',\n",
     98     "        datefmt='%H:%M:%S')"
     99    ]
    100   },
    101   {
    102    "cell_type": "markdown",
    103    "metadata": {},
    104    "source": [
    105     "## Load Data"
    106    ]
    107   },
    108   {
    109    "cell_type": "code",
    110    "execution_count": 38,
    111    "metadata": {
    112     "ExecuteTime": {
    113      "end_time": "2018-12-28T02:31:45.448079Z",
    114      "start_time": "2018-12-28T02:31:21.942362Z"
    115     }
    116    },
    117    "outputs": [],
    118    "source": [
    119     "df = pd.read_parquet('combined.parquet', engine='fastparquet').loc[:, ['stars', 'text']]"
    120    ]
    121   },
    122   {
    123    "cell_type": "code",
    124    "execution_count": 39,
    125    "metadata": {
    126     "ExecuteTime": {
    127      "end_time": "2018-12-28T02:31:45.481577Z",
    128      "start_time": "2018-12-28T02:31:45.449302Z"
    129     }
    130    },
    131    "outputs": [
    132     {
    133      "data": {
    134       "text/plain": [
    135        "5    2641880\n",
    136        "4    1335957\n",
    137        "1     858139\n",
    138        "3     673206\n",
    139        "2     487813\n",
    140        "Name: stars, dtype: int64"
    141       ]
    142      },
    143      "execution_count": 39,
    144      "metadata": {},
    145      "output_type": "execute_result"
    146     }
    147    ],
    148    "source": [
    149     "df.stars.value_counts()"
    150    ]
    151   },
    152   {
    153    "cell_type": "code",
    154    "execution_count": 91,
    155    "metadata": {
    156     "ExecuteTime": {
    157      "end_time": "2018-12-28T02:49:18.719387Z",
    158      "start_time": "2018-12-28T02:49:18.715253Z"
    159     }
    160    },
    161    "outputs": [],
    162    "source": [
    163     "stars = range(1, 6)"
    164    ]
    165   },
    166   {
    167    "cell_type": "code",
    168    "execution_count": 40,
    169    "metadata": {
    170     "ExecuteTime": {
    171      "end_time": "2018-12-28T02:32:45.883173Z",
    172      "start_time": "2018-12-28T02:32:45.238120Z"
    173     }
    174    },
    175    "outputs": [],
    176    "source": [
    177     "sample = pd.concat([df[df.stars==s].sample(n=100000) for s in stars])"
    178    ]
    179   },
    180   {
    181    "cell_type": "code",
    182    "execution_count": 41,
    183    "metadata": {
    184     "ExecuteTime": {
    185      "end_time": "2018-12-28T02:32:47.838880Z",
    186      "start_time": "2018-12-28T02:32:47.743143Z"
    187     }
    188    },
    189    "outputs": [
    190     {
    191      "name": "stdout",
    192      "output_type": "stream",
    193      "text": [
    194       "<class 'pandas.core.frame.DataFrame'>\n",
    195       "Int64Index: 500000 entries, 52085 to 3365007\n",
    196       "Data columns (total 2 columns):\n",
    197       "stars    500000 non-null int64\n",
    198       "text     500000 non-null object\n",
    199       "dtypes: int64(1), object(1)\n",
    200       "memory usage: 11.4+ MB\n"
    201      ]
    202     }
    203    ],
    204    "source": [
    205     "sample.info()"
    206    ]
    207   },
    208   {
    209    "cell_type": "code",
    210    "execution_count": 66,
    211    "metadata": {
    212     "ExecuteTime": {
    213      "end_time": "2018-12-28T00:07:55.455558Z",
    214      "start_time": "2018-12-28T00:07:55.438396Z"
    215     }
    216    },
    217    "outputs": [],
    218    "source": [
    219     "sample.stars = (sample.stars == 5).astype(int)"
    220    ]
    221   },
    222   {
    223    "cell_type": "code",
    224    "execution_count": 42,
    225    "metadata": {
    226     "ExecuteTime": {
    227      "end_time": "2018-12-28T02:32:54.195893Z",
    228      "start_time": "2018-12-28T02:32:54.187161Z"
    229     }
    230    },
    231    "outputs": [
    232     {
    233      "data": {
    234       "text/plain": [
    235        "5    100000\n",
    236        "4    100000\n",
    237        "3    100000\n",
    238        "2    100000\n",
    239        "1    100000\n",
    240        "Name: stars, dtype: int64"
    241       ]
    242      },
    243      "execution_count": 42,
    244      "metadata": {},
    245      "output_type": "execute_result"
    246     }
    247    ],
    248    "source": [
    249     "sample.stars.value_counts()"
    250    ]
    251   },
    252   {
    253    "cell_type": "code",
    254    "execution_count": 43,
    255    "metadata": {
    256     "ExecuteTime": {
    257      "end_time": "2018-12-28T02:33:04.902169Z",
    258      "start_time": "2018-12-28T02:33:03.174809Z"
    259     }
    260    },
    261    "outputs": [],
    262    "source": [
    263     "sample.to_parquet('yelp_sample_5.parquet')"
    264    ]
    265   },
    266   {
    267    "cell_type": "code",
    268    "execution_count": 12,
    269    "metadata": {
    270     "ExecuteTime": {
    271      "end_time": "2018-12-28T02:18:17.982897Z",
    272      "start_time": "2018-12-28T02:18:17.121098Z"
    273     }
    274    },
    275    "outputs": [],
    276    "source": [
    277     "sample = pd.read_parquet('yelp_sample.parquet').reset_index(drop=True)"
    278    ]
    279   },
    280   {
    281    "cell_type": "code",
    282    "execution_count": 44,
    283    "metadata": {
    284     "ExecuteTime": {
    285      "end_time": "2018-12-28T02:33:08.533674Z",
    286      "start_time": "2018-12-28T02:33:08.526044Z"
    287     }
    288    },
    289    "outputs": [
    290     {
    291      "data": {
    292       "text/html": [
    293        "<div>\n",
    294        "<style scoped>\n",
    295        "    .dataframe tbody tr th:only-of-type {\n",
    296        "        vertical-align: middle;\n",
    297        "    }\n",
    298        "\n",
    299        "    .dataframe tbody tr th {\n",
    300        "        vertical-align: top;\n",
    301        "    }\n",
    302        "\n",
    303        "    .dataframe thead th {\n",
    304        "        text-align: right;\n",
    305        "    }\n",
    306        "</style>\n",
    307        "<table border=\"1\" class=\"dataframe\">\n",
    308        "  <thead>\n",
    309        "    <tr style=\"text-align: right;\">\n",
    310        "      <th></th>\n",
    311        "      <th>stars</th>\n",
    312        "      <th>text</th>\n",
    313        "    </tr>\n",
    314        "  </thead>\n",
    315        "  <tbody>\n",
    316        "    <tr>\n",
    317        "      <th>52085</th>\n",
    318        "      <td>1</td>\n",
    319        "      <td>Just terrible.\\n\\nI used to love Chili's - it ...</td>\n",
    320        "    </tr>\n",
    321        "    <tr>\n",
    322        "      <th>527763</th>\n",
    323        "      <td>1</td>\n",
    324        "      <td>I love Cold Stone ice cream, but this location...</td>\n",
    325        "    </tr>\n",
    326        "    <tr>\n",
    327        "      <th>3797997</th>\n",
    328        "      <td>1</td>\n",
    329        "      <td>I don't understand why people give this place ...</td>\n",
    330        "    </tr>\n",
    331        "    <tr>\n",
    332        "      <th>4715860</th>\n",
    333        "      <td>1</td>\n",
    334        "      <td>Terrible disappointment.  It was a special cel...</td>\n",
    335        "    </tr>\n",
    336        "    <tr>\n",
    337        "      <th>2230375</th>\n",
    338        "      <td>1</td>\n",
    339        "      <td>Staff is awful. One called his coworker a bitc...</td>\n",
    340        "    </tr>\n",
    341        "  </tbody>\n",
    342        "</table>\n",
    343        "</div>"
    344       ],
    345       "text/plain": [
    346        "         stars                                               text\n",
    347        "52085        1  Just terrible.\\n\\nI used to love Chili's - it ...\n",
    348        "527763       1  I love Cold Stone ice cream, but this location...\n",
    349        "3797997      1  I don't understand why people give this place ...\n",
    350        "4715860      1  Terrible disappointment.  It was a special cel...\n",
    351        "2230375      1  Staff is awful. One called his coworker a bitc..."
    352       ]
    353      },
    354      "execution_count": 44,
    355      "metadata": {},
    356      "output_type": "execute_result"
    357     }
    358    ],
    359    "source": [
    360     "sample.head()"
    361    ]
    362   },
    363   {
    364    "cell_type": "code",
    365    "execution_count": 17,
    366    "metadata": {
    367     "ExecuteTime": {
    368      "end_time": "2018-12-28T02:19:00.180749Z",
    369      "start_time": "2018-12-28T02:18:56.814179Z"
    370     }
    371    },
    372    "outputs": [
    373     {
    374      "data": {
    375       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEKCAYAAAAMzhLIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XuYXNV55/vvW7e+qVvdaklIqAUtLBkQ2BagYGzGyQTGWHgyluPgE2EfB58wh5wcM4nnjM8EZh57Ep5cTGYm2H7scQYbYuJEBg/jOIpDjLl5HBJH0GAZkGShBoTUkpBa91tf6vLOH3tXqyiqq3Z3V3dd9Ps8Tz1Ve+21V63dau23116Xbe6OiIjIZGK1roCIiNQ3BQoRESlLgUJERMpSoBARkbIUKEREpCwFChERKUuBQkREylKgEBGRshQoRESkrEStK1ANCxcu9P7+/lpXQ0SkoTz33HOH3H1RpXxNESj6+/sZGBiodTVERBqKmb0eJZ9uPYmISFkKFCIiUpYChYiIlKVAISIiZSlQiIhIWQoUIiJSlgKFiIiUpUAxRXp0rIicaxQopuCvt+zl3X/4BNv3n6h1VURE5owCxRRs33+SgyfH+MR9z7Dr0OlaV0dEZE5EChRmts7MdpjZoJndUWJ/i5k9FO7fbGb9YXqvmT1lZqfM7MsF+TvNbEvB65CZfSHc90kzGy7Y96+rc6ozd3xknI5UnJw7H//6Zg6eHK11lUREZl3FQGFmceArwI3AauBmM1tdlO1W4Ki7rwTuAe4O00eBzwKfKczs7ifdfU3+BbwOfKcgy0MF+78+nRObDcdH0iztbuMb/9fPsffYCN/9yd5aV0lEZNZFaVFcDQy6+6vuPg48CKwvyrMeeCD8/DBwvZmZu59296cJAkZJZrYKWAz8/ZRrP8eOnUkzvy3JO/u66WlPsuvwmVpXSURk1kUJFMuAPQXbQ2FayTzungGOA70R63AzQQuicDjRr5jZC2b2sJktj1jOrDs+kqa7LQnABb0d7FagEJFzQJRAYSXSiseIRskzmQ3Atwq2/wbod/d3Ao9ztqXy5i80u83MBsxsYHh4OOJXzUy+RQFw4YJ2Xj+iDm0RaX5RAsUQUPhXfR+wb7I8ZpYA5gNHKhVsZu8CEu7+XD7N3Q+7+1i4+TXgqlLHuvu97r7W3dcuWlTxuRsztnHzbg6dGuONE6Ns3LybE6Np9h4dIZ3Nzfp3i4jUUpRA8SywysxWmFmKoAWwqSjPJuCW8PNNwJMebWbazby5NYGZLS3Y/BCwPUI5sy6bc8YyOdqScQB6O1LkHPYdG6lxzUREZlfFJ9y5e8bMbgceBeLA/e6+1czuAgbcfRNwH/BNMxskaElsyB9vZruALiBlZh8GbnD3beHu/wP4YNFX/paZfQjIhGV9cgbnVzWj6SwAbakgUCzoaAHg9cNnuLC3o2b1EhGZbZEeherujwCPFKV9ruDzKPDRSY7tL1PuRSXS7gTujFKvuTSSDxTJfKBIAfD6EXVoi0hz08zsiEbGg0DRHrYoOlsTJGLG7sPq0BaR5qZAEVFxiyJmRk9Hit1qUYhIk1OgiOhM2KJoDVsUEHRov665FCLS5BQoIsq3KNpTZ7t1FoQtCi09LiLNTIEiopHxDHD21hMEgeLMeJZDp8ZrVS0RkVmnQBHRyHiWVCJGPHZ2Enp+5JP6KUSkmSlQRDSSzr6pNQGFgUIjn0SkeSlQRDQynp0YGpvX057CDHVoi0hTU6CI6Ew6S2tRiyIZj7Gkq1WryIpIU1OgiKhUiwLgggXtmp0tIk1NgSKi0RJ9FADLF7QzdFSBQkSalwJFRGfGSweKvp42Dp4cYyyTrUGtRERmnwJFBKPpLJmcT6wcW2hZdxvusP/YpE97FRFpaAoUERwfSQOUDBR9Pe0ADB3VcylEpDkpUEQwESgmufUEsPeY+ilEpDkpUERw
7MzkLYol81uJmVoUItK8FCgiyLco2pNvfc5TMh5j6fw29ipQiEiTUqCI4NiZYNG/Ui0KCDq01aIQkWYV6VGo57pyfRQbN+8mnc3x2qHTbNy8eyL9Y+++YM7qJyIym9SiiOD4SBoDWpKlf1zd7SmOj6TJ5vRcChFpPpEChZmtM7MdZjZoZneU2N9iZg+F+zebWX+Y3mtmT5nZKTP7ctExPwzL3BK+Fpcrq5aOj6RpTcaJmZXc39OexIETYctDRKSZVAwUZhYHvgLcCKwGbjaz1UXZbgWOuvtK4B7g7jB9FPgs8JlJiv+4u68JXwcrlFUzx0fSk/ZPQNCiADh6Rg8wEpHmE6VFcTUw6O6vuvs48CCwvijPeuCB8PPDwPVmZu5+2t2fJggYUZUsawrHV92xM+mS/RN5Pe1JAI6eUYtCRJpPlECxDNhTsD0UppXM4+4Z4DjQG6HsPwtvO322IBhMt6xZc3wkXXLl2Lz5bUmMs6OjRESaSZRAUeqv+eJe2yh5in3c3d8BvC98fWIqZZnZbWY2YGYDw8PDFb5qZvJ9FJNJxGN0tibUohCRphQlUAwBywu2+4B9k+UxswQwHzhSrlB33xu+nwQ2EtziilyWu9/r7mvdfe2iRYsinMb0jaazpOLlf1Td7Sm1KESkKUUJFM8Cq8xshZmlgA3ApqI8m4Bbws83AU+6+6QtCjNLmNnC8HMS+CXgpemUNRfSWScWK99N0tOeVGe2iDSlihPu3D1jZrcDjwJx4H5332pmdwED7r4JuA/4ppkNEvz1vyF/vJntArqAlJl9GLgBeB14NAwSceBx4GvhIZOWVSvpbI4KDQq621O8uPc4OfdJh9GKiDSiSDOz3f0R4JGitM8VfB4FPjrJsf2TFHvVJPknLatWMtkc8QoX/572FDkP5lLkh8uKiDQDzcyOIJ1z4hVuPXVriKyINCkFiggy2VzFPorutiBQHNfsbBFpMgoUFeRyTs6peOtpfrsChYg0JwWKCtK5HEDFW08tiThtybiGyIpI01GgqCCdDUbmRhnJ1N2eVItCRJqOAkUFmWy0FgUES3koUIhIs1GgqCDfoogaKI5p1JOINBkFigoy+T6KSLeeUoyks4xlsrNdLRGROaNAUUEm30cRoUUxMURWrQoRaSIKFBWkJ/ooKufNT7o7pn4KEWkiChQVTGXU03y1KESkCSlQVJBvUSQi3HrqbA0fYDSiuRQi0jwUKCrI5KL3UcRjRpeGyIpIk1GgqGBiHkXEpcM1RFZEmo0CRQXpKYx6gqBDW53ZItJMFCgqmMo8CgiGyJ4YSZPL1fShfCIiVaNAUUF6Ckt4QHDrKZNzDp9Wh7aINAcFigqmsoQHMPF0u33HRmatTiIic0mBooKpzMyGs3Mp9h9XoBCR5qBAUcF0+igA9h4bnbU6iYjMpUiBwszWmdkOMxs0sztK7G8xs4fC/ZvNrD9M7zWzp8zslJl9uSB/u5n9rZn9zMy2mtnnC/Z90syGzWxL+PrXMz/N6Zvqrae2VJxk3HTrSUSaRsVAYWZx4CvAjcBq4GYzW12U7VbgqLuvBO4B7g7TR4HPAp8pUfR/cfdLgCuAa83sxoJ9D7n7mvD19SmdUZXl51FEjBOYGd3tKYaOnpnFWomIzJ0oLYqrgUF3f9Xdx4EHgfVFedYDD4SfHwauNzNz99Pu/jRBwJjg7mfc/anw8zjwPNA3g/OYNVMd9QSwoD3F0FG1KESkOUQJFMuAPQXbQ2FayTzungGOA71RKmBm3cC/Ap4oSP4VM3vBzB42s+VRypktU731BMGkuz1H1KIQkeYQJVCUukIWzyaLkuetBZslgG8BX3L3V8PkvwH63f2dwOOcbakUH3ubmQ2Y2cDw8HClr5q2qXZmAyzoSHFiNKM1n0SkKUQJFENA4V/1fcC+yfKEF//5wJEIZd8L7HT3L+QT3P2wu4+Fm18Drip1oLvf6+5r3X3tokWLInzV9Ex1CQ+AnnAuhVoVItIMogSKZ4FV
ZrbCzFLABmBTUZ5NwC3h55uAJ929bIvCzH6fIKB8uih9acHmh4DtEeo4azLTuPXU0xEECnVoi0gzSFTK4O4ZM7sdeBSIA/e7+1YzuwsYcPdNwH3AN81skKAlsSF/vJntArqAlJl9GLgBOAH8R+BnwPMW3Nb5cjjC6bfM7ENAJizrk1U612lJZ3OYRXtwUd6CiRaFOrRFpPFVDBQA7v4I8EhR2ucKPo8CH53k2P5Jii155XX3O4E7o9RrLqRzOZJRnoNaoDUZo7MloRaFiDQFzcyuIJN1klO47QTBXIq+Be3s0RBZEWkCChQVZLI5ElNsUQAs72lTZ7aINAUFigrSOScZn1qLAmD5gnaGjo5QoU9fRKTuKVBUkMnmSMSm16IYSWc5dErPpRCRxqZAUUE66ySm0aLo62kHYI86tEWkwSlQVJDO5khNp49iQRAotOaTiDQ6BYoKMtNuUbQBmp0tIo1PgaKCTG56fRQdLQl6O7TcuIg0PgWKCtLZ6Y16AoK5FJqdLSINToGigkxuevMoIJxLoRaFiDQ4BYoK0hknMcWZ2Xl9Pe3sPTpCNqe5FCLSuBQoKkjncqQS0/sx9fe2k8m5np8tIg1NgaKCTHb6LYr+hR0AvHbodDWrJCIypxQoKkhPc60ngBVhoNh1WIFCRBqXAkUFmWmu9QSwuLOF9lRcLQoRaWgKFBVMd60nCJYbv7C3g10KFCLSwBQoKpjuWk95Kxa2s+uwhsiKSONSoKhgums95fX3drDnyBnS2VwVayUiMncUKCrI5Gbaouggk3MtDigiDUuBooL0DPoooGDkk/opRKRBRboCmtk6M9thZoNmdkeJ/S1m9lC4f7OZ9YfpvWb2lJmdMrMvFx1zlZm9GB7zJTOzMH2BmT1mZjvD956Zn+b0Zaa51tPGzbvZuHk3z+8+BsDDzw2xcfPualdPRGTWVQwUZhYHvgLcCKwGbjaz1UXZbgWOuvtK4B7g7jB9FPgs8JkSRX8VuA1YFb7Whel3AE+4+yrgiXC7Zmay1hNARypOSyLG4dNjVayViMjciXIFvBoYdPdX3X0ceBBYX5RnPfBA+Plh4HozM3c/7e5PEwSMCWa2FOhy9x978FDpPwc+XKKsBwrS55y7B6vHTnNmNgRDZBfOa+GwHokqIg0qSqBYBuwp2B4K00rmcfcMcBzorVDm0CRlnufu+8Oy9gOLSxVgZreZ2YCZDQwPD0c4janLhIv5JWfQogDonZfi0Cm1KESkMUW5Apb6c7p4OdQoeWaS/62Z3e9197XuvnbRokVTOTSyTDao0kxuPQH0drRw7EyaTE5DZEWk8US5Ag4Bywu2+4B9k+UxswQwHzhSocy+Sco8EN6ayt+iOhihjrMiHV7Yp7uER97CeSkcOHJat59EpPFECRTPAqvMbIWZpYANwKaiPJuAW8LPNwFPhn0PJYW3lE6a2TXhaKdfA/66RFm3FKTPuYkWxQz6KAB657UAqJ9CRBpSolIGd8+Y2e3Ao0AcuN/dt5rZXcCAu28C7gO+aWaDBC2JDfnjzWwX0AWkzOzDwA3uvg34TeAbQBvwd+EL4PPAt83sVmA38NFqnOh0ZMLZ1DO99bRwXgpA/RQi0pAqBgoAd38EeKQo7XMFn0eZ5ILu7v2TpA8Al5dIPwxcH6Ves208e/bW00xW4GhPJehsSXDghAKFiDQezcwuI3/raaajngAWd7Vw8ORo5YwiInVGgaKM/Cilmd56Aljc1crBE2Pk9PxsEWkwChRlpPMtihl2ZgOc19nKeDbHXj0/W0QajAJFGdWaRwFwXlcw8unlAydnXJaIyFxSoCgjPXHraeYtisWdrQC8fODUjMsSEZlLChRlpDPhqKcZLDOe15aK09WaYKdaFCLSYBQoyji71tPMWxQA53W18vJBBQoRaSwKFGWkqzThLm9xZwuDB09p5JOINBQFijLOzqOoXotiNJ1jz9EzVSlPRGQuKFCUMTGPogp9FBDMpQB1aItIY1GgKCNd
5RbF4k4NkRWRxqNAUUa1+yhak3HOn9+qQCEiDUWBooxq91EAvH1JJzveUKAQkcahQFHG2QcXVe/HdOnSLl4ZPsV4Rk+7E5HGoEBRRrUeXFRo9dIu0llnp+ZTiEiDUKAoo9p9FACrz+8CYNu+E1UrU0RkNilQlFHtmdkA/b0dtCXjbN+vFoWINAYFijLyaz1Vax4FQDxmXLykk237j1etTBGR2aRAUUZ6FloUENx+2rbvBO5aykNE6p8CRRmZbI5EzDCrcqBY2sWJ0YweYiQiDSFSoDCzdWa2w8wGzeyOEvtbzOyhcP9mM+sv2HdnmL7DzD4Qpl1sZlsKXifM7NPhvt81s70F+z5YnVOdukzOq/IsimL5Dm31U4hII6gYKMwsDnwFuBFYDdxsZquLst0KHHX3lcA9wN3hsauBDcBlwDrgv5lZ3N13uPsad18DXAWcAf6qoLx78vvd/ZGZneL0pbO5qjyLotglSzox08gnEWkMUa6CVwOD7v6qu48DDwLri/KsBx4IPz8MXG/B/Zr1wIPuPuburwGDYXmFrgdecffXp3sSsyWTnZ0WRXsqwYreDnVoi0hDiBIolgF7CraHwrSSedw9AxwHeiMeuwH4VlHa7Wb2gpndb2Y9pSplZreZ2YCZDQwPD0c4jalLZ3NVnUNR6NLzu9i2Xy0KEal/Ua6Cpf6kLh6uM1messeaWQr4EPA/CvZ/FXgbsAbYD/zXUpVy93vdfa27r120aNHktZ+BdNZJVTlQbNy8m42bd5PO5NhzZIT7n36NjZt3V/U7RESqKcpVcAhYXrDdB+ybLI+ZJYD5wJEIx94IPO/uB/IJ7n7A3bPungO+xltvVc2ZTC43K7eeAM7vbgNg6KhGPolIfYsSKJ4FVpnZirAFsAHYVJRnE3BL+Pkm4EkPJglsAjaEo6JWAKuAZwqOu5mi205mtrRg85eBl6KeTLVlsl7VdZ4KXbCgnZjBa4f0ECMRqW+JShncPWNmtwOPAnHgfnffamZ3AQPuvgm4D/immQ0StCQ2hMduNbNvA9uADPApd88CmFk78H7gN4q+8o/NbA3BLapdJfbPmXQ2V9WVYwu1JuOc393Gq4dOz0r5IiLVUjFQAIRDVB8pSvtcwedR4KOTHPsHwB+USD9D0OFdnP6JKHWaC7M1jyLvooUd/MPgYS05LiJ1TTOzy0hnc1Vd56nYioXzyLqz+8iZWfsOEZGZUqAoI53NVX3UU6H+XvVTiEj9U6AoY7Ym3OW1JOMsUz+FiNQ5BYoy0jmftQl3eSsWzmPoyAgj49lZ/R4RkelSoCgjk82RnKXhsXkXLeog687zu4/O6veIiEyXAkUZs33rCeDCcD7Fj185PKvfIyIyXQoUZczmWk95Lck4fT3t/Gjn7KxXJSIyUwoUZaRzszvqKe/t53XywtBxhk+Ozfp3iYhMlQJFGbO5hEehi5d0AvCjl9WqEJH6o0BRRjo7+6OeAJbOb2XhvBZ+qEAhInVIgaKMTC5HcpY7swFiZvzC2xfxo5eHyeaKV3AXEaktBYoygltPc/Mj+sVLFnF8JM2WPRomKyL1RYGijPHs3LQoAN63chExgx/u0O0nEakvkVaPPVdlZnGZ8WJ/++J+lve0853n97J0fttE+sfefcGcfL+IyGTUophELufknFmfcFfo4iWd7D02wonR9Jx9p4hIJQoUk0jngmdEzFWLAuCSJV0A7Nh/cs6+U0SkEgWKSWSyweijuZhHkXdeVwvd7Um2v3Fizr5TRKQSBYpJTASKOWxRmBmXLOnileFTpLN66p2I1AcFikmMZ/O3nuauRQFw6ZJO0lnnlYN6mJGI1IdIgcLM1pnZDjMbNLM7SuxvMbOHwv2bzay/YN+dYfoOM/tAQfouM3vRzLaY2UBB+gIze8zMdobvPTM7xenJ1KCPAmDFwg5SiRjb31A/hYjUh4rDY80sDnwFeD8wBDxrZpvcfVtBtluBo+6+0sw2AHcDv2pmq4ENwGXA+cDjZvZ2
d88/pecX3f1Q0VfeATzh7p8Pg9IdwO/M4BynZOPm3QAcOT0OwMCuo/gcTpZOxGOsWjyPHW+cwP38uftiEZFJRPlz+Wpg0N1fdfdx4EFgfVGe9cAD4eeHgevNzML0B919zN1fAwbD8sopLOsB4MMR6lh1uXApjTluUABw6ZIuToxm2HdsdO6/XESkSJTL4DJgT8H2UJhWMo+7Z4DjQG+FYx34gZk9Z2a3FeQ5z933h2XtBxZHO5XqyobNiJjNbR8FBPMpYgYv7j0+598tIlIsyszsUlfK4psxk+Upd+y17r7PzBYDj5nZz9z9RxHqE3xhEFxuA7jggurPXs5OtCjmPlB0tCRYtbiTnw4dI5dzYjWog4hIXpQWxRCwvGC7D9g3WR4zSwDzgSPljnX3/PtB4K84e0vqgJktDctaChwsVSl3v9fd17r72kWLFkU4jamZCBQ1aFEArLmgm+MjaTa/dqQm3y8ikhclUDwLrDKzFWaWIuic3lSUZxNwS/j5JuBJd/cwfUM4KmoFsAp4xsw6zKwTwMw6gBuAl0qUdQvw19M7tZnJee1aFBD0U6QSMb77k701+X4RkbyKgSLsc7gdeBTYDnzb3bea2V1m9qEw231Ar5kNAv8fwUgl3H0r8G1gG/B94FPhiKfzgKfN7KfAM8Dfuvv3w7I+D7zfzHYSjLT6fHVOdWryLYpa3fZJJWJcfn4Xj7y4n9F0tvIBIiKzJNLqse7+CPBIUdrnCj6PAh+d5Ng/AP6gKO1V4F2T5D8MXB+lXrOp1reeANYs7+H53cd4YvtB/uU7l9asHiJybtPM7EmMZYIJd6lE7X5EFy3qYHFnC995fqhmdRARUaCYxHgYKFpqGChiZtx0VR9P7TjI0NEzNauHiJzbFCgmMZYJ+gVakvGa1uPj11wIwDf/6fWa1kNEzl0KFJMYq4MWBcCy7jZuWL2Eh57do05tEakJBYpJjGVyxGxun0cxmVve28+xM2k2bSmeviIiMvsUKCYxlsmRSsSwGo56yrvmogVcfF4n3/jHXfhcrlAoIkLE4bHnovFMlpZEbfsn4Oxqtpcu7eK7W/byh4/8jBULO/jYu6u/bImISClqUUxiLJOref9EoTXLu2lPxXl653CtqyIi55j6uRLWmfE6CxSpRIxrLupl+xsnGT45VuvqiMg5pH6uhHUmaFHU/tZToWsu6iURM54eLH7Wk4jI7FGgmMRYJlvTWdmlzGtJcOUFPfxk91EOnVKrQkTmRn1dCetIvfVR5F27ciGZnPNn//BarasiIueI+rsS1omxdK7uWhQAizpbeMey+dz/9C4OntCjUkVk9tXflbBOjNdhH0XeDavPI5PLcc/jO2tdFRE5ByhQlJDJ5si605Kszx9P77wWPv7uC/n2wB4GD56sdXVEpMnV55Wwxuplnady/s11K2lLxrn7+ztqXRURaXKamV1CIwSKR7ce4L1v6+UH2w7wh3+7nf6FHQCasS0iVVe/V8IaGp94aFF99lHkvfdtC+lqTfB3L+3XGlAiMmsUKEqYeBZFHbcoIJit/S8uPY89R0fYuu9ErasjIk2qvq+ENdIIt57yrrigh8WdLTy69Y2J53yLiFRTpCuhma0zsx1mNmhmd5TY32JmD4X7N5tZf8G+O8P0HWb2gTBtuZk9ZWbbzWyrmf12Qf7fNbO9ZrYlfH1w5qc5NWcDRX3fegKIx4x1ly3h8OlxLRgoIrOiYqAwszjwFeBGYDVws5mtLsp2K3DU3VcC9wB3h8euBjYAlwHrgP8WlpcB/p27XwpcA3yqqMx73H1N+HpkRmc4DeMNcusp7+IlnVx+fhePbz/IS3uP17o6ItJkolwJrwYG3f1Vdx8HHgTWF+VZDzwQfn4YuN6CJ/6sBx509zF3fw0YBK529/3u/jyAu58EtgPLZn461TE20ZndGIHCzPjwmmW0t8T59ENb9MhUEamqKFfCZcCegu0h3npRn8jj7hngONAb5djwNtUVwOaC5NvN7AUz
u9/MeiLUsaoaqY8ir70lwU1X9jF48BR3fW+bRkGJSNVEuRKWehZo8VVosjxljzWzecD/BD7t7vlhO18F3gasAfYD/7VkpcxuM7MBMxsYHq7uvfmxdI64GYl44wQKgFXndfIbv3ARGzfv5ktPDNa6OiLSJKJcCYeA5QXbfcC+yfKYWQKYDxwpd6yZJQmCxF+6+3fyGdz9gLtn3T0HfI3g1tdbuPu97r7W3dcuWrQowmlEV49LjEf1Ox+4hJuu6uOex1/m/qe1wqyIzFyUq+GzwCozW2FmKYLO6U1FeTYBt4SfbwKe9ODexyZgQzgqagWwCngm7L+4D9ju7n9SWJCZLS3Y/GXgpame1EyNZ3J1u85TJbGY8fmPvIMPXHYed31vG997oTimi4hMTcUlPNw9Y2a3A48CceB+d99qZncBA+6+ieCi/00zGyRoSWwIj91qZt8GthGMdPqUu2fN7J8BnwBeNLMt4Vf9h3CE0x+b2RqCW1S7gN+o4vlGUq/Poohi4+bdQDBr+2f7T/LpB7ewde8JfufGS2pcMxFpVNYMnZ5r1671gYGBqpS1cfNu7n/6NcazOf6fX3hbVcqslVNjGb76w0HSWefRf/vzLOtuq3WVRKSOmNlz7r62Ur7G/LN5lo1lsg3boig0ryXBr72nn3Q2xyfvf4ZjZ8ZrXSURaUCNfzWcBWOZ+ny63XSc19XKJ665kNcPn+HXv/EsZ8Yzta6SiDSY5rgaVtlYHT/dbjouWjSPL928hi17jvGbf/G8JuSJyJQoUJTQyMNjJ7Pu8qX80Ufewf96eZhbH3iW02NqWYhINM11NawCdw+fl91cP5qNm3eTzcFHr+rjHwcPc+MX/557f/RqraslIg1AT7grksk5OW+s5Tum4ooLekjGYzw0sIc/eWwHbak4H7v6AuKxUpPoRUTUoniLRlznaaouXzaff3PdSs7vbuOz332Jm/70H9l16HStqyUidUotiiJj6fwS483TmV3K4s5Wbr12BVv2HONvXtjHDff8iH/5zqWsvbCHj19zYa2rJyJ1RIGiSKMtMT4TZsYVF/SwYmEHDz8/xF/9ZC8Du45w8ZJO1vYvqHX1RKRONP/VcIrG87eeGnStp+nobk/x69eu4Feu7OP4SJqb/vTH/OZEkGrMAAAMrklEQVRfPKfbUSICqEXxFo30GNRqiplx1YU9vGPZfE6MpvnT//UKj28/wK/+3HJ+5co+1izvJljLUUTONQoURcYa7DGo1ZZKxFg4r4Xfvn4Vj28/wLee2cNf/NNuFnSk+L/fdxE3X72c7vZUraspInNIgaLI+Dkw6imKztYkv3xFHzdevpSt+07w/O6j3P39n/HFJ17mI1f28evX9rNycWetqykic0CBosi5eutpMq3JOFdd2MNVF/aw//gIP37lMN9+dg8bN+9m1eJ5vPdtC/lP/2o1Mc3DEGlaChRF8reezoVRT1O1dH4bH7myjxsuW8Izrx1h82uHeeDHu/j7wWE+9K7zuXblQtYs7ybZYI+QFZHyFCiKjGVyJGKmmcplzGtJcN0li/n5ty/kpb3HeWX4NF98YidfeHwnLYkYq8/v4p3L5vOety3kvSt76WpN1rrKIjIDChRFmmmJ8dmWiMVYs7yHNct7uPHyJbw6fJrXD59m77FRvvXMHh748evEY8YVy7v5+bcv4n2rFvLOvm4FYZEGo0BRpBkXBJwL7akEly+bz+XL5gOQzTm7j5xh58GT7Dxwinsee5k/eexl2pJxrrt0MSsXzWPhvBTnd7dx+bL5nNfVWuMzEJHJKFAUSGdzvDp8iqXz9cjQmYrHjBULO1ixsIMbVsPpsQyDw6cYPHCKp3ce4pEX9lP4EN7O1gRX9y/g8mXzuXRpFysXd3DBgg617kTqgAJFgce2HeDEaIb1a7R8RbV1tCR4V1837+rrBoIWx5nxDEdOj7P32Ah7j46w+8gZntxxkMLHuLen4sxrSbB8QTurFs/jgt52ejtSLOhooa+n
jeUL2pnXol9jkdkU6X+Yma0DvgjEga+7++eL9rcAfw5cBRwGftXdd4X77gRuBbLAb7n7o+XKNLMVwIPAAuB54BPuPicPe/7zH++iuz3JxUs0P2C2xWNGZ2uSztYkF/Z2TKSPZbIMnxxj+OQYR86MM5bOMTKe5fCpcX62fx+nx9/6dL72VJzO1gQ97Sn6ezu4aFEHiztb6G5PMb89yfy2JN1twfv8tiQJjcoSmZKKgcLM4sBXgPcDQ8CzZrbJ3bcVZLsVOOruK81sA3A38KtmthrYAFwGnA88bmZvD4+ZrMy7gXvc/UEz+9Ow7K9W42TLefnASf7p1SN84LIlxLRURc20JOL09bTT19Necv94JseZ8QynxoLWyJHT45weyzCWyXFqLMPA60f5wbY3yHnJwwHobEnQ1Zakqy1JeypOeypOWzJ8T8VpSyboaInT056id16K1uTZOTVGsJhiazJGR0uCeS0J2lNxOlIJUokYqUSMRMy03Ik0lSgtiquBQXd/FcDMHgTWA4WBYj3wu+Hnh4EvW/A/ZT3woLuPAa+Z2WBYHqXKNLPtwHXAx8I8D4Tlzkqg2LbvBC/uPcZl58/nLzfvJpWIsfbCntn4KqmS4GKcors9NWkwyeackXSW0fEsZ9JZRsazjKQznBnPMpLfHs8yms5y7Mw4B0/kSGed8WyOdCbHeDbHeCZHmVhTuZ7xGMm4kUzESMZjJGNGPG7ELRh6nYjFgvd4fvvN6a3JGO2pBG2pOO3JOK3JOBODxcIgZARrdMUMYjE7+9kMC9/jMaMlEQS1lkQMJ3iKozvkHHLu5MJtx8nlmDhvD+8Bvunn4Pm3cJ+/ZddEmhccaRjJuJFKxGgJA2oqHp9IS8RimAUtzfyrIxUE7JgZmZyDQzwe/KxiFr5XaQSd+5v/tacS6N39TfndnWzOiRf8wZDO5nCHZDxIy4W/o/l/HzNjLJPl9FiWlkSM9lQcdzg+kubkaIb5bUm62hKMpnPsPTbCqbEM589vZeG8ljmZ7BolUCwD9hRsDwHvniyPu2fM7DjQG6b/U9Gxy8LPpcrsBY65e6ZE/qr7wbY3+MLjOye2P3LlMjp0v7vhxWPGvPCv/enKuTOaznJqLEMm++aLiHvwH38sk2Msk2U8E3zO5pxseJHIvzI5J5vLkcsFZWY9eIJiLue4O2NpP5ueO3vhTofBajzrpDM50tlgxYCZBK9mZMZE4HCAfMDzMCDm8xFc/GMWBC0zJoJpJucTF/JSCoNvYRmOk86eDQrJuJHzs8sAxSz4wyaTDX4P8mnJeGxiBYh8WiIemzgOmBhCni1oGsdj9qZtCP4g+b31l3Hz1RfM6OdYSZT/SaXCVfGPdLI8k6WXuklcLv9bK2V2G3BbuHnKzHaUyjcV9wRvC4FDMy2rhlT/2lL9a6eR6w7TrP/H/vDsLZhpiPSUsiiBYghYXrDdB+ybJM+QmSWA+cCRCseWSj8EdJtZImxVlPouANz9XuDeCPWfEjMbcPe11S53rqj+taX6104j1x3qu/5Rhn88C6wysxVmliLonN5UlGcTcEv4+SbgSQ9u+m0CNphZSziaaRXwzGRlhsc8FZZBWOZfT//0RERkpiq2KMI+h9uBRwmGst7v7lvN7C5gwN03AfcB3ww7q48QXPgJ832boOM7A3zK3bMApcoMv/J3gAfN7PeBn4Rli4hIjVhxb/+5zsxuC29rNSTVv7ZU/9pp5LpDfddfgUJERMrSFFURESlLgaKAma0zsx1mNmhmd9S6PqWY2f1mdtDMXipIW2Bmj5nZzvC9J0w3M/tSeD4vmNmVtas5mNlyM3vKzLab2VYz++0Gq3+rmT1jZj8N6/97YfoKM9sc1v+hcIAG4SCOh8L6bzaz/lrWP8/M4mb2EzP7XrjdMPU3s11m9qKZbTGzgTCtUX5/us3sYTP7Wfh/4D2NUncFipCdXarkRmA1cLMFS5DUm28A64rS7gCecPdVwBPhNgTnsip83cYcLIVSQQb4
d+5+KXAN8KnwZ9wo9R8DrnP3dwFrgHVmdg1nl51ZBRwlWHYGCpa2IZimc3cN6lzKbwPbC7Ybrf6/6O5rCoaSNsrvzxeB77v7JcC7CP4NGqPuwXR+vYD3AI8WbN8J3Fnrek1S137gpYLtHcDS8PNSYEf4+b8DN5fKVw8vgqHP72/E+gPtBItWvptg/k+i+PeIYFTfe8LPiTCf1bjefQQXpOuA7xFMcm2k+u8CFhal1f3vD9AFvFb882uEuru7WhQFSi1VMmvLh1TZee6+HyB8Xxym1+05hbcxrgA200D1D2/bbAEOAo8BrzD5sjNvWtoGyC9tU0tfAP49kF8votyyOfVYfwd+YGbPWbA6AzTG789FwDDwZ+Ftv6+bWQeNUXcFigKRlw9pIHV5TmY2D/ifwKfd/US5rCXSalp/d8+6+xqCv8yvBi4tlS18r6v6m9kvAQfd/bnC5BJZ67L+oWvd/UqCWzOfMrOfL5O3nuqfAK4EvuruVwCnOXubqZR6qrsCRYEoS5XUqwNmthQgfD8YptfdOZlZkiBI/KW7fydMbpj657n7MeCHBH0t3RYsXQNvruNE/e3NS9vUyrXAh8xsF8EzX64jaGE0Sv1x933h+0HgrwiCdSP8/gwBQ+6+Odx+mCBwNELdFSgKRFmqpF4VLqFSuOzJJuDXwhEU1wDH883cWjAzI5hpv93d/6RgV6PUf5GZdYef24B/QdAhOdmyM5MtbVMT7n6nu/e5ez/B7/eT7v5xGqT+ZtZhZp35z8ANwEs0wO+Pu78B7DGzi8Ok6wlWrKj7ugPqzC58AR8EXia47/wfa12fSer4LWA/kCb4q+NWgvvGTwA7w/cFYV4jGMn1CvAisLbGdf9nBM3nF4At4euDDVT/dxIsK/MCwQXqc2H6RQRrmA0C/wNoCdNbw+3BcP9Ftf79KTiXfw58r5HqH9bzp+Fra/7/aAP9/qwBBsLfn+8CPY1Sd83MFhGRsnTrSUREylKgEBGRshQoRESkLAUKEREpS4FCRETKUqAQmaZwNdD/d5rH9pvZx6pdJ5HZoEAhMn3dwLQCBcHCjgoU0hAUKESm7/PA28JnI/xnM/v/zezZ8PkB+WdV/Fy43RrOLN5qZpeHx74vPPbf1vQsRCrQhDuRaQpXwP2eu19uZjcQLHPxGwSzajcBf+zuPzKz3yeY5dxGsN7PH5nZPwc+4+6/VJPKi0xBonIWEYnghvD1k3B7HsFDZ34E3EWwltgo8Fs1qZ3IDChQiFSHAX/k7v+9xL4FBIEjSdCyOD2XFROZKfVRiEzfSaAz/Pwo8OvhszYws2Vmln8Izb3AZ4G/5OzjRAuPFalralGITJO7HzazfzCzl4C/AzYCPw5WU+cU8H+a2Tog4+4bw+ey/6OZXQf8PZAxs58C33D3e2p0GiIVqTNbRETK0q0nEREpS4FCRETKUqAQEZGyFChERKQsBQoRESlLgUJERMpSoBARkbIUKEREpKz/DecErv0c3J59AAAAAElFTkSuQmCC\n",
    376       "text/plain": [
    377        "<Figure size 432x288 with 1 Axes>"
    378       ]
    379      },
    380      "metadata": {
    381       "needs_background": "light"
    382      },
    383      "output_type": "display_data"
    384     }
    385    ],
    386    "source": [
    387     "sns.distplot(sample.text.str.split().str.len());"
    388    ]
    389   },
    390   {
    391    "cell_type": "markdown",
    392    "metadata": {},
    393    "source": [
    394     "## Doc2Vec"
    395    ]
    396   },
    397   {
    398    "cell_type": "markdown",
    399    "metadata": {},
    400    "source": [
    401     "### Basic text cleaning"
    402    ]
    403   },
    404   {
    405    "cell_type": "code",
    406    "execution_count": 45,
    407    "metadata": {
    408     "ExecuteTime": {
    409      "end_time": "2018-12-28T02:33:19.647682Z",
    410      "start_time": "2018-12-28T02:33:19.644233Z"
    411     }
    412    },
    413    "outputs": [],
    414    "source": [
    415     "tokenizer = RegexpTokenizer(r'\\w+')\n",
    416     "stopword_set = set(stopwords.words('english'))\n",
    417     "\n",
    418     "def clean(review):\n",
    419     "    tokens = tokenizer.tokenize(review)\n",
    420     "    return ' '.join([t for t in tokens if t not in stopword_set])"
    421    ]
    422   },
    423   {
    424    "cell_type": "code",
    425    "execution_count": 46,
    426    "metadata": {
    427     "ExecuteTime": {
    428      "end_time": "2018-12-28T02:33:35.354851Z",
    429      "start_time": "2018-12-28T02:33:20.198492Z"
    430     }
    431    },
    432    "outputs": [],
    433    "source": [
    434     "sample.text = sample.text.str.lower().apply(clean)"
    435    ]
    436   },
    437   {
    438    "cell_type": "code",
    439    "execution_count": 47,
    440    "metadata": {
    441     "ExecuteTime": {
    442      "end_time": "2018-12-28T02:33:35.368047Z",
    443      "start_time": "2018-12-28T02:33:35.355960Z"
    444     }
    445    },
    446    "outputs": [
    447     {
    448      "data": {
    449       "text/html": [
    450        "<div>\n",
    451        "<style scoped>\n",
    452        "    .dataframe tbody tr th:only-of-type {\n",
    453        "        vertical-align: middle;\n",
    454        "    }\n",
    455        "\n",
    456        "    .dataframe tbody tr th {\n",
    457        "        vertical-align: top;\n",
    458        "    }\n",
    459        "\n",
    460        "    .dataframe thead th {\n",
    461        "        text-align: right;\n",
    462        "    }\n",
    463        "</style>\n",
    464        "<table border=\"1\" class=\"dataframe\">\n",
    465        "  <thead>\n",
    466        "    <tr style=\"text-align: right;\">\n",
    467        "      <th></th>\n",
    468        "      <th>stars</th>\n",
    469        "      <th>text</th>\n",
    470        "    </tr>\n",
    471        "  </thead>\n",
    472        "  <tbody>\n",
    473        "    <tr>\n",
    474        "      <th>3713191</th>\n",
    475        "      <td>1</td>\n",
    476        "      <td>called 938 placed order informer ian manager a...</td>\n",
    477        "    </tr>\n",
    478        "    <tr>\n",
    479        "      <th>3632813</th>\n",
    480        "      <td>3</td>\n",
    481        "      <td>ok best tip sell stuff buffalo exchange sharin...</td>\n",
    482        "    </tr>\n",
    483        "    <tr>\n",
    484        "      <th>1414414</th>\n",
    485        "      <td>5</td>\n",
    486        "      <td>afford rooms well worth money absolutely amazi...</td>\n",
    487        "    </tr>\n",
    488        "    <tr>\n",
    489        "      <th>4609094</th>\n",
    490        "      <td>3</td>\n",
    491        "      <td>little bit pricier nw competition peak hours d...</td>\n",
    492        "    </tr>\n",
    493        "    <tr>\n",
    494        "      <th>4996179</th>\n",
    495        "      <td>4</td>\n",
    496        "      <td>great pigging comfort food visiting great frie...</td>\n",
    497        "    </tr>\n",
    498        "    <tr>\n",
    499        "      <th>1826950</th>\n",
    500        "      <td>5</td>\n",
    501        "      <td>went sun auto fri mar 9th dealt patrick mantan...</td>\n",
    502        "    </tr>\n",
    503        "    <tr>\n",
    504        "      <th>4210188</th>\n",
    505        "      <td>5</td>\n",
    506        "      <td>went nail salon must say impressed level custo...</td>\n",
    507        "    </tr>\n",
    508        "    <tr>\n",
    509        "      <th>1354353</th>\n",
    510        "      <td>5</td>\n",
    511        "      <td>rita must love custard black cherry little bit...</td>\n",
    512        "    </tr>\n",
    513        "    <tr>\n",
    514        "      <th>2760</th>\n",
    515        "      <td>1</td>\n",
    516        "      <td>drittes goa pfaffing erlebt absolut nix unterh...</td>\n",
    517        "    </tr>\n",
    518        "    <tr>\n",
    519        "      <th>1118726</th>\n",
    520        "      <td>1</td>\n",
    521        "      <td>visited week ago im finally writing review pla...</td>\n",
    522        "    </tr>\n",
    523        "  </tbody>\n",
    524        "</table>\n",
    525        "</div>"
    526       ],
    527       "text/plain": [
    528        "         stars                                               text\n",
    529        "3713191      1  called 938 placed order informer ian manager a...\n",
    530        "3632813      3  ok best tip sell stuff buffalo exchange sharin...\n",
    531        "1414414      5  afford rooms well worth money absolutely amazi...\n",
    532        "4609094      3  little bit pricier nw competition peak hours d...\n",
    533        "4996179      4  great pigging comfort food visiting great frie...\n",
    534        "1826950      5  went sun auto fri mar 9th dealt patrick mantan...\n",
    535        "4210188      5  went nail salon must say impressed level custo...\n",
    536        "1354353      5  rita must love custard black cherry little bit...\n",
    537        "2760         1  drittes goa pfaffing erlebt absolut nix unterh...\n",
    538        "1118726      1  visited week ago im finally writing review pla..."
    539       ]
    540      },
    541      "execution_count": 47,
    542      "metadata": {},
    543      "output_type": "execute_result"
    544     }
    545    ],
    546    "source": [
    547     "sample.sample(n=10)"
    548    ]
    549   },
    550   {
    551    "cell_type": "code",
    552    "execution_count": 48,
    553    "metadata": {
    554     "ExecuteTime": {
    555      "end_time": "2018-12-28T02:33:57.874953Z",
    556      "start_time": "2018-12-28T02:33:55.863246Z"
    557     }
    558    },
    559    "outputs": [
    560     {
    561      "name": "stdout",
    562      "output_type": "stream",
    563      "text": [
    564       "<class 'pandas.core.frame.DataFrame'>\n",
    565       "Int64Index: 485681 entries, 52085 to 3365007\n",
    566       "Data columns (total 2 columns):\n",
    567       "stars    485681 non-null int64\n",
    568       "text     485681 non-null object\n",
    569       "dtypes: int64(1), object(1)\n",
    570       "memory usage: 11.1+ MB\n"
    571      ]
    572     }
    573    ],
    574    "source": [
    575     "sample = sample[sample.text.str.split().str.len()>10]\n",
    576     "sample.info()"
    577    ]
    578   },
    579   {
    580    "cell_type": "markdown",
    581    "metadata": {},
    582    "source": [
    583     "### Create sentence stream"
    584    ]
    585   },
    586   {
    587    "cell_type": "code",
    588    "execution_count": 49,
    589    "metadata": {
    590     "ExecuteTime": {
    591      "end_time": "2018-12-28T02:34:06.903097Z",
    592      "start_time": "2018-12-28T02:34:01.100396Z"
    593     }
    594    },
    595    "outputs": [],
    596    "source": [
    597     "sentences = []\n",
    598     "for i, (_, text) in enumerate(sample.values):\n",
    599     "    sentences.append(TaggedDocument(words=text.split(), tags=[i]))"
    600    ]
    601   },
    602   {
    603    "cell_type": "markdown",
    604    "metadata": {},
    605    "source": [
    606     "### Formulate the model"
    607    ]
    608   },
    609   {
    610    "cell_type": "code",
    611    "execution_count": 50,
    612    "metadata": {
    613     "ExecuteTime": {
    614      "end_time": "2018-12-28T02:34:06.906431Z",
    615      "start_time": "2018-12-28T02:34:06.904273Z"
    616     }
    617    },
    618    "outputs": [],
    619    "source": [
    620     "size=300\n",
    621     "window=5\n",
    622     "min_count=0\n",
    623     "epochs=5\n",
    624     "negative=5\n",
    625     "dm = 1\n",
    626     "dm_concat=0\n",
    627     "dbow_words=0\n",
    628     "workers = 8"
    629    ]
    630   },
    631   {
    632    "cell_type": "code",
    633    "execution_count": null,
    634    "metadata": {},
    635    "outputs": [],
    636    "source": [
    637     "model = Doc2Vec(documents=sentences,\n",
    638     "                dm=1,\n",
    639     "                size=size,\n",
    640     "                window=window,\n",
    641     "                min_count=min_count,\n",
    642     "                workers=workers,\n",
    643     "                epochs=epochs,\n",
    644     "                negative=negative,\n",
    645     "                dm_concat=dm_concat,\n",
    646     "                dbow_words=dbow_words)"
    647    ]
    648   },
    649   {
    650    "cell_type": "code",
    651    "execution_count": 51,
    652    "metadata": {
    653     "ExecuteTime": {
    654      "end_time": "2018-12-28T02:37:01.442409Z",
    655      "start_time": "2018-12-28T02:34:07.761377Z"
    656     }
    657    },
    658    "outputs": [],
    659    "source": [
    660     "model = Doc2Vec(documents=sentences,\n",
    661     "                dm=dm,\n",
    662     "                size=size,\n",
    663     "                window=window,\n",
    664     "                min_count=min_count,\n",
    665     "                workers=workers,\n",
    666     "                epochs=epochs,\n",
    667     "                negative=negative,\n",
    668     "                dm_concat=dm_concat,\n",
    669     "                dbow_words=dbow_words)"
    670    ]
    671   },
    672   {
    673    "cell_type": "code",
    674    "execution_count": 90,
    675    "metadata": {
    676     "ExecuteTime": {
    677      "end_time": "2018-12-28T01:56:03.959942Z",
    678      "start_time": "2018-12-28T01:52:41.532880Z"
    679     }
    680    },
    681    "outputs": [],
    682    "source": [
    683     "model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)"
    684    ]
    685   },
    686   {
    687    "cell_type": "code",
    688    "execution_count": 52,
    689    "metadata": {
    690     "ExecuteTime": {
    691      "end_time": "2018-12-28T02:37:01.604728Z",
    692      "start_time": "2018-12-28T02:37:01.443607Z"
    693     }
    694    },
    695    "outputs": [
    696     {
    697      "data": {
    698       "text/html": [
    699        "<div>\n",
    700        "<style scoped>\n",
    701        "    .dataframe tbody tr th:only-of-type {\n",
    702        "        vertical-align: middle;\n",
    703        "    }\n",
    704        "\n",
    705        "    .dataframe tbody tr th {\n",
    706        "        vertical-align: top;\n",
    707        "    }\n",
    708        "\n",
    709        "    .dataframe thead th {\n",
    710        "        text-align: right;\n",
    711        "    }\n",
    712        "</style>\n",
    713        "<table border=\"1\" class=\"dataframe\">\n",
    714        "  <thead>\n",
    715        "    <tr style=\"text-align: right;\">\n",
    716        "      <th></th>\n",
    717        "      <th>token</th>\n",
    718        "      <th>similarity</th>\n",
    719        "    </tr>\n",
    720        "  </thead>\n",
    721        "  <tbody>\n",
    722        "    <tr>\n",
    723        "      <th>0</th>\n",
    724        "      <td>great</td>\n",
    725        "      <td>0.869434</td>\n",
    726        "    </tr>\n",
    727        "    <tr>\n",
    728        "      <th>1</th>\n",
    729        "      <td>decent</td>\n",
    730        "      <td>0.824517</td>\n",
    731        "    </tr>\n",
    732        "    <tr>\n",
    733        "      <th>2</th>\n",
    734        "      <td>ok</td>\n",
    735        "      <td>0.759463</td>\n",
    736        "    </tr>\n",
    737        "    <tr>\n",
    738        "      <th>3</th>\n",
    739        "      <td>bad</td>\n",
    740        "      <td>0.749852</td>\n",
    741        "    </tr>\n",
    742        "    <tr>\n",
    743        "      <th>4</th>\n",
    744        "      <td>amazing</td>\n",
    745        "      <td>0.748687</td>\n",
    746        "    </tr>\n",
    747        "    <tr>\n",
    748        "      <th>5</th>\n",
    749        "      <td>awesome</td>\n",
    750        "      <td>0.733886</td>\n",
    751        "    </tr>\n",
    752        "    <tr>\n",
    753        "      <th>6</th>\n",
    754        "      <td>okay</td>\n",
    755        "      <td>0.719382</td>\n",
    756        "    </tr>\n",
    757        "    <tr>\n",
    758        "      <th>7</th>\n",
    759        "      <td>tasty</td>\n",
    760        "      <td>0.707188</td>\n",
    761        "    </tr>\n",
    762        "    <tr>\n",
    763        "      <th>8</th>\n",
    764        "      <td>nice</td>\n",
    765        "      <td>0.702621</td>\n",
    766        "    </tr>\n",
    767        "    <tr>\n",
    768        "      <th>9</th>\n",
    769        "      <td>delicious</td>\n",
    770        "      <td>0.692123</td>\n",
    771        "    </tr>\n",
    772        "  </tbody>\n",
    773        "</table>\n",
    774        "</div>"
    775       ],
    776       "text/plain": [
    777        "       token  similarity\n",
    778        "0      great    0.869434\n",
    779        "1     decent    0.824517\n",
    780        "2         ok    0.759463\n",
    781        "3        bad    0.749852\n",
    782        "4    amazing    0.748687\n",
    783        "5    awesome    0.733886\n",
    784        "6       okay    0.719382\n",
    785        "7      tasty    0.707188\n",
    786        "8       nice    0.702621\n",
    787        "9  delicious    0.692123"
    788       ]
    789      },
    790      "execution_count": 52,
    791      "metadata": {},
    792      "output_type": "execute_result"
    793     }
    794    ],
    795    "source": [
    796     "pd.DataFrame(model.most_similar('good'), columns=['token', 'similarity'])"
    797    ]
    798   },
    799   {
    800    "cell_type": "markdown",
    801    "metadata": {},
    802    "source": [
    803     "## Persist Model"
    804    ]
    805   },
    806   {
    807    "cell_type": "code",
    808    "execution_count": 53,
    809    "metadata": {
    810     "ExecuteTime": {
    811      "end_time": "2018-12-28T02:37:02.197070Z",
    812      "start_time": "2018-12-28T02:37:01.605570Z"
    813     }
    814    },
    815    "outputs": [],
    816    "source": [
    817     "model.save('sample5.model')"
    818    ]
    819   },
    820   {
    821    "cell_type": "code",
    822    "execution_count": 6,
    823    "metadata": {
    824     "ExecuteTime": {
    825      "end_time": "2018-12-28T00:54:04.864287Z",
    826      "start_time": "2018-12-28T00:54:03.581152Z"
    827     }
    828    },
    829    "outputs": [],
    830    "source": [
    831     "model = Doc2Vec.load('sample.model')"
    832    ]
    833   },
    834   {
    835    "cell_type": "markdown",
    836    "metadata": {},
    837    "source": [
    838     "## Evaluate"
    839    ]
    840   },
    841   {
    842    "cell_type": "code",
    843    "execution_count": 62,
    844    "metadata": {
    845     "ExecuteTime": {
    846      "end_time": "2018-12-28T02:38:50.845013Z",
    847      "start_time": "2018-12-28T02:38:50.804633Z"
    848     }
    849    },
    850    "outputs": [],
    851    "source": [
    852     "y = sample.stars.sub(1)"
    853    ]
    854   },
    855   {
    856    "cell_type": "code",
    857    "execution_count": 55,
    858    "metadata": {
    859     "ExecuteTime": {
    860      "end_time": "2018-12-28T02:37:03.062359Z",
    861      "start_time": "2018-12-28T02:37:02.201730Z"
    862     }
    863    },
    864    "outputs": [],
    865    "source": [
    866     "X = np.zeros(shape=(len(y), size))\n",
    867     "for i in range(len(sample)):\n",
    868     "    X[i] = model.docvecs[i]"
    869    ]
    870   },
    871   {
    872    "cell_type": "code",
    873    "execution_count": 56,
    874    "metadata": {
    875     "ExecuteTime": {
    876      "end_time": "2018-12-28T02:37:03.065414Z",
    877      "start_time": "2018-12-28T02:37:03.063244Z"
    878     }
    879    },
    880    "outputs": [
    881     {
    882      "data": {
    883       "text/plain": [
    884        "(485681, 300)"
    885       ]
    886      },
    887      "execution_count": 56,
    888      "metadata": {},
    889      "output_type": "execute_result"
    890     }
    891    ],
    892    "source": [
    893     "X.shape"
    894    ]
    895   },
    896   {
    897    "cell_type": "markdown",
    898    "metadata": {},
    899    "source": [
    900     "### Train-Test Split"
    901    ]
    902   },
    903   {
    904    "cell_type": "code",
    905    "execution_count": 63,
    906    "metadata": {
    907     "ExecuteTime": {
    908      "end_time": "2018-12-28T02:39:04.085998Z",
    909      "start_time": "2018-12-28T02:39:03.631216Z"
    910     }
    911    },
    912    "outputs": [],
    913    "source": [
    914     "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)"
    915    ]
    916   },
    917   {
    918    "cell_type": "code",
    919    "execution_count": 64,
    920    "metadata": {
    921     "ExecuteTime": {
    922      "end_time": "2018-12-28T02:39:06.672781Z",
    923      "start_time": "2018-12-28T02:39:06.665668Z"
    924     }
    925    },
    926    "outputs": [
    927     {
    928      "name": "stdout",
    929      "output_type": "stream",
    930      "text": [
    931       "Baseline Score: 20.16%\n"
    932      ]
    933     }
    934    ],
    935    "source": [
    936     "mode = pd.Series(y_train).mode().iloc[0]\n",
    937     "baseline = accuracy_score(y_true=y_test, y_pred=np.full_like(y_test, fill_value=mode))\n",
    938     "print(f'Baseline Score: {baseline:.2%}')"
    939    ]
    940   },
    941   {
    942    "cell_type": "code",
    943    "execution_count": 26,
    944    "metadata": {
    945     "ExecuteTime": {
    946      "end_time": "2018-12-27T23:35:39.040108Z",
    947      "start_time": "2018-12-27T23:35:38.953294Z"
    948     }
    949    },
    950    "outputs": [],
    951    "source": [
    952     "class_weights = class_weight.compute_class_weight('balanced',\n",
    953     "                                                 np.unique(y_train),\n",
    954     "                                                 y_train)"
    955    ]
    956   },
    957   {
    958    "cell_type": "code",
    959    "execution_count": 27,
    960    "metadata": {
    961     "ExecuteTime": {
    962      "end_time": "2018-12-27T23:35:39.240067Z",
    963      "start_time": "2018-12-27T23:35:39.237696Z"
    964     }
    965    },
    966    "outputs": [
    967     {
    968      "data": {
    969       "text/plain": [
    970        "array([0.52585038, 1.59482003, 2.12184306])"
    971       ]
    972      },
    973      "execution_count": 27,
    974      "metadata": {},
    975      "output_type": "execute_result"
    976     }
    977    ],
    978    "source": [
    979     "class_weights"
    980    ]
    981   },
    982   {
    983    "cell_type": "markdown",
    984    "metadata": {},
    985    "source": [
    986     "## LightGBM"
    987    ]
    988   },
    989   {
    990    "cell_type": "code",
    991    "execution_count": 65,
    992    "metadata": {
    993     "ExecuteTime": {
    994      "end_time": "2018-12-28T02:39:10.223141Z",
    995      "start_time": "2018-12-28T02:39:10.217963Z"
    996     }
    997    },
    998    "outputs": [],
    999    "source": [
   1000     "train_data = lgb.Dataset(data=X_train, label=y_train)\n",
   1001     "test_data = train_data.create_valid(X_test, label=y_test)"
   1002    ]
   1003   },
   1004   {
   1005    "cell_type": "code",
   1006    "execution_count": 66,
   1007    "metadata": {
   1008     "ExecuteTime": {
   1009      "end_time": "2018-12-28T02:39:10.908403Z",
   1010      "start_time": "2018-12-28T02:39:10.901251Z"
   1011     }
   1012    },
   1013    "outputs": [],
   1014    "source": [
   1015     "params = {'objective': 'multiclass',\n",
   1016     "          'num_classes': 5}"
   1017    ]
   1018   },
   1019   {
   1020    "cell_type": "code",
   1021    "execution_count": 67,
   1022    "metadata": {
   1023     "ExecuteTime": {
   1024      "end_time": "2018-12-28T02:42:04.449691Z",
   1025      "start_time": "2018-12-28T02:39:11.555708Z"
   1026     },
   1027     "scrolled": true
   1028    },
   1029    "outputs": [
   1030     {
   1031      "name": "stdout",
   1032      "output_type": "stream",
   1033      "text": [
   1034       "[25]\ttraining's multi_logloss: 1.50257\tvalid_1's multi_logloss: 1.51211\n",
   1035       "[50]\ttraining's multi_logloss: 1.45251\tvalid_1's multi_logloss: 1.4704\n",
   1036       "[75]\ttraining's multi_logloss: 1.41546\tvalid_1's multi_logloss: 1.44103\n",
   1037       "[100]\ttraining's multi_logloss: 1.38507\tvalid_1's multi_logloss: 1.41809\n",
   1038       "[125]\ttraining's multi_logloss: 1.35921\tvalid_1's multi_logloss: 1.39942\n",
   1039       "[150]\ttraining's multi_logloss: 1.33601\tvalid_1's multi_logloss: 1.38295\n",
   1040       "[175]\ttraining's multi_logloss: 1.31554\tvalid_1's multi_logloss: 1.36904\n",
   1041       "[200]\ttraining's multi_logloss: 1.29656\tvalid_1's multi_logloss: 1.35624\n",
   1042       "[225]\ttraining's multi_logloss: 1.27918\tvalid_1's multi_logloss: 1.34486\n",
   1043       "[250]\ttraining's multi_logloss: 1.26276\tvalid_1's multi_logloss: 1.33447\n"
   1044      ]
   1045     }
   1046    ],
   1047    "source": [
   1048     "lgb_model = lgb.train(params=params,\n",
   1049     "                      train_set=train_data,\n",
   1050     "                      num_boost_round=250,\n",
   1051     "                      valid_sets=[train_data, test_data],\n",
   1052     "                      verbose_eval=25)"
   1053    ]
   1054   },
   1055   {
   1056    "cell_type": "code",
   1057    "execution_count": 72,
   1058    "metadata": {
   1059     "ExecuteTime": {
   1060      "end_time": "2018-12-28T02:43:01.722585Z",
   1061      "start_time": "2018-12-28T02:43:00.450410Z"
   1062     }
   1063    },
   1064    "outputs": [],
   1065    "source": [
   1066     "y_pred = np.argmax(lgb_model.predict(X_test), axis=1)"
   1067    ]
   1068   },
   1069   {
   1070    "cell_type": "code",
   1071    "execution_count": 88,
   1072    "metadata": {
   1073     "ExecuteTime": {
   1074      "end_time": "2018-12-28T02:48:08.595370Z",
   1075      "start_time": "2018-12-28T02:48:08.514152Z"
   1076     }
   1077    },
   1078    "outputs": [],
   1079    "source": [
   1080     "cm = confusion_matrix(y_true=y_test, y_pred=y_pred)"
   1081    ]
   1082   },
   1083   {
   1084    "cell_type": "code",
   1085    "execution_count": 99,
   1086    "metadata": {
   1087     "ExecuteTime": {
   1088      "end_time": "2018-12-28T03:56:59.129287Z",
   1089      "start_time": "2018-12-28T03:56:58.910377Z"
   1090     }
   1091    },
   1092    "outputs": [
   1093     {
   1094      "data": {
   1095       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWQAAAD8CAYAAABAWd66AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3Xd4k9XbwPHvSdIFZe9dBGQpG0QUUAFllyF7r7KHbGQPUURBlL2RvX4oKIjKElzsPZS9aZltoUmb9Lx/pNSWbm2bNO/94cplcs55ntzHJ71zcp6ltNYIIYRwPIOjAxBCCGEnCVkIIZyEJGQhhHASkpCFEMJJSEIWQggnIQlZCCGchCRkIYRwEpKQhRDCSUhCFkIIJ2FK6TfwqjfT5U4F/HNhD0eHkCJezuPt6BCSnaueiPr33WBHh5AiyhTwVv91HV7l+yV6q4ccm/2f3y85yQhZCCGcRIqPkIUQIlWptDvOlIQshHAtBqOjI/jXJCELIVyLcqpp4SSRhCyEcC1peMoi7UYuhBCxUSrxjwRXpeoqpS4opS4qpUbGUl9DKXVUKWVVSr0fpbycUup3pdQZpdRJpVSrxIQuI2QhhGtJphGyUsoIzAHqADeBQ0qprVrrs1GaXQc6A0NfWPwZ0FFr/bdSKi9wRCm1U2v9OL73lIQshHAtyTeHXAW4qLW+bF+tWgf4ApEJWWt9NaIuPOqCWuu/ojy/rZTyB3IAkpCFEP+PJN9RFvmAG1Fe3wReS+pKlFJVAHfgUkJtZQ5ZCOFalCHRD6WUn1LqcJSHX9Q1xbL2JJ37qZTKA6wEumitwxNqLyNkIYRrScKUhdZ6IbAwjuqbQIEor/MDtxMfhsoIfA+M0Vr/kZhlZIQshHAtSRghJ+AQUEwpVVgp5Q60BrYmKgR7+y3A11rrjYkNXRKyEMK1JFNC1lpbgX7ATuAcsEFrfUYpNUkp1RhAKVVZKXUTaAEsUEqdiVi8JVAD6KyUOh7xKJdQ6DJlIYRwLcbkO3Vaa70d2P5C2bgozw9hn8p4cblVwKqkvp8kZCGEa5FTp4UQwknIqdPJZ/4Hdbi2tieH53WILGv2ZjGOzO/I0+8HUaFYrniXNxgUv89ux+YJvpFly4bX5eDc9kzs9EZk2cg2r9Gw6kvJ34FECA21MLJvR4b6teaDbi1Yv2J+jDY/btvE4O4tGdqzDWMGduXGtcsAnD99nCE9WjGyTwfu3LIfIvk0OIgpI/qiHXw19nFjRvFW9ddp5tsw1nqtNZ9MnULDunV4v2kjzp21T7ddvXKZ1i2a0aJpY04cPwaA1WrFr1tnQkJCUiv8WI0fM4q3a7xO8yax9+nK5Ut0bNeKyuVfYcWyJZHlDx8+pHOHNjRv0pDdu36OLB/Uvzf+/vdSPO74uOrnL1Iynjqd2pwuIa/86Sy+Y7ZEKztz7QGtJ2/jwOmbCS7fz7c8F64/jHz9ik92AKr0WcUbr+QjYzp3cmdJT6Xiufnuj8vJG3wiubm5M/6z+Xy2cB3TF6zh+KHf+OvsqWht3nynLjMWb+CzBWvxbdWJFfNmALBt0yqGjp9Om259+XGbfeftplWLaNq2K8rBHzDfJs2Yt2BxnPUH9v/C9WtX2bbjR8ZNmMyUSRMA2LhhPQM/GMJnX3zJimVLAdiwfi0NG/ni5eWVGqHHqXGTZsydH3efMmXKzPCRo+nYuVu08h+2f0cj36Z8vXodK5bZl9+3dzclSpYmZ874BxUpzVU/f5GS7yiLVOd0Ef16+hYPg8zRyi7ceMjftx4luGy+7N7UrVKYZTtPR5aF2cLxcjehFLibDNjCNWM7vM7klb8le+yJpZTCyysdADarFZvVGuPLOl36f26nZDGHRH7YjUYToaEWQs1mjEYTd2/f4OH9AEqXrZhq8celYqXKZMyUKc76Pbt30ahxE5RSlClbjqCgQAIC/DGZTFjMZswhZkxu
JgIDA9m3dw+NfJukYvSxS6hPWbNl45VXy2AyRZ/9e96n0NBQDAYDVquV1StX0KlLtzjWlHpc9fMXKQ2PkF1qDnl6z7cYvWQ/3l7ukWUXbjzkRkAQv3/VjrW7z1Ekb2aUghOXAhwYKdhsNkb0ac/dWzeo69uSYiVfjdHmh2838N2mVVitVsZPt/+sbNqmCwtmTMHdw4P+Iyfz9YIvaN25d2qH/6/4+98jV+7cka9z5cqN/717tG7TjjEfjiA0NJSx4yeyYN4cevj1cp4R179Qr0EjRg0fwndbv2Hg4GFsWLeGho2bOHzE/5xLf/7+P16gXinVRWu9LDmD+S/qVSmM/+NnHLvoT/VXox+FMmzBvsjnmyb40v/LnxneugplCudg17FrLPvh9IurS3FGo5HPFqzlaXAQ08cP4fqVixQsXDRam7q+Lanr25L9u3awefVi+o2YROGixZk6ewUAZ08eJUu2HGg0MyaPxGQy0bHXB2TOki3V+5MoscwxKqXIkzcvS5avBOD6tWsEBPhTuPBLfDhyGGFhYfTtPxAfn8KpHe1/kiFDBmbPs58AFvjkCcuWLGLGrK+YOH4MQYGBdOjUhbLlyjssPpf+/DnhVERi/ZfIJ8ZVEfX8cOuN3//DWyTe66Xy0rDqS5xf3pWvR9bnrbIFWDqsbrQ2Dau+xNG/7pHO043ShbLR/uPvaftOSbw8HPdDIb13BkqXrcTxQ3FPobzx9nsc/HVvtDKtNZtXL+b99t3Z+PVCWnXqSfVa9dm+ZV0KR/zv5cyVm3t370a+vnfvLjly5ozW5qsvZ9K3/0DWrF5JgwaN6NO3Pwvmzk7tUJPVgvlz6O7Xix3bv6dUqdJMmDyVr2bNcHRYgIt+/tLwlEW8CTniwsqxPU4Bce6Z0Fov1FpX0lpXMhV4PdmDjs245b9StMNiSnReSsdPtrP3xA26Tv8hst5kNNDXtzwzNx8mnYcpcrBmMCjcTan7E+fJ40c8DQ4CwGIxc/Lon+Qr6BOtzZ2b1yOfH/3zAHnyF4xWv/fHbVR47U28M2TEYjGjDAYMBkWoOfr8uzN56+132Lb1G7TWnDxxHG/vDOTI8U9CPnzoIDlz5qJQIR/MISH2PhmNhDhxnxJy7dpVAvz9qVS5SmSflILQ0FCHxeTyn780vFMvoaFhLuA94MU9agpIkb1iK0bUo3qZAmTP6MnFld2ZvPJ3HgWbmdH7bbJn8uJ/E305eTmAxmO2kCdreuYOqkPTcd8kuN5ejcqyatc5QixWTl25j1JwaG4Hdh6+wpOnlpToSpweP7zP7GnjCQ+3obXm9Zq1qVi1BuuWz6PIy6WoXK0mO75dz6mjBzGaTHh7Z6Df8H9+kFjMIez78TvGTJsDQKP32/PZhGGY3NwYNHpqqvYlqhFDB3P40EEeP35EnXdq0Ltvf6xWKwAtW7Wheo2aHPhlHw3r1cHT04tJU/6JVWvNogXzmP75FwA0b9GKUSOGYrNZGT12giO6A8DIYf/06d1aNejd558+tWjVhvv3A2jbqjlPg4NRBgOrV63gf99ux9vbvlNs9pcz6TfgAwDq1W/IoAF9WbPqa/r0G+CwPrnq5y+SEybaxFLxHTuolFoCLNNaH4ilbo3Wum1Cb+BVb6aTHJyYfP5c2MPRIaSIl/N4J9wojXGWQ2OT2993gx0dQoooU8D7P88jePkuSPRWD/m2p1PNW8Q7QtZax3mMTmKSsRBCpDonnBtOLJc67E0IIdLylIUkZCGEa5ERshBCOIe0fEKRJGQhhEuRhCyEEE5CGSQhCyGEU5ARshBCOAlJyEII4SQkIQshhLNIu/lYErIQwrXICFkIIZyEwSBn6gkhhFOQEbIQQjiLtJuPJSELIVyLjJCFEMJJSEIWQggnIadOx+P7mR1T+i1SXc81Rx0dQopY1K6Co0NIdrkzezo6hBSR0UvGUnFJyyPktHt8iBBCxEIplehHItZVVyl1QSl1USk1Mpb6Gkqpo0opq1Lq/RfqOiml
/o54dEpM7PI1K4RwKck1QlZKGYE5QB3gJnBIKbVVa302SrPrQGdg6AvLZgXGA5UADRyJWPbFG0ZHIyNkIYRLScYRchXgotb6stY6FFgH+EZtoLW+qrU+CYS/sOx7wE9a64cRSfgnoG5CbygJWQjhWlQSHvHLB9yI8vpmRFli/KtlZcpCCOFSknLqtFLKD/CLUrRQa73weXUsi+jErvrfLCsJWQjhUpIyhxyRfBfGUX0TKBDldX7gdiJXfRN464Vl9ya0kExZCCFcS/JNWRwCiimlCiul3IHWwNZERrETeFcplUUplQV4N6IsXpKQhRAuJbl26mmtrUA/7In0HLBBa31GKTVJKdU44r0qK6VuAi2ABUqpMxHLPgQmY0/qh4BJEWXxkikLIYRLSc4TQ7TW24HtL5SNi/L8EPbpiNiWXQosTcr7SUIWQriUtHymniRkIYRLkWtZCCGEk5ARshBCOAlJyEII4STScD6WhCyEcC0yQhZCCCdhkJ16QgjhHNLwADltnKkXbrPx0aBOzJk8NEbdz9+uZWLftkwZ0IEvxvbngf8dAO7evMbUwV2YMrAjl8+fAsBms/LF2AGEWsypGn9sWlfKx+puFVnVtSITG5XA3RjzU1SrRHbWdKvI6m72NgAFs3qxrFN5vu5SgVfyZgDAqODLVq/iYXLc5gwNtTCiT0cG92jNwK4tWLd8fow2Z04eZWjPtrSoU4Xf9/0cWX7rxlWG9WrH4B6tuXDmJGDfVhOG9cZiDkm1PsRm6sQxNKhdnfYtfWOtX/31Ujq1aUanNs1o39KX6pVfJfDJYx49ekjvru1p39KXX/bsimw/YnA/AgL8Uyv8WPnfu8uwft3o3rYJPdo1ZcuG1XG2vXDuNPWql2f/np8AuHHtKn27tqZXpxacPX0CAJvVyoiBfpgdvK2eMxhUoh/OJk2MkHd/t4HcBXwwP3sao65A4ZcZNWMp7h6e7NvxP7Ysn0v34ZM5sPMbmnbsTbacedjy9Tx6jnyVX3Zs4bW36+Lu4djb+uTwdqdFxXy0XXIYizWcKb4lqV0yJ9tP34tskz+LJx2rFqTnqhMEWaxkSecGQJNyeZi77wp3npjpU7MwH35zjqbl8/LDGX8s1hcvyZp63NzcmfD5fLy80mG1hjFmYDcqVHmDl0u9GtkmR87c9Bs+ka0bV0Zb9sdt/6N99/7kyJ2XVYu/Ynjp6ezcuomatevj4emV2l2Jpn6jJjRv2ZbJ40fFWt+uY1fadewKwIFf9rB+9ddkzJSZjWtXUa+hL7Xeq8+Q/n7UeLsWB37ZQ/ESpciRI2dqdiEGo9GIX/+hFCtekmdPn9KvW2sqVK5KocJForWz2WwsmfsFFatUiyzb/u1GuvYaSO48eVkybxbjps7guy0bqP1eQzwdvK2ec+kRslKqhFKqllLK+4XyBC+2nBwe3ffn9OHfeKNOo1jri5epGJlgXypemkcP7KMPo8lEqMVCqMWM0WjkWXAQpw4doOrb9VIj7AQZDQoPkwGjAk+TgfvBodHqfcvmYdPR2wRZrAA8ehYGgNWm8TAZ8DQZsYZrvD2MvFk0W7Rk7ghKKby80gH2EZPVao1x8ZacufPiU6RYjJ0uJpOJ0FD7tjIZTTwNDuLw77/w1rsNUyv8OJWrUImMmTIlqu3PP2ynznv1AXufLBYLYaGhKGXAarWyYc1K2nbokpLhJkq27DkoVrwkAOnSp6dAoZe4H8uo/dtNa3nzrdpkzpI1suz535XZbMZkMhEcFMgfv+6jdr3Y/z4dITlv4ZTa4h0hK6UGAH2xX1hjiVJqoNb624jqqcAPKRwfGxd/QdNOfbGEPEuw7a8/fUfpilUBqFm/Ocu/mIw1LJS2fUawff1S6rbo7BQbISA4lDUHb7Cl92tYrDYOXnnMwavR7+xSIIt9tLGgXVkMBsWSA9f448ojNh+9zbiGxXEzGpi282+6vlGIFb9fd0Q3YrDZbAzv3Z67t25Q17clL5d8NeGFgLq+Lfhq
2njCQkPp+cFoNny9iObtujnFtkosc0gIf/x+gMEjRgNQp24DJowezo7vt9JnwGC2bFxH3QaN8fRyjlHkc3fv3OLS3+cpUTr6trofcI/fftnNtC8X8de5M5HljZu1ZvqUMYSFhjJg+FhWL1tAm049nGpbOVEoSZbQlEUPoKLWOlgp5QNsUkr5aK1nkZiL1/1Hpw79SobMWShUtAR/nYr/Ts9/7v2BaxfPM3jqHACy5sjN4I/sz/3v3OTxw/vkzl+IZTMnYguz0qhdD3LlK5jSXYhVBg8T1Ytlp/n8gwRZrHzkW5L3SuVk59l/Rikmg6JAFi/6rD1JzgwezG9XlnZLDnMvyELftfZ51vyZPcnu7c7VB88Y16A4bkbFwv3XuPHIMXN5RqORzxeu5WlwENPGDeH6lYsULFw0weVy5MrDpBn2S9LeuXWDRw8CyFfAh1kfj8VqDaNN597kLVAopcP/Tw7s30uZsuXJmCkzAN4ZMvDZl/MACAx8wqrlS5j62Rd8MnkcQUGBtGnfmVfKlHNkyIQ8e8bk0UPoNWAY6dNH+wHM/FnT6dZ7EEajMVp5ztx5mD57CQC3bl7nwf0AChQqzKeTPiQsLIxOPfqSv6BPanUhVkm5QL2zSShyo9Y6GOz3jsJ+weV6SqkZxJOQlVJ+SqnDSqnD321Y8a+Du3TuJCcPHmB0j2Ys+WwcF04eYdmMCTHanTt+iB82rqD36Gm4ubnHqN+6cgGN2/mxZ9sGqtR8j4Ztu/H9uiRdhClZVfbJzJ0nZh6HhGEL1+z76z6v5ssYrY1/kIVf/n6ALVxz54mZ6w+eRY6an+tZw4eF+6/SsmI+fjzrz+ID1+j2huMTV3rvDLxSrhLHDv2W5GXXLJ1D6y69+X7LOmrUqkerTj3ZsDKu64c7j107d1A7YrriRcsWzaNTNz9+/mE7xUuW5sNxU5g/+4tUjjA6qzWMyaMH88679Xnzrdox6v86f4aPx4+gY/N67N/7E1999hG//bI7WpvlC76iU4++fLNxDW+/24AO3fuwaumC1OpCnJRK/MPZJDRCvquUKqe1Pg4QMVJuiP2ScnH+Ho16Ff7d5x8k9pYnMTTp2JsmHXsD8Nepo/z0zRq6DJ4Qrc2NyxdYM28a/cfPJGPmrDHW8dfpY2TOlp2ceQsQarGglMJgMDr0SIu7gRZK582Ah8mAxRpOpUJZOHc3KFqbX/5+QJ2IHX2ZvEwUyJqOW4//ibl8gUwEBIdy85EZTzcD4RpsWuPp5pjRwZPHjzCZTKT3zoDFYubkkT9p0jpRdz6PdObEEbJmy0ne/AUJtZhRBvu2spgdf1RMfIKDgjh29BDjpnwSo+7G9WvcDwigfMXK/H3hPB6eHqAUoaGhsawpdWitmfHxBAoUeonmrTvG2ubrTTsin382ZSyvvVGDajXeiSw7eeww2XPkJF+BQljMZgxKYTQYsFgcf6SFM02fJFVCCbkjYI1aEHHR5o5KKYd9FW5bvYiCRUtQ9rXqbF42B0tICIs+HQNAluy56DPm0+exsmPDcroPnwzAm+/5smzGBMJtNtr0Huao8Dl7J4g9F+6zonMFrOGav+4F8+2JO/R4sxDn7gZx4OJD/rjyiCqFs7CmW0XCNczee5lA8z+bonO1goz55hwA35y4w8SGJTAaFNN/vOiQPj16cJ/Zn47HZrOhtaZazdpUer0Ga5fNo2jxUlSuVpOL588wbfxQngYHcvj3/axbsYBZSzcC9m21adUShoyzJ7U6DZrxxdTR2Gw2eg6K/QiH1DD+w6EcO3yIx48f06TeO3Tr2de+wxJo+n4rAPbt+ZkqVd+I3KkZ1cI5s/DrOxCAOnXrM3LIADasXUX3Xv1SrxMvOHPyGLt++I7CRYrRu1NLALr07I//Pfshow2btox3ea01a5YvYvTk6QDU923OtImjsNls9B86OmWDT4Q0nI9RWv/rAWyi/JcRsrMa/e2ZhBulQYvaVXB0CMkud2bHHuKYUoLN1oQbpUE+
2T3/czqtOHlPonPOkbFvO1X6ThPHIQshRGKl5RGyJGQhhEtxxjPwEksSshDCpbjyTj0hhEhT0nA+loQshHAtMkIWQggnkYbzsSRkIYRrkZ16QgjhJGTKQgghnIQkZCGEcBJpOB9LQhZCuBYZIQshhJNIw/k4bdzkVAghEis5b3KqlKqrlLqglLqolBoZS72HUmp9RP2fETfyQCnlppRaoZQ6pZQ6p5RK1CULJSELIVyKQalEP+KjlDICc4B6QCmgjVKq1AvNugGPtNZFgZnAtIjyFoCH1vpVoCLQ83myjjf2JPRTCCGcXjLeMaQKcFFrfVlrHQqsA3xfaOMLPL8t0iaglrJPYmsgvVLKBHgBoUBgQm8oCVkI4VKS8a7T+YAbUV7fjCiLtU3EzTueANmwJ+enwB3gOvCZ1vphQm8oCVkI4VIMKvGPqPf/jHj4RVlVbBn7xYvfx9WmCmAD8gKFgSFKqZcSij3Fj7IolTdjwo3SmA/rF3d0CCli2bFbjg4h2bV5JY+jQ0gRbsY0fChBvP77HV6Scup01Pt/xuImUCDK6/zA7Tja3IyYnsgEPATaAj9orcMAf6XUr0Al4HK8sSc6ciGESANUEv4l4BBQTClVWCnlDrQGtr7QZivw/G6+7wO7tf2+eNeBd5RdeqAqcD6hN5TjkIUQLiW5ri2ktbYqpfoBOwEjsFRrfUYpNQk4rLXeCiwBViqlLmIfGbeOWHwOsAw4jX1aY5nW+mRC7ykJWQjhUpLzTD2t9XZg+wtl46I8N2M/xO3F5YJjK0+IJGQhhEtJy2fqSUIWQriUhE74cGaSkIUQLkUuUC+EEE4iDQ+QJSELIVyLTFkIIYSTSLvpWBKyEMLFyAXqhRDCSaThfXqSkIUQrkWOshBCCCchUxZCCOEk0vAAWRKyEMK1yAhZCCGcRNpNx05+PWT/u3cY2KsLHVo0olNLXzatXRmjjdaaWZ9NpW3TenRp05S/zp8F4PrVK/To0JKubZtx+uRxAKxWK4P7dMdsDknVfsQm3GZjxtBuLJk6Itb647/t5tNBHZg+qCOrv5gEgP+t68wc3p3Ph3Th6oXTANhsVhZM/IBQiznVYo+Lp8lAx4p5GfG2D8Pf8qFQlugXGy+SzYspdYsyuEYhBtcoRJ1i2QBI726k3xsFGFrTh1dye0e271I5Lxk9jKnZhRhCQy2MHdCJUb3bMtyvJZtWLoi13R+//MQwv5YM92vJ7E/GAHD7xlVG9+vAqN5t+fus/cqLNpuVqSP7YDE7bnuFhloY1a8jw3q2YXD3lmxYEXufAP745Wda1qnEpQv2v6vzp48z1K81o/p25O4t+92NngYH8dHIftgvA+x4RoNK9MPZOPUI2Wgy0XfQMF4uUYpnT5/So2NLKr1WDZ+XikS2+fO3/dy8fp3V/9vO2dMnmfHJZOYvX8vWLRvx6zeIPHnysWD2TF759Au+3byed+s3wtPTy4G9stu/fRO58hfC/OxpjLqAOzfY/b/V9Jsyl3TeGQh68giAP37aSoN2PcmSMzfbVy3AZ9gUft/5LRVqvou7x3+/08J/1eSVnFwIeMrXR25jVOBmjPl9f+VhCEsORr8zSfl8GTh0I5DjtwLpUbUAp+8GUypXem49sRBosaVW+LFyc3Nn9LR5eHqlw2q1MmlId8pWqkaxkq9Gtrl76zpb1y9nwueLSZ8hI08e22+dtnv7Flp37UeOXHlYt3Q2g0p9ys/fbebNWvXx8HTc9nJzc2f89PmRfRr3QTfKVa7Gy6VejdYu5NlTdnyznmIlXoks+27TaoaM+5SAe7f5cdsmOvb6gM2rFtO0TRenmSpwljj+DaceIWfLnoOXS9jvup0ufXoK+bxEQMC9aG0O7NvDew0ao5Si9KtlCQ4K4sH9AEwmE6FmC2ZzCCaTiaCgQH7bv5f3GjR2RFeiefzAn3NHfqdKrQax1v/583e8Ubcp6bwzAJAhUxYADEYjYaEWwiwWjEYTIU+DOHv4VyrVrJtq
scfFw2TgpWxe/Hn9CQA2DWZreKKWtYXbb0lkMhrQWmNQUKNwFvZcSvCekClOKYWnVzoAbFYrNqs1xh/87h3fUKdhC9JnsN+uLFPmrIB9QBFqMWOxmDGaTDwNDuLon/upXjv27Z5aEtMngPXL59O4ZUfc3N0jy4wmE6Ghlsg+3b19k4cP/ClVtmKqxZ+QZLzrdKpLcISslKoCaK31IaVUKaAucD7iws2p5s7tW/x94RylSpeJVn4/4B45c+WOfJ0jZy4C/O/RtEUbpo4fRWhYGENHjWPF4vl06OLnFN+e3y77ioYdemMOeRZrfcBt+0/B2aP7EB4ezrstu1Ci/Gu8UbcZa7/6CJs1jOZ+Q/lp4wpqNe/oFH3Kls6NpxYbrcvlJm9GD24+NvPNGX9CbdF/xhbK4sWQGoV4YrGy7UwA94JDOXYrkHYV8lApfya+PxdANZ/MHL4ZSJjNOX4Ch9tsjO7fgXu3b1KnUQuKRhkxgn2EDDBhcDfCw8Np3r4HZStVo06jFsyfPp6wsDC6DRjFltWLadLaOUaS4TYbI/p04O7tG7zXuAXFSkbv05WL57kfcJeKVauzbeM/U4VNW3dm4cyPcPfwoN+ISaxc+AWtOvVO7fDj5bLXslBKjQfqASal1E/Aa8BeYKRSqrzW+qOUDxGePXvGuBEf0H/wCNJ7e0eri23eSilFrtx5mLVgOQA3b1znQYA/BX0KM2XcSKxhYXTr1Z8ChXxSIfrozh7+De9MWchfpDgXTx+LtU24zcb9OzfpPfFLHj/wZ+7Y/gyduZwsOXLRZ9KXANy/c5PAh/fJma8ga76cgs0aRt3W3cmRt0Cs60xpBgX5Mnmy5bQ/1x+b8S2dk3eKZuWHCw8i29x8YmHKz5cItWlK5ExPl8r5+GTPFczW8MhpDC83A28XzcryQ7doUSYXXm5G9l1+yLVHjptzNRiNfDx3DU+Dg5g5aRg3rl6kgE/RyHqbzca92zcY8+kCHt6/x6Shfkybv47sOXMzZrp9fvbu7Rs8ehhA3gI+zP10HFZrGC069iJP/kIO69P0BfY+fTZhKNevXKRgYXufwsPDWTFvBn2GTYixnE/R4nz01XIAzp48SpZsOdBoZk4ZhdFkomPPQWTOki0VexJTGs7HCU5ZvA/ynz6dAAAgAElEQVS8AdQA+gJNtNaTgPeAVnEtFPXW2iuXLf5PAVqtYYwbMYjadRtQ4506Mepz5MyN/727ka8D/O+RPUfOaG0Wz51Ft1792bx+NXXqNqRLz74sXzTvP8X1b129cIqzh37lo94tWf3FRC6ePsqaWZOjtcmULQelK7+J0WQiW6685MhbgIA7N6O12bF2Ee+16caB7ZupUL0O77Xqyo8blqVmV6J5YrbyxGzl+mN74jx5J4h8maLPk1qs4ZEj5vP+TzEaFOndo++0q/NyNn7++wHl82Xk5hMz60/cpX6JHKnTiQSk985AyTIVOXn492jlWbPnpELVGphMJnLmzkfe/AUjR83PbVg+lxYde7Hz2/W88U5d3u/Qk/+tXpSa4ccqvXcGSpWtyPEofTKHPOPG1UtMHNqTvu0b8fe503w6bnDkjj2wD4T+t3oJzdt1Z9PKRbTs2JMateqxY8s6R3QjGqVUoh/OJqGEbNVa27TWz4BLWutAAK11CBDnBKHWeqHWupLWulKHLt3/dXBaa6ZNHkchn5do1a5TrG3eqPEWO7/fitaaM6dOkN7bm2zZ//kDPn7kENlz5iJ/wUJYzGYMBoXRYMRsccyRFvXb9WTsws2MnreBdoPGU/SVCrQdODZam1eqVOfi6aMAPA18TMCdG2TLlTey/tKZ42TKmoMceQoQajHbP1wGA2GhllTtS1RBFhuPQ8LIkd4NgGLZ03EvKDRamwxRjpgokNkTpeBp6D877bKndyOTh4nLD0JwNyq0BrTG5MC94YGPH/E0OAiAUIuZM8cOkqeAT7Q2larV5NzJIwAEPXnMnZvXyZknX2T9uZNHyJotJ7nz
FcRiMaOUAYPBQKjFMdvrxT6dOnqQfFH6lC69N0s272LOqm3MWbWNYiVfYfikGRQpXiqyzb4fv6PCa2/inSGjvU8GhVIGLE5wtI9RqUQ/nE1Cc8ihSql0EQk5ctZeKZWJeBJycjl14hg/bt/GS0WL0a1tcwB69B3Ivbt3APBt3oqqb9Tgj1/307ZpPTw8vRg57p/Rptaar5cuYOLHnwPQsOn7TBk7EpvNyuCRY2O+oQP9sG4JBYoUp3TlNylergp/nTjEp4M6YDAYaNihD+kzZALsffp589d0GDwRgKp1GrFm1mTCbTaa+Q1xZBfYctqfdhXyYjQoHj4LZd3xu7xeyB7379eeUCZPBqr5ZCY8XBMWrll15Ha05euVyM6O8/cBOHYriC6V81H9pSz8cOF+qvfluccP7zP/8wmE28LROpzXatSmwmvV2fT1fAoXK0nF12tSpuLrnDryJ8P8WmIwGGjbfSAZMmYG7Nvrm7VLGfDhxwC8U68pc6eNxWaz0aX/SIf06dHD+8z5dDzh4fY+vV6jDhWrVmf98vkUebkklarVjHd5i9nMvp++Y/QncwBo2Lwdn08cjsnNjYEfpsosZryc8Gi2RFPxHTuolPLQWsf4GldKZQfyaK1PJfQGdwPDnGPPTDI6dM3xe/9Twt6rjx0dQrJr80oeR4eQItyMaTjrxKNswQz/uWODt55PdM6Z0biEU/2PjHeEHFsyjii/Dzhu2CKEEHFwxrnhxHLqE0OEECKp0vKUhSRkIYRLScMDZEnIQgjXYkrDGVkSshDCpaThfCwJWQjhWlz21GkhhEhr0nA+du6rvQkhRFIZVOIfCVFK1VVKXVBKXVRKxTiTRynloZRaH1H/p1LKJ0pdGaXU70qpM0qpU0qpBK+5KiNkIYRLSa4LzyuljMAcoA5wEziklNqqtT4bpVk34JHWuqhSqjUwDWillDIBq4AOWusTSqlsQFhC7ykjZCGES0nGEXIV4KLW+rLWOhRYB/i+0MYXWBHxfBNQS9nPTHkXOKm1PgGgtX6gtU7wbguSkIUQLkUl5V+UK1NGPPyirCofcCPK65sRZcTWRmttBZ4A2YCXAa2U2qmUOqqUGp6Y2GXKQgjhUpIyY6G1XggsjKM6tjW9eJ2MuNqYgDeBysAzYJdS6ojWeld88cgIWQjhUpJxyuImEPWOD/mB23G1iZg3zgQ8jCjfp7W+H3G1zO1AhQRjT0wHhRAirUjGC9QfAooppQorpdyB1sDWF9psBZ5frP19YLe2X0JzJ1BGKZUuIlHXBM6SAJmyEEK4lFhudv6vaK2tSql+2JOrEViqtT6jlJoEHNZabwWWACuVUhexj4xbRyz7SCk1A3tS18B2rfX3Cb2nJGQhhEtJzjP1Im7mvP2FsnFRnpuBFnEsuwr7oW+JJglZCOFS5PKb8Uj3wk0sXUGlglkcHUKKyOTh5ugQkt2ea655HwX/4ATPMUiTyhYs/p/XkZZPnZYRshDCpRhiPRItbZCELIRwKTJCFkIIJ2FKw5PIkpCFEC5FRshCCOEk5AL1QgjhJNJwPpaELIRwLWn5ehCSkIUQLkWmLIQQwklIQhZCCCeRdtOxJGQhhItJwwNkSchCCNeSiOscOy1JyEIIlyJHWQghhJOQnXpCCOEkZMpCCCGchExZCCGEk0jLI2Sn/jKZNG407771Bq2aNYq33ZnTp3itfGl2/bQTgKtXr9ChdXPatmjCyRPHALBarfTx64I5JCTF406I/727DOrdlY4tG9O5VRM2rYt5261rVy/Tp2s76rxRgXWrlkeWP370kH49OtK5dVP2790VWT56aH/uB/inRvjxCrfZmDSwI19OHBKj7q/Tx5g8sBM9fd/kyK+7I8vv3rzG5EGdmdi/A5fOnwLAZrMyY0x/LGZzqsUel3UfdmLzxN78b3JfvvloQIx6y9Mgfpo3ic2TevPtxwN5eOsqACFBj9n26RA2T+zF1eO/Rbb/ce5Enj5+kFrhxyosJJhDKz5h
9ye92T2tDw+vno9Wf//iKbaPbs3ezwey9/OBXPhxHQCW4Ccc+GoEe6b3486pPyLbH1w6BfMTx/bpOZWEh7Nx6hFyQ98mtGzTlvGjR8bZxmazMfuLz6la7Y3Isi0b19Nv4GDy5M3H7Fkz+HRGeTZvWEf9hr54enmlRujxMhqN9Bk4lJdLlOLZ06f4dWxFpSqv4/NSkcg2GTNmYsDQURzYuzvasrt+3EHdBo15p049hg/sRfW3avHb/r0UK16K7DlypnZXYvh52wby5Pch5NnTGHVZc+Smy6Cx7NyyOlr5vh++oXmnPmTLlYf/LZ9L7w8/Zu/2LVR9uy4enp6pFXq8Ggz5BE/vTLHWHd+xnmz5i1Cn9zge373Bb2vmUH/wJ1w6tI9ir9fmpco1+eHLsfiUq8a1E3+QvUBR0mfOlso9iO7UN4vIWbwClTuNJNwahi3MEqNNtsKleK37uGhlt479QoHK75CvXHV+XzSBPK9W5e6Zg2TKVwTPTI7t03PG/08jZKXU1ykRSGwqVKxMxoyZ422zfu0q3q5dhyxZ//kwmEwmzBYLZrMZk8lEUGAg+/ftoUEj35QOOVGyZc/ByyVKAZAufXoKFS7M/YB70dpkyZqNEqVewWiK/p1pMpqwWCyEhoWilAGr1cqmtato3aFzaoUfp4f3/Tl16FfefLdxrPXZc+Uhf+GiKBX9Y2c0mQgNtRBqMWM0mXgWHMTJgwd4/Z36qRH2f/b4znXyliwLQObcBQh6cI9ngY8wGI1Yw0IJt4ahlCLcZuPMrm8o815zh8YbZn7Gw8tnKPhaHQAMJjfcvLwTtawyGrGFhWKzWlHKQLjNxuVftlLk7WYpGXKSKJX4h7OJd4SslNr6YhHwtlIqM4DWOva/vFTif+8ee3f/zLxFyzl7ZkxkeYvWbRk/ZiRhoaGMGjuRxQvm0qVHT6ecW7pz+xZ/XzhPydJlEtW+Vt36TBk7gp3fb6Vnvw/4dvN63q3fCE9Px4/81y/6gve79MMc8ixJy71dvzlLZ07CGhZGh74j2LZuKfVbdnKi7aXY8cVoUIqS1etRokb0L4qs+V/i6tHfyF30FfyvXCD4oT/PHt2naJW32bN4Ghd/30XlZl04u+87ir5eC5O7Y0f9zx7cxT19Jo6vm8WT21fInL8orzTpgckjelwPr11g72cD8MyUlVKNupIxd0Hyl6/JkdWfc+PwHko16MTV37aTv9LbmNw9HNSbmJRTTkYkTkJTFvmBs8BiQGNPyJWAz1M4rkSZMf1j+g8agtEY/c7WufPkZcES+0D+xvVrBAT441P4JcZ9OJywsDB69R1AIZ/Cjgg5mmfPnjF+5Af0GzyC9N6JG6F4e2fgk5lzAQgKfMLalUuZNO0Lpn80gaCgQFq17UjpMuVSMuxYnTh4gIyZslCoaAkunDqapGWz5czNsI/tffK/fYMnD++TJ78PSz6fiNUahm97P3LnK5gSYSdKo+Gfkz5zNkICH7Nj1odkyl2APC+/Gllftm4Lfl+/gP9N7kvWfD5kK1AEZTDi7pWe9/pPAuzzzCd3bqR2r7HsXzkLy7MgXq3dnFxFSqZ6f3S4jSe3LvFqUz+yFCrOqW8WcXH3JkrUax/ZJlP+ItQZsxiThxf3zh3m0LKPqDVqAW5e6akaMY0R+iyYi3s2U7nzKI5vmE1YSDBFajYhq0+JVO9TVE7zPf4vJDRlUQk4AowGnmit9wIhWut9Wut9cS2klPJTSh1WSh1etmRh8kX7gnNnTjN6xBAa16vF7p9+ZNpHk9i7++dobeZ+9QW9+g5g/ZpV1G3QiJ59+rNowdwUiymxrNYwxo/4gNrvNaDG27X/1TpWLJ5P+y492P3jdl4uUYoRYyaxaN6XyRxp4lw6d5LjB/czsltTFn46lgsnj7D48wlJXs+WlQvwbe/Hrm0beO2td2nctjvb1i5J/oCT4Pl8r1fGzBQqV42Aqxei1bt7padm58E0GzuHml2GYg5+QobsuaK1Ofb9GsrV
b82lQ3vJXrAoNTp+wOFvlqdWF6LxzJQdz0zZyVKoOAB5y1Tj8a3L0dq4eabD5GH/1ZWrZCXCbTYswYHR2vz10zqK1W7JrWO/kDl/Ecq1GsC57StTpxPxMKAS/XA28Y6QtdbhwEyl1MaI/95LaJmI5RYCCwECzeE6OQKNzbc7/km+E8aOonqNt3jrnX+S25HDB8mZMxcFC/lgNodgUAYMBgMWBx9pobXm08njKVj4JVq26/Sv1nHz+jUe3A+gXIXKXPzrAh4eHiilCLXE3DmTGpp16kOzTn0AuHDqKDv/t5ruQyYkaR0XTh0lc7Yc5MpbgFCLGaUMGIxGQi2OO9IizGJG63DcPdMRZjFz6+xRyjdoG62N5VkwJncPjCY3Lhz4gdzFXsXdK31k/ZN7t3j6+CF5Xi7DgxuXMbm5g1LYwkJTuzsAeGbMglfm7AT738Q7Z34C/j5BhlwForUxBz7CI0NmlFI8uv4X6HDc02eIrA8OuI35yUOyF3mFwFuXMbi5o4Bwq2P6FFVaHiEn6igLrfVNoIVSqgEQmFD75DJ6xBCOHD7I48ePaVDnLfx698NqtQLQvGXreJfVWrN00Xw+nj4TgKbNWzJ21DBsNhsjR49P8djjc+rEMX7csY2XihajW7v3AejRZwD37t4FwLd5Sx7cv0/Pzq149vQpShnYtG4lK9Z9Gzm1sXjel3TvbT8Eq9a79RgzbCCb162mS8++julUHL5dtZBCxUpS7rXqXPnrLHOnjrTvtDt0gG9XL2bS3DWAfXt9v2E5PUdMAaBG3SYs/mw8tnAb7XsPc1j8IYGP+Hn+ZMB+SF+RKm9R4JVKnNv3PQAlazbg8Z0b7Fv+GUoZyJynIDU6Doq2jsPfrqCSr/2Lt0jlt/hp3iRO7/6Wio07pG5noni1qR9HVs8g3BZG+qy5Kdd6IFd/2wGAT7V63Dn5K1d/24EyGDG6uVOx/bBoc/rnd6ykRD17/PnK1+Dgsqlc2b+N4nXbOaQ/UaXlU6eV1ik2gAVSdoTsKE8tVkeHkCL+vhfs6BCS3Z+3Hzs6hBThHxzm6BBSxPSGxf9zNt11/n6ic06tEtmdKns79XHIQgiRVGn5KAunPlNPCCGSKjmPQ1ZK1VVKXVBKXVRKxThDTSnloZRaH1H/p1LK54X6gkqpYKXU0MTELglZCOFSVBL+xbsepYzAHKAeUApoo5Qq9UKzbsAjrXVRYCYw7YX6mcCOxMYuCVkI4VIMKvGPBFQBLmqtL2utQ4F1wIun+/oCKyKebwJqqYi9n0qpJsBl4EyiY09sQyGESAsMSiX6EfWciYiHX5RV5QNuRHl9M6KM2Npora3AEyCbUio9MAKYmJTYZaeeEMKlJGWXXtRzJhK5qheP4IirzURgptY6OCmXAJCELIRwKcl4HPJNIOoZM/mB23G0uamUMgGZgIfAa8D7SqlPgcxAuFLKrLWeHd8bSkIWQriUZDzo7RBQTClVGLgFtAbavtBmK9AJ+B14H9it7Sd3VI+MR6kJQHBCyRgkIQshXE0yZWSttVUp1Q/YCRiBpVrrM0qpScBhrfVWYAmwUil1EfvIOP5TiBMgCVkI4VKS89RprfV2YPsLZeOiPDcDLRJYx4TEvp8kZCGES0m75+lJQhZCuJo0nJElIQshXEpavpaFJGQhhEtJw1fflIQshHAtaTgfS0IWQrgW57k5btJJQhZCuJQ0nI9TPiHbXO+GIWRJ7+7oEFJE2QKZHR1CssuSzjW3VZVGMS7N6xKmN0zwZLYEpeF8LCNkIYSLScMZWRKyEMKlyGFvQgjhJGQOWQghnIQkZCGEcBIyZSGEEE5CRshCCOEk0nA+loQshHAxaTgjS0IWQriU5LxAfWqThCyEcClpNx1LQhZCuJo0nJElIQshXIoc9iaEEE4iDU8hS0IWQriWNJyPJSELIVyLXKBeCCGcRBrOxxgcHUB8pkwYTb133qTt+41jrT9y+CC1qlehQ6umdGjVlCUL5gLw6OFD/Lq0
p+37jdm35+fI9sMG9SXA3z9VYo/PuDGjeKv66zTzbRhrvdaaT6ZOoWHdOrzftBHnzp4B4OqVy7Ru0YwWTRtz4vgxAKxWK37dOhMSEpJa4cdp8vjRvPf2G7Ru3ijW+iOHDvL2m5Vp17Ip7Vo2ZfGCOYB9e/Xo3I7WzRuxd/c/22uoE2yv0FALI/t0ZEiP1gzq2oL1y+fHaLNt4yoGdXmfwd1bMWFoLwLu3QHg1o2rDO/VjiE9WnPhzEkAbDYrE4f1xmJO/e01f3w7ru36mMMbP4wsmzqoCcf/N4aD60ex/vMeZPL2inXZ899P5NCGD/lj3UgOrB4eWT5lgC8H149i8eQOkWVtGlSmb5u3UqwfCVFJeDgbp07IDRo1ZeachfG2KVe+IivXb2Hl+i1069kHgB9/+J76jXxZtGItq1YsA2D/vj0UL1mKHDlzpnjcCfFt0ox5CxbHWX9g/y9cv3aVbTt+ZNyEyUyZNAGAjRvWM/CDIXz2xZesWLYUgA3r19KwkS9eXrH/IaWmBo2bMGtuwttr9YYtrN6whe49+wL27dWgkS9Lvl7HqhX2fu3ft4fiJRy/vdzc3Bn/+Xw+X7SOzxau4dih3/jr7KlobQoXLc60eSuZsXg9r9eoxcqFswD4adv/aNe9P0PHf8rWjSsB2Ll1EzVr18fDM/W318ptf+Dbd060sl1/nKdii6lUafUxf1/zZ1jXd+Ncvq7fLKq2/oQ3230KQEZvT6qWLUyVVh9jNBgoXTQvnh5udGhUlQUbf0nRvsQrDWdkp07I5StWImOmTElezmRyw2I2ExYaisGgsFqtrF/zNe07dk2BKJOuYqXK8fZrz+5dNGrcBKUUZcqWIygokIAAf0wmExazGXOIGZObicDAQPbt3UMj3yapGH3cKlSsTMaMSb8NlNFkwmKxEBYaijIYsFqtrF39NR06OX57KaXw8koHgM1qxWa1xvhDfqV85cgEW6zkqzwIsI/qjSYToaEWLBYzRqOJp8FBHP79F2q+G/svo5T269FLPHzyLFrZrj/OY7OFA3Dw1BXy5Ur89gsP17i72Wc9vTzcCLPa+KBTLeau24vVGp58gSeRSsI/Z5OkhKyUelMpNVgpFffXaCo7dfI47Vs2ZVBfPy5f+huA9+o14M/ff2VQXz+69+zL5g1rqdfAF08nGEUmhr//PXLlzh35Oleu3Pjfu0frNu1Y+fVypkwaT/cePVkwbw49/HqlqZ0Yp04ep23LJgzs68eli/btVbdeQ/747QAD+vagRy/79qrf0Hm2l81mY6hfG7o1r0OZilV5ueSrcbbdveNbylepBkBd3xZs27SahTOn0qxtVzZ+vYjm7bo57fbq6Ps6O389G2ud1pptc/vx6+rhdG32BgDBzyx8s+s4f6wbydXbDwgMDqFiqUJ8t/dUrOtILUol/uFs4t2pp5Q6qLWuEvG8B9AX2AKMV0pV0Fp/kgoxxqlEiVJ8s/1n0qVLz2/79zH8g/5s2voD3hkyMOMr+1xfYOATVi5fwiefz2LqpHEEBQbStkNnXi1bzpGhx0/HvDGsUoo8efOyZLn9p+/1a9cICPCncOGX+HDkMMLCwujbfyA+PoVTO9pEK16yFFt37CJduvT8un8fwz/ox+ZtO/HOkIGZsxcAEdtr2WKmzfiSjyaOJSjIvr3KlC3vsLiNRiOfLVzL0+AgPh03hOtXLlKwcNEY7X75aTuX/jrLpBmLAMiRKw+TZtincO7cusHDBwHkK+DDlx+PxWoNo3Xn3uQtUChV+xKX4d3ew2YLZ932Q7HWv9NlJncCnpAjizffze/Hhat3+fXoJWas+JkZK+zz/nPHtWXyvO/p3PR1alctyam/bzFt8c7U7AYABidMtImV0AjZLcpzP6CO1noi8C7QLq6FlFJ+SqnDSqnDy5cuSoYwY5fe25t06dIDUK16TaxWK48fPYrWZunCeXTu1pMff9hOiZKlGDNhCvNmf5FiMSWHnLlyc+/u3cjX9+7djTGX
+tWXM+nbfyBrVq+kQYNG9OnbnwVz//sde1OSd5Tt9UYc22vxgrl06d6TH3d8T4lSpRkz4SPmfeUc2yu9dwZKl6vEsUO/xag7eeRPNq9ZwsjJM3Fzj3mn67VL59CmS2+2b1lH9Vr1aNWpJxtWxj/fnlraNXqN+jVeofPo5XG2uRPwBICAR8Fs3X2SyqV9otWXLZ4fgL+v+dOu4Wu0H7GU0kXzUqRgjpQKOx7JN4mslKqrlLqglLqolIpxq2+llIdSan1E/Z9KKZ+I8jpKqSNKqVMR/30nMZEnlJANSqksSqlsgNJaBwBorZ8C1rgW0lov1FpX0lpX6ty1R2Li+Fce3A9AR4wmz5w+idbhZMr8zxzY9WtXCQjwp0KlyljMZgwGAyhFqMWSYjElh7fefodtW79Ba83JE8fx9s5Ajhz/JOTDhw6SM2cuChXywRwSgjIYMBiNhJjNDow6Yfejbq9TJwnXOsb2uh/gT4VKVTCbzRiUAaUUllDHba8njx/xNDgIAIvFzMkjf5KvgE+0Npf/Ps+CmR8xcvJMMmXJGmMdZ04cIWu2nOTJXxCLxYwyKAwGI6FOsL3qVCvJkM61eX/QAkLMYbG2Sefpjnc6j8jntV8vwZlLt6O1GdenIZPnfY+byYgxYogaHq5J5xnzyymlJdeUhVLKCMwB6gGlgDZKqVIvNOsGPNJaFwVmAtMiyu8DjbTWrwKdgJWJiT2h45AzAUewf5VopVRurfVdpZQ3qbCPcuzIoRw9cpDHjx/T6L236dGrH1ar/UPTrEVrdv/8I//buA6j0YSHpweTP/482vzcgjmz6Nl3IAB16tZnxAf9Wb9mJT1690/p0OM1YuhgDh86yOPHj6jzTg169+2P1Wr/fmvZqg3Va9TkwC/7aFivDp6eXkyaMjVyWa01ixbMY/rn9lFj8xatGDViKDabldFjJziiO5HGjBzCkcP27dXw3bfo0btfZL+aR2yvzRvWYjSZ8PTw4KNPom+vebNn0buffXu9W68Bwwb1Y92ar+nZZ4BD+gPw6MF9Zn86nnCbDa011WrWptLrNVi3bB5FipeicrWarFw4C3NICJ9PGgFA9py5GTllJmDfXptXLWHwOPvsXp0GzZg1dTQ2mw2/QaNStS8rPu5M9YrFyJ7Zm4s/TGby/O0M6/IuHu4mvpvXD4CDp64y4KN15MmRibnj2tK0/zxyZsvA+hn2gZXJaGT9jsP89Nu5yPU2eqsMR85cixxF/3nyKoc2fMjpv29x6q9bqdpHSNbEVAW4qLW+DKCUWgf4AlEn2n2BCRHPNwGzlVJKa30sSpszgKdSykNrHe/oQulY5isTopRKB+TSWl9JqO2jZ7akv4GT83I3OjqEFGEJc9ye8ZRy/cGzhBulQVUaxfj17BJCjs3+z/n0zpPQROecvJk9emKfjn1uodZ6IYBS6n2grta6e8TrDsBrWut+zxsrpU5HtLkZ8fpSRJv7Udq8D/TSWtdOKJ5/daae1voZkGAyFkKI1JaUo1gikm9ck/mxrejFZB9vG6VUaezTGIk6Ms2pj0MWQoikSsbzQm4CBaK8zg/cjquNUsqEfZr3YcTr/NiPSuuotb6UmNglIQshXEoyHod8CCimlCqslHIHWgNbX2izFftOO4D3gd1aa62Uygx8D4zSWv+a2NglIQshXEpynamntbYC/YCdwDlgg9b6jFJqklLq+QV2lgDZlFIXgcHA88n9fkBRYKxS6njEI8HrAPyrnXpJITv10g7ZqZd2yE69uAUEWxOdc3J4m5zqNBK5/KYQwqU4VYZNIknIQgiXYnDGi1QkkiRkIYRLScP5WHbqCSGEs5ARshDCpaTlEbIkZCGES3HGC88nliRkIYRLkRGyEEI4CUnIQgjhJGTKQgghnISMkIUQwkmk4XwsCVkI4WLScEaWhCyEcClp+dTpFL/aW2pSSvk9v/2KK3HFfrlin8A1++WKfXJWrnbqtF/CTdIkV+yXK/YJXLNfrtgnp+RqCVkI
IdIsSchCCOEkXC0hu+o8lyv2yxX7BK7ZL1fsk1NyqZ16QgiRlrnaCFkIIdIsl0jISqmlSil/pdRpR8eSXJRSBePgQ+sAAAJGSURBVJRSe5RS55RSZ5RSAx0dU3JQSnkqpQ4qpU5E9Guio2NKLkopo1LqmFLqO0fHklyUUleVUqci7pp82NHxuDqXmLJQStUAgoH/a9/uWZyI4iiMP6fYQqJiIxJMsZ2NhcqyTToR8WURSwutLC0UC8EvIX4AbUTRZrUSREFFFtYXsqwWrqWFKAQR0W31WGS+QSbcyfX8YMike9L8uXPn5o7tg6V72iCpD/Rtb0jaBYyAs7Y/Fk6biiQBPdvbkhaANeCy7deF06Ym6SqwBOy2vVK6pw2SPgNLtr+XbvkfVLFCtv0K+FG6o022v9neaO5/A1vA/rJV0/PEdvN1obnmflUgaQCcBm6Vbon5VcVArp2kReAw8KZsSTuaR/tNYAw8s13D77oJXAP+lg5pmYGnkkaS8geRGctA7jhJO4FV4IrtX6V72mD7j+1DwABYljTX20ySVoCx7VHplhkY2j4CnAQuNduDMSMZyB3W7LGuAvdsPyzd0zbbP4GXwInCKdMaAmea/dYHwFFJd8smtcP21+ZzDDwClssW1S0DuaOal1+3gS3bN0r3tEXSXkl7mvsdwDHgU9mq6di+bntgexE4Bzy3fb5w1tQk9ZoXykjqAceBak4ydVEVA1nSfWAdOCDpi6SLpZtaMAQuMFltbTbXqdJRLegDLyR9AN4x2UOu5phYZfYBa5LeA2+Bx7afFG6qWhXH3iIialDFCjkiogYZyBERHZGBHBHRERnIEREdkYEcEdERGcgRER2RgRwR0REZyBERHfEPDkQ0fjPnGGcAAAAASUVORK5CYII=\n",
   1096       "text/plain": [
   1097        "<Figure size 432x288 with 2 Axes>"
   1098       ]
   1099      },
   1100      "metadata": {
   1101       "needs_background": "light"
   1102      },
   1103      "output_type": "display_data"
   1104     }
   1105    ],
   1106    "source": [
   1107     "sns.heatmap(pd.DataFrame(cm/np.sum(cm), \n",
   1108     "                         index=stars, \n",
   1109     "                         columns=stars), \n",
   1110     "            annot=True, \n",
   1111     "            cmap='Blues', \n",
   1112     "            fmt='.1%')"
   1113    ]
   1114   },
   1115   {
   1116    "cell_type": "code",
   1117    "execution_count": 81,
   1118    "metadata": {
   1119     "ExecuteTime": {
   1120      "end_time": "2018-12-28T02:45:12.284710Z",
   1121      "start_time": "2018-12-28T02:45:12.277127Z"
   1122     }
   1123    },
   1124    "outputs": [
   1125     {
   1126      "data": {
   1127       "text/plain": [
   1128        "0.44955063467061984"
   1129       ]
   1130      },
   1131      "execution_count": 81,
   1132      "metadata": {},
   1133      "output_type": "execute_result"
   1134     }
   1135    ],
   1136    "source": [
   1137     "accuracy_score(y_true=y_test, y_pred=y_pred)"
   1138    ]
   1139   },
   1140   {
   1141    "cell_type": "code",
   1142    "execution_count": 36,
   1143    "metadata": {
   1144     "ExecuteTime": {
   1145      "end_time": "2018-12-28T02:30:21.727266Z",
   1146      "start_time": "2018-12-28T02:30:21.428123Z"
   1147     }
   1148    },
   1149    "outputs": [
   1150     {
   1151      "data": {
   1152       "text/plain": [
   1153        "0.8614708105573701"
   1154       ]
   1155      },
   1156      "execution_count": 36,
   1157      "metadata": {},
   1158      "output_type": "execute_result"
   1159     }
   1160    ],
   1161    "source": [
   1162     "roc_auc_score(y_score=lgb_model.predict(X_test), y_true=y_test)"
   1163    ]
   1164   },
   1165   {
   1166    "cell_type": "code",
   1167    "execution_count": 55,
   1168    "metadata": {
   1169     "ExecuteTime": {
   1170      "end_time": "2018-12-27T23:56:19.836967Z",
   1171      "start_time": "2018-12-27T23:56:19.660296Z"
   1172     }
   1173    },
   1174    "outputs": [
   1175     {
   1176      "data": {
   1177       "text/html": [
   1178        "<div>\n",
   1179        "<style scoped>\n",
   1180        "    .dataframe tbody tr th:only-of-type {\n",
   1181        "        vertical-align: middle;\n",
   1182        "    }\n",
   1183        "\n",
   1184        "    .dataframe tbody tr th {\n",
   1185        "        vertical-align: top;\n",
   1186        "    }\n",
   1187        "\n",
   1188        "    .dataframe thead th {\n",
   1189        "        text-align: right;\n",
   1190        "    }\n",
   1191        "</style>\n",
   1192        "<table border=\"1\" class=\"dataframe\">\n",
   1193        "  <thead>\n",
   1194        "    <tr style=\"text-align: right;\">\n",
   1195        "      <th></th>\n",
   1196        "      <th>0</th>\n",
   1197        "      <th>1</th>\n",
   1198        "      <th>2</th>\n",
   1199        "    </tr>\n",
   1200        "  </thead>\n",
   1201        "  <tbody>\n",
   1202        "    <tr>\n",
   1203        "      <th>count</th>\n",
   1204        "      <td>139717.000000</td>\n",
   1205        "      <td>139717.000000</td>\n",
   1206        "      <td>139717.000000</td>\n",
   1207        "    </tr>\n",
   1208        "    <tr>\n",
   1209        "      <th>mean</th>\n",
   1210        "      <td>0.630986</td>\n",
   1211        "      <td>0.209083</td>\n",
   1212        "      <td>0.159931</td>\n",
   1213        "    </tr>\n",
   1214        "    <tr>\n",
   1215        "      <th>std</th>\n",
   1216        "      <td>0.007147</td>\n",
   1217        "      <td>0.005648</td>\n",
   1218        "      <td>0.004706</td>\n",
   1219        "    </tr>\n",
   1220        "    <tr>\n",
   1221        "      <th>min</th>\n",
   1222        "      <td>0.502827</td>\n",
   1223        "      <td>0.152388</td>\n",
   1224        "      <td>0.110754</td>\n",
   1225        "    </tr>\n",
   1226        "    <tr>\n",
   1227        "      <th>25%</th>\n",
   1228        "      <td>0.629275</td>\n",
   1229        "      <td>0.206945</td>\n",
   1230        "      <td>0.158686</td>\n",
   1231        "    </tr>\n",
   1232        "    <tr>\n",
   1233        "      <th>50%</th>\n",
   1234        "      <td>0.630822</td>\n",
   1235        "      <td>0.208772</td>\n",
   1236        "      <td>0.160465</td>\n",
   1237        "    </tr>\n",
   1238        "    <tr>\n",
   1239        "      <th>75%</th>\n",
   1240        "      <td>0.632655</td>\n",
   1241        "      <td>0.210202</td>\n",
   1242        "      <td>0.161593</td>\n",
   1243        "    </tr>\n",
   1244        "    <tr>\n",
   1245        "      <th>max</th>\n",
   1246        "      <td>0.700247</td>\n",
   1247        "      <td>0.337525</td>\n",
   1248        "      <td>0.262446</td>\n",
   1249        "    </tr>\n",
   1250        "  </tbody>\n",
   1251        "</table>\n",
   1252        "</div>"
   1253       ],
   1254       "text/plain": [
   1255        "                   0              1              2\n",
   1256        "count  139717.000000  139717.000000  139717.000000\n",
   1257        "mean        0.630986       0.209083       0.159931\n",
   1258        "std         0.007147       0.005648       0.004706\n",
   1259        "min         0.502827       0.152388       0.110754\n",
   1260        "25%         0.629275       0.206945       0.158686\n",
   1261        "50%         0.630822       0.208772       0.160465\n",
   1262        "75%         0.632655       0.210202       0.161593\n",
   1263        "max         0.700247       0.337525       0.262446"
   1264       ]
   1265      },
   1266      "execution_count": 55,
   1267      "metadata": {},
   1268      "output_type": "execute_result"
   1269     }
   1270    ],
   1271    "source": [
   1272     "pd.DataFrame(lgb_model.predict(X_test)).describe()"
   1273    ]
   1274   },
   1275   {
   1276    "cell_type": "markdown",
   1277    "metadata": {},
   1278    "source": [
   1279     "## Random Forest"
   1280    ]
   1281   },
   1282   {
   1283    "cell_type": "code",
   1284    "execution_count": 28,
   1285    "metadata": {
   1286     "ExecuteTime": {
   1287      "end_time": "2018-12-27T23:40:48.817474Z",
   1288      "start_time": "2018-12-27T23:35:53.942018Z"
   1289     }
   1290    },
   1291    "outputs": [
   1292     {
   1293      "name": "stdout",
   1294      "output_type": "stream",
   1295      "text": [
   1296       "Accuracy: 63.39%\n"
   1297      ]
   1298     }
   1299    ],
   1300    "source": [
   1301     "rf = RandomForestClassifier(n_jobs=-1,  \n",
   1302     "                            n_estimators=100,\n",
   1303     "                            class_weight='balanced_subsample')\n",
   1304     "rf.fit(X_train, y_train)\n",
   1305     "y_pred = rf.predict(X_test)\n",
   1306     "print(f'Accuracy: {accuracy_score(y_true=y_test, y_pred=y_pred):.2%}')"
   1307    ]
   1308   },
   1309   {
   1310    "cell_type": "code",
   1311    "execution_count": 38,
   1312    "metadata": {
   1313     "ExecuteTime": {
   1314      "end_time": "2018-12-27T23:50:06.311958Z",
   1315      "start_time": "2018-12-27T23:50:04.657072Z"
   1316     }
   1317    },
   1318    "outputs": [],
   1319    "source": [
   1320     "y_pred_prob = rf.predict_proba(X_test)"
   1321    ]
   1322   },
   1323   {
   1324    "cell_type": "code",
   1325    "execution_count": 39,
   1326    "metadata": {
   1327     "ExecuteTime": {
   1328      "end_time": "2018-12-27T23:50:16.664162Z",
   1329      "start_time": "2018-12-27T23:50:16.598883Z"
   1330     }
   1331    },
   1332    "outputs": [
   1333     {
   1334      "data": {
   1335       "text/html": [
   1336        "<div>\n",
   1337        "<style scoped>\n",
   1338        "    .dataframe tbody tr th:only-of-type {\n",
   1339        "        vertical-align: middle;\n",
   1340        "    }\n",
   1341        "\n",
   1342        "    .dataframe tbody tr th {\n",
   1343        "        vertical-align: top;\n",
   1344        "    }\n",
   1345        "\n",
   1346        "    .dataframe thead th {\n",
   1347        "        text-align: right;\n",
   1348        "    }\n",
   1349        "</style>\n",
   1350        "<table border=\"1\" class=\"dataframe\">\n",
   1351        "  <thead>\n",
   1352        "    <tr style=\"text-align: right;\">\n",
   1353        "      <th></th>\n",
   1354        "      <th>0</th>\n",
   1355        "      <th>1</th>\n",
   1356        "      <th>2</th>\n",
   1357        "    </tr>\n",
   1358        "  </thead>\n",
   1359        "  <tbody>\n",
   1360        "    <tr>\n",
   1361        "      <th>count</th>\n",
   1362        "      <td>139717.000000</td>\n",
   1363        "      <td>139717.000000</td>\n",
   1364        "      <td>139717.000000</td>\n",
   1365        "    </tr>\n",
   1366        "    <tr>\n",
   1367        "      <th>mean</th>\n",
   1368        "      <td>0.635541</td>\n",
   1369        "      <td>0.207648</td>\n",
   1370        "      <td>0.156812</td>\n",
   1371        "    </tr>\n",
   1372        "    <tr>\n",
   1373        "      <th>std</th>\n",
   1374        "      <td>0.048588</td>\n",
   1375        "      <td>0.041110</td>\n",
   1376        "      <td>0.036620</td>\n",
   1377        "    </tr>\n",
   1378        "    <tr>\n",
   1379        "      <th>min</th>\n",
   1380        "      <td>0.190000</td>\n",
   1381        "      <td>0.050000</td>\n",
   1382        "      <td>0.030000</td>\n",
   1383        "    </tr>\n",
   1384        "    <tr>\n",
   1385        "      <th>25%</th>\n",
   1386        "      <td>0.600000</td>\n",
   1387        "      <td>0.180000</td>\n",
   1388        "      <td>0.130000</td>\n",
   1389        "    </tr>\n",
   1390        "    <tr>\n",
   1391        "      <th>50%</th>\n",
   1392        "      <td>0.640000</td>\n",
   1393        "      <td>0.210000</td>\n",
   1394        "      <td>0.160000</td>\n",
   1395        "    </tr>\n",
   1396        "    <tr>\n",
   1397        "      <th>75%</th>\n",
   1398        "      <td>0.670000</td>\n",
   1399        "      <td>0.230000</td>\n",
   1400        "      <td>0.180000</td>\n",
   1401        "    </tr>\n",
   1402        "    <tr>\n",
   1403        "      <th>max</th>\n",
   1404        "      <td>0.860000</td>\n",
   1405        "      <td>0.740000</td>\n",
   1406        "      <td>0.340000</td>\n",
   1407        "    </tr>\n",
   1408        "  </tbody>\n",
   1409        "</table>\n",
   1410        "</div>"
   1411       ],
   1412       "text/plain": [
   1413        "                   0              1              2\n",
   1414        "count  139717.000000  139717.000000  139717.000000\n",
   1415        "mean        0.635541       0.207648       0.156812\n",
   1416        "std         0.048588       0.041110       0.036620\n",
   1417        "min         0.190000       0.050000       0.030000\n",
   1418        "25%         0.600000       0.180000       0.130000\n",
   1419        "50%         0.640000       0.210000       0.160000\n",
   1420        "75%         0.670000       0.230000       0.180000\n",
   1421        "max         0.860000       0.740000       0.340000"
   1422       ]
   1423      },
   1424      "execution_count": 39,
   1425      "metadata": {},
   1426      "output_type": "execute_result"
   1427     }
   1428    ],
   1429    "source": [
   1430     "pd.DataFrame(y_pred_prob).describe()"
   1431    ]
   1432   },
   1433   {
   1434    "cell_type": "code",
   1435    "execution_count": 36,
   1436    "metadata": {
   1437     "ExecuteTime": {
   1438      "end_time": "2018-12-27T23:49:06.348646Z",
   1439      "start_time": "2018-12-27T23:49:06.325988Z"
   1440     }
   1441    },
   1442    "outputs": [
   1443     {
   1444      "data": {
   1445       "text/plain": [
   1446        "0    139715\n",
   1447        "1         2\n",
   1448        "dtype: int64"
   1449       ]
   1450      },
   1451      "execution_count": 36,
   1452      "metadata": {},
   1453      "output_type": "execute_result"
   1454     }
   1455    ],
   1456    "source": [
   1457     "pd.Series(y_pred).value_counts()"
   1458    ]
   1459   },
   1460   {
   1461    "cell_type": "code",
   1462    "execution_count": 32,
   1463    "metadata": {
   1464     "ExecuteTime": {
   1465      "end_time": "2018-12-27T23:47:01.765414Z",
   1466      "start_time": "2018-12-27T23:47:01.758140Z"
   1467     }
   1468    },
   1469    "outputs": [
   1470     {
   1471      "data": {
   1472       "text/plain": [
   1473        "0    354263\n",
   1474        "1    116809\n",
   1475        "2     87796\n",
   1476        "dtype: int64"
   1477       ]
   1478      },
   1479      "execution_count": 32,
   1480      "metadata": {},
   1481      "output_type": "execute_result"
   1482     }
   1483    ],
   1484    "source": [
   1485     "pd.Series(y_train).value_counts()"
   1486    ]
   1487   },
   1488   {
   1489    "cell_type": "code",
   1490    "execution_count": 33,
   1491    "metadata": {
   1492     "ExecuteTime": {
   1493      "end_time": "2018-12-27T23:47:24.456819Z",
   1494      "start_time": "2018-12-27T23:47:24.444497Z"
   1495     }
   1496    },
   1497    "outputs": [
   1498     {
   1499      "data": {
   1500       "text/plain": [
   1501        "0.6338956605137528"
   1502       ]
   1503      },
   1504      "execution_count": 33,
   1505      "metadata": {},
   1506      "output_type": "execute_result"
   1507     }
   1508    ],
   1509    "source": [
   1510     "(y_test == 0).mean()"
   1511    ]
   1512   },
   1513   {
   1514    "cell_type": "code",
   1515    "execution_count": 29,
   1516    "metadata": {
   1517     "ExecuteTime": {
   1518      "end_time": "2018-12-27T23:40:48.919212Z",
   1519      "start_time": "2018-12-27T23:40:48.819056Z"
   1520     }
   1521    },
   1522    "outputs": [
   1523     {
   1524      "data": {
   1525       "text/plain": [
   1526        "array([[88564,     2,     0],\n",
   1527        "       [29202,     0,     0],\n",
   1528        "       [21949,     0,     0]])"
   1529       ]
   1530      },
   1531      "execution_count": 29,
   1532      "metadata": {},
   1533      "output_type": "execute_result"
   1534     }
   1535    ],
   1536    "source": [
   1537     "confusion_matrix(y_true=y_test, y_pred=y_pred)"
   1538    ]
   1539   },
   1540   {
   1541    "cell_type": "markdown",
   1542    "metadata": {},
   1543    "source": [
   1544     "## Logistic Regression"
   1545    ]
   1546   },
   1547   {
   1548    "cell_type": "markdown",
   1549    "metadata": {},
   1550    "source": [
   1551     "### Binary Classification"
   1552    ]
   1553   },
   1554   {
   1555    "cell_type": "code",
   1556    "execution_count": 44,
   1557    "metadata": {
   1558     "ExecuteTime": {
   1559      "end_time": "2018-12-28T01:29:09.839140Z",
   1560      "start_time": "2018-12-28T01:29:04.044264Z"
   1561     }
   1562    },
   1563    "outputs": [
   1564     {
   1565      "name": "stdout",
   1566      "output_type": "stream",
   1567      "text": [
   1568       "Accuracy: 50.05%\n"
   1569      ]
   1570     }
   1571    ],
   1572    "source": [
   1573     "lr = LogisticRegression()\n",
   1574     "lr.fit(X_train, y_train)\n",
   1575     "y_pred = lr.predict(X_test)\n",
   1576     "print(f'Accuracy: {accuracy_score(y_true=y_test, y_pred=y_pred):.2%}')"
   1577    ]
   1578   },
   1579   {
   1580    "cell_type": "markdown",
   1581    "metadata": {},
   1582    "source": [
   1583     "### Multinomial Classification"
   1584    ]
   1585   },
   1586   {
   1587    "cell_type": "code",
   1588    "execution_count": 100,
   1589    "metadata": {
   1590     "ExecuteTime": {
   1591      "end_time": "2018-12-28T04:17:30.953069Z",
   1592      "start_time": "2018-12-28T04:17:16.299083Z"
   1593     }
   1594    },
   1595    "outputs": [
   1596     {
   1597      "name": "stdout",
   1598      "output_type": "stream",
   1599      "text": [
   1600       "Accuracy: 34.60%\n"
   1601      ]
   1602     }
   1603    ],
   1604    "source": [
   1605     "lr = LogisticRegression(multi_class='multinomial', solver='lbfgs', class_weight='balanced')\n",
   1606     "lr.fit(X_train, y_train)\n",
   1607     "y_pred = lr.predict(X_test)\n",
   1608     "print(f'Accuracy: {accuracy_score(y_true=y_test, y_pred=y_pred):.2%}')"
   1609    ]
   1610   },
   1611   {
   1612    "cell_type": "code",
   1613    "execution_count": 101,
   1614    "metadata": {
   1615     "ExecuteTime": {
   1616      "end_time": "2018-12-28T04:18:01.235296Z",
   1617      "start_time": "2018-12-28T04:18:01.179436Z"
   1618     }
   1619    },
   1620    "outputs": [
   1621     {
   1622      "data": {
   1623       "text/plain": [
   1624        "array([[ 7624,  3105,  1792,  2075,  4929],\n",
   1625        "       [ 4957,  5365,  3126,  2027,  4111],\n",
   1626        "       [ 3142,  3631,  4546,  3171,  5017],\n",
   1627        "       [ 2301,  1756,  3307,  4580,  7430],\n",
   1628        "       [ 1742,   888,  1363,  3653, 11499]])"
   1629       ]
   1630      },
   1631      "execution_count": 101,
   1632      "metadata": {},
   1633      "output_type": "execute_result"
   1634     }
   1635    ],
   1636    "source": [
   1637     "confusion_matrix(y_true=y_test, y_pred=y_pred)"
   1638    ]
   1639   },
   1640   {
   1641    "cell_type": "code",
   1642    "execution_count": null,
   1643    "metadata": {},
   1644    "outputs": [],
   1645    "source": []
   1646   }
   1647  ],
   1648  "metadata": {
   1649   "kernelspec": {
   1650    "display_name": "Python 3",
   1651    "language": "python",
   1652    "name": "python3"
   1653   },
   1654   "language_info": {
   1655    "codemirror_mode": {
   1656     "name": "ipython",
   1657     "version": 3
   1658    },
   1659    "file_extension": ".py",
   1660    "mimetype": "text/x-python",
   1661    "name": "python",
   1662    "nbconvert_exporter": "python",
   1663    "pygments_lexer": "ipython3",
   1664    "version": "3.6.8"
   1665   },
   1666   "toc": {
   1667    "base_numbering": 1,
   1668    "nav_menu": {},
   1669    "number_sections": true,
   1670    "sideBar": true,
   1671    "skip_h1_title": false,
   1672    "title_cell": "Table of Contents",
   1673    "title_sidebar": "Contents",
   1674    "toc_cell": false,
   1675    "toc_position": {},
   1676    "toc_section_display": true,
   1677    "toc_window_display": false
   1678   }
   1679  },
   1680  "nbformat": 4,
   1681  "nbformat_minor": 2
   1682 }