ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
lab_105.ipynb

(28253B)
      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "# Deviations from Normality\n",
      8     "\n",
      9     "_(plus python functions with default parameters plus a quick example of recursive functions)_\n",
     10     "\n",
     11     "Today, we'll develop the code for skewness and kurtosis even though these are already available in the `scipy.stats` module, and then we'll apply them to hedge fund index returns.\n",
     12     "\n",
     13     "We'll also look at using the `scipy.stats` module to apply the _Jarque-Bera_ test for normality, and apply it to different return series.\n",
     14     "\n",
     15     "First, add the following code to our `edhec_risk_kit.py`\n",
     16     "\n",
     17     "```python\n",
     18     "def get_hfi_returns():\n",
     19     "    \"\"\"\n",
     20     "    Read the EDHEC Hedge Fund Index returns CSV, divide the values by 100\n",
     21     "    and convert the date index to monthly periods\n",
     22     "    \"\"\"\n",
     23     "    returns = pd.read_csv(\"data/edhec-hedgefundindices.csv\",\n",
     24     "                          header=0, index_col=0, parse_dates=True) / 100\n",
     25     "    returns.index = returns.index.to_period('M')\n",
     26     "    return returns\n",
     27     "```\n"
     28    ]
     29   },
     30   {
     31    "cell_type": "code",
     32    "execution_count": 1,
     33    "metadata": {},
     34    "outputs": [
     35     {
     36      "data": {
     37       "text/html": [
     38        "<div>\n",
     39        "<style scoped>\n",
     40        "    .dataframe tbody tr th:only-of-type {\n",
     41        "        vertical-align: middle;\n",
     42        "    }\n",
     43        "\n",
     44        "    .dataframe tbody tr th {\n",
     45        "        vertical-align: top;\n",
     46        "    }\n",
     47        "\n",
     48        "    .dataframe thead th {\n",
     49        "        text-align: right;\n",
     50        "    }\n",
     51        "</style>\n",
     52        "<table border=\"1\" class=\"dataframe\">\n",
     53        "  <thead>\n",
     54        "    <tr style=\"text-align: right;\">\n",
     55        "      <th></th>\n",
     56        "      <th>Convertible Arbitrage</th>\n",
     57        "      <th>CTA Global</th>\n",
     58        "      <th>Distressed Securities</th>\n",
     59        "      <th>Emerging Markets</th>\n",
     60        "      <th>Equity Market Neutral</th>\n",
     61        "      <th>Event Driven</th>\n",
     62        "      <th>Fixed Income Arbitrage</th>\n",
     63        "      <th>Global Macro</th>\n",
     64        "      <th>Long/Short Equity</th>\n",
     65        "      <th>Merger Arbitrage</th>\n",
     66        "      <th>Relative Value</th>\n",
     67        "      <th>Short Selling</th>\n",
     68        "      <th>Funds Of Funds</th>\n",
     69        "    </tr>\n",
     70        "    <tr>\n",
     71        "      <th>date</th>\n",
     72        "      <th></th>\n",
     73        "      <th></th>\n",
     74        "      <th></th>\n",
     75        "      <th></th>\n",
     76        "      <th></th>\n",
     77        "      <th></th>\n",
     78        "      <th></th>\n",
     79        "      <th></th>\n",
     80        "      <th></th>\n",
     81        "      <th></th>\n",
     82        "      <th></th>\n",
     83        "      <th></th>\n",
     84        "      <th></th>\n",
     85        "    </tr>\n",
     86        "  </thead>\n",
     87        "  <tbody>\n",
     88        "    <tr>\n",
     89        "      <th>1997-01</th>\n",
     90        "      <td>0.0119</td>\n",
     91        "      <td>0.0393</td>\n",
     92        "      <td>0.0178</td>\n",
     93        "      <td>0.0791</td>\n",
     94        "      <td>0.0189</td>\n",
     95        "      <td>0.0213</td>\n",
     96        "      <td>0.0191</td>\n",
     97        "      <td>0.0573</td>\n",
     98        "      <td>0.0281</td>\n",
     99        "      <td>0.0150</td>\n",
    100        "      <td>0.0180</td>\n",
    101        "      <td>-0.0166</td>\n",
    102        "      <td>0.0317</td>\n",
    103        "    </tr>\n",
    104        "    <tr>\n",
    105        "      <th>1997-02</th>\n",
    106        "      <td>0.0123</td>\n",
    107        "      <td>0.0298</td>\n",
    108        "      <td>0.0122</td>\n",
    109        "      <td>0.0525</td>\n",
    110        "      <td>0.0101</td>\n",
    111        "      <td>0.0084</td>\n",
    112        "      <td>0.0122</td>\n",
    113        "      <td>0.0175</td>\n",
    114        "      <td>-0.0006</td>\n",
    115        "      <td>0.0034</td>\n",
    116        "      <td>0.0118</td>\n",
    117        "      <td>0.0426</td>\n",
    118        "      <td>0.0106</td>\n",
    119        "    </tr>\n",
    120        "    <tr>\n",
    121        "      <th>1997-03</th>\n",
    122        "      <td>0.0078</td>\n",
    123        "      <td>-0.0021</td>\n",
    124        "      <td>-0.0012</td>\n",
    125        "      <td>-0.0120</td>\n",
    126        "      <td>0.0016</td>\n",
    127        "      <td>-0.0023</td>\n",
    128        "      <td>0.0109</td>\n",
    129        "      <td>-0.0119</td>\n",
    130        "      <td>-0.0084</td>\n",
    131        "      <td>0.0060</td>\n",
    132        "      <td>0.0010</td>\n",
    133        "      <td>0.0778</td>\n",
    134        "      <td>-0.0077</td>\n",
    135        "    </tr>\n",
    136        "    <tr>\n",
    137        "      <th>1997-04</th>\n",
    138        "      <td>0.0086</td>\n",
    139        "      <td>-0.0170</td>\n",
    140        "      <td>0.0030</td>\n",
    141        "      <td>0.0119</td>\n",
    142        "      <td>0.0119</td>\n",
    143        "      <td>-0.0005</td>\n",
    144        "      <td>0.0130</td>\n",
    145        "      <td>0.0172</td>\n",
    146        "      <td>0.0084</td>\n",
    147        "      <td>-0.0001</td>\n",
    148        "      <td>0.0122</td>\n",
    149        "      <td>-0.0129</td>\n",
    150        "      <td>0.0009</td>\n",
    151        "    </tr>\n",
    152        "    <tr>\n",
    153        "      <th>1997-05</th>\n",
    154        "      <td>0.0156</td>\n",
    155        "      <td>-0.0015</td>\n",
    156        "      <td>0.0233</td>\n",
    157        "      <td>0.0315</td>\n",
    158        "      <td>0.0189</td>\n",
    159        "      <td>0.0346</td>\n",
    160        "      <td>0.0118</td>\n",
    161        "      <td>0.0108</td>\n",
    162        "      <td>0.0394</td>\n",
    163        "      <td>0.0197</td>\n",
    164        "      <td>0.0173</td>\n",
    165        "      <td>-0.0737</td>\n",
    166        "      <td>0.0275</td>\n",
    167        "    </tr>\n",
    168        "  </tbody>\n",
    169        "</table>\n",
    170        "</div>"
    171       ],
    172       "text/plain": [
    173        "         Convertible Arbitrage  CTA Global  Distressed Securities  \\\n",
    174        "date                                                                \n",
    175        "1997-01                 0.0119      0.0393                 0.0178   \n",
    176        "1997-02                 0.0123      0.0298                 0.0122   \n",
    177        "1997-03                 0.0078     -0.0021                -0.0012   \n",
    178        "1997-04                 0.0086     -0.0170                 0.0030   \n",
    179        "1997-05                 0.0156     -0.0015                 0.0233   \n",
    180        "\n",
    181        "         Emerging Markets  Equity Market Neutral  Event Driven  \\\n",
    182        "date                                                             \n",
    183        "1997-01            0.0791                 0.0189        0.0213   \n",
    184        "1997-02            0.0525                 0.0101        0.0084   \n",
    185        "1997-03           -0.0120                 0.0016       -0.0023   \n",
    186        "1997-04            0.0119                 0.0119       -0.0005   \n",
    187        "1997-05            0.0315                 0.0189        0.0346   \n",
    188        "\n",
    189        "         Fixed Income Arbitrage  Global Macro  Long/Short Equity  \\\n",
    190        "date                                                               \n",
    191        "1997-01                  0.0191        0.0573             0.0281   \n",
    192        "1997-02                  0.0122        0.0175            -0.0006   \n",
    193        "1997-03                  0.0109       -0.0119            -0.0084   \n",
    194        "1997-04                  0.0130        0.0172             0.0084   \n",
    195        "1997-05                  0.0118        0.0108             0.0394   \n",
    196        "\n",
    197        "         Merger Arbitrage  Relative Value  Short Selling  Funds Of Funds  \n",
    198        "date                                                                      \n",
    199        "1997-01            0.0150          0.0180        -0.0166          0.0317  \n",
    200        "1997-02            0.0034          0.0118         0.0426          0.0106  \n",
    201        "1997-03            0.0060          0.0010         0.0778         -0.0077  \n",
    202        "1997-04           -0.0001          0.0122        -0.0129          0.0009  \n",
    203        "1997-05            0.0197          0.0173        -0.0737          0.0275  "
    204       ]
    205      },
    206      "execution_count": 1,
    207      "metadata": {},
    208      "output_type": "execute_result"
    209     }
    210    ],
    211    "source": [
    212     "%load_ext autoreload\n",
    213     "%autoreload 2\n",
    214     "\n",
    215     "import pandas as pd\n",
    216     "import edhec_risk_kit_105 as erk\n",
    217     "hfi = erk.get_hfi_returns()\n",
    218     "hfi.head()"
    219    ]
    220   },
    221   {
    222    "cell_type": "markdown",
    223    "metadata": {},
    224    "source": [
    225     "## Skewness\n",
    226     "\n",
    227     "Intuitively, a negative skew means that you get more negative returns than you would have expected if the returns were distributed like the normal distribution.\n",
    228     "\n",
    229     "Another way of thinking about it is that if returns are normally distributed, the mean and the median would be very close.\n",
    230     "\n",
    231     "However, if they are negatively skewed, the expected value i.e. the mean is less than the median. If they are positively skewed, the expected value (again, the mean) is greater than the median."
    232    ]
    233   },
    234   {
    235    "cell_type": "code",
    236    "execution_count": 2,
    237    "metadata": {},
    238    "outputs": [
    239     {
    240      "data": {
    241       "text/html": [
    242        "<div>\n",
    243        "<style scoped>\n",
    244        "    .dataframe tbody tr th:only-of-type {\n",
    245        "        vertical-align: middle;\n",
    246        "    }\n",
    247        "\n",
    248        "    .dataframe tbody tr th {\n",
    249        "        vertical-align: top;\n",
    250        "    }\n",
    251        "\n",
    252        "    .dataframe thead th {\n",
    253        "        text-align: right;\n",
    254        "    }\n",
    255        "</style>\n",
    256        "<table border=\"1\" class=\"dataframe\">\n",
    257        "  <thead>\n",
    258        "    <tr style=\"text-align: right;\">\n",
    259        "      <th></th>\n",
    260        "      <th>0</th>\n",
    261        "      <th>1</th>\n",
    262        "      <th>2</th>\n",
    263        "    </tr>\n",
    264        "  </thead>\n",
    265        "  <tbody>\n",
    266        "    <tr>\n",
    267        "      <th>Convertible Arbitrage</th>\n",
    268        "      <td>0.005508</td>\n",
    269        "      <td>0.0065</td>\n",
    270        "      <td>False</td>\n",
    271        "    </tr>\n",
    272        "    <tr>\n",
    273        "      <th>CTA Global</th>\n",
    274        "      <td>0.004074</td>\n",
    275        "      <td>0.0014</td>\n",
    276        "      <td>True</td>\n",
    277        "    </tr>\n",
    278        "    <tr>\n",
    279        "      <th>Distressed Securities</th>\n",
    280        "      <td>0.006946</td>\n",
    281        "      <td>0.0089</td>\n",
    282        "      <td>False</td>\n",
    283        "    </tr>\n",
    284        "    <tr>\n",
    285        "      <th>Emerging Markets</th>\n",
    286        "      <td>0.006253</td>\n",
    287        "      <td>0.0096</td>\n",
    288        "      <td>False</td>\n",
    289        "    </tr>\n",
    290        "    <tr>\n",
    291        "      <th>Equity Market Neutral</th>\n",
    292        "      <td>0.004498</td>\n",
    293        "      <td>0.0051</td>\n",
    294        "      <td>False</td>\n",
    295        "    </tr>\n",
    296        "    <tr>\n",
    297        "      <th>Event Driven</th>\n",
    298        "      <td>0.006344</td>\n",
    299        "      <td>0.0084</td>\n",
    300        "      <td>False</td>\n",
    301        "    </tr>\n",
    302        "    <tr>\n",
    303        "      <th>Fixed Income Arbitrage</th>\n",
    304        "      <td>0.004365</td>\n",
    305        "      <td>0.0055</td>\n",
    306        "      <td>False</td>\n",
    307        "    </tr>\n",
    308        "    <tr>\n",
    309        "      <th>Global Macro</th>\n",
    310        "      <td>0.005403</td>\n",
    311        "      <td>0.0038</td>\n",
    312        "      <td>True</td>\n",
    313        "    </tr>\n",
    314        "    <tr>\n",
    315        "      <th>Long/Short Equity</th>\n",
    316        "      <td>0.006331</td>\n",
    317        "      <td>0.0079</td>\n",
    318        "      <td>False</td>\n",
    319        "    </tr>\n",
    320        "    <tr>\n",
    321        "      <th>Merger Arbitrage</th>\n",
    322        "      <td>0.005356</td>\n",
    323        "      <td>0.0060</td>\n",
    324        "      <td>False</td>\n",
    325        "    </tr>\n",
    326        "    <tr>\n",
    327        "      <th>Relative Value</th>\n",
    328        "      <td>0.005792</td>\n",
    329        "      <td>0.0067</td>\n",
    330        "      <td>False</td>\n",
    331        "    </tr>\n",
    332        "    <tr>\n",
    333        "      <th>Short Selling</th>\n",
    334        "      <td>-0.001701</td>\n",
    335        "      <td>-0.0053</td>\n",
    336        "      <td>True</td>\n",
    337        "    </tr>\n",
    338        "    <tr>\n",
    339        "      <th>Funds Of Funds</th>\n",
    340        "      <td>0.004262</td>\n",
    341        "      <td>0.0052</td>\n",
    342        "      <td>False</td>\n",
    343        "    </tr>\n",
    344        "  </tbody>\n",
    345        "</table>\n",
    346        "</div>"
    347       ],
    348       "text/plain": [
    349        "                               0       1      2\n",
    350        "Convertible Arbitrage   0.005508  0.0065  False\n",
    351        "CTA Global              0.004074  0.0014   True\n",
    352        "Distressed Securities   0.006946  0.0089  False\n",
    353        "Emerging Markets        0.006253  0.0096  False\n",
    354        "Equity Market Neutral   0.004498  0.0051  False\n",
    355        "Event Driven            0.006344  0.0084  False\n",
    356        "Fixed Income Arbitrage  0.004365  0.0055  False\n",
    357        "Global Macro            0.005403  0.0038   True\n",
    358        "Long/Short Equity       0.006331  0.0079  False\n",
    359        "Merger Arbitrage        0.005356  0.0060  False\n",
    360        "Relative Value          0.005792  0.0067  False\n",
    361        "Short Selling          -0.001701 -0.0053   True\n",
    362        "Funds Of Funds          0.004262  0.0052  False"
    363       ]
    364      },
    365      "execution_count": 2,
    366      "metadata": {},
    367      "output_type": "execute_result"
    368     }
    369    ],
    370    "source": [
    371     "pd.concat([hfi.mean(), hfi.median(), hfi.mean()>hfi.median()], axis=1)"
    372    ]
    373   },
    374   {
    375    "cell_type": "markdown",
    376    "metadata": {},
    377    "source": [
    378     "Now, let's develop the code to compute the skewness of a series of numbers.\n",
    379     "\n",
    380     "Recall that the skewness is given by:\n",
    381     "\n",
    382     "$$ S(R) = \\frac{E[ (R-E(R))^3 ]}{\\sigma_R^3} $$\n"
    383    ]
    384   },
    385   {
    386    "cell_type": "code",
    387    "execution_count": 3,
    388    "metadata": {},
    389    "outputs": [],
    390    "source": [
    391     "def skewness(r):\n",
    392     "    \"\"\"\n",
    393     "    Alternative to scipy.stats.skew()\n",
    394     "    Computes the skewness of the supplied Series or DataFrame\n",
    395     "    as the mean cubed deviation over the cubed population (ddof=0) std\n",
    396     "    Returns a float or a Series\n",
    397     "    \"\"\"\n",
    398     "    demeaned_r = r - r.mean()\n",
    399     "    # use the population standard deviation, so set ddof=0\n",
    400     "    sigma_r = r.std(ddof=0)\n",
    401     "    exp = (demeaned_r**3).mean()\n",
    402     "    return exp/sigma_r**3\n"
    403    ]
    404   },
    405   {
    406    "cell_type": "code",
    407    "execution_count": 4,
    408    "metadata": {},
    409    "outputs": [
    410     {
    411      "data": {
    412       "text/plain": [
    413        "Fixed Income Arbitrage   -3.940320\n",
    414        "Convertible Arbitrage    -2.639592\n",
    415        "Equity Market Neutral    -2.124435\n",
    416        "Relative Value           -1.815470\n",
    417        "Event Driven             -1.409154\n",
    418        "Merger Arbitrage         -1.320083\n",
    419        "Distressed Securities    -1.300842\n",
    420        "Emerging Markets         -1.167067\n",
    421        "Long/Short Equity        -0.390227\n",
    422        "Funds Of Funds           -0.361783\n",
    423        "CTA Global                0.173699\n",
    424        "Short Selling             0.767975\n",
    425        "Global Macro              0.982922\n",
    426        "dtype: float64"
    427       ]
    428      },
    429      "execution_count": 4,
    430      "metadata": {},
    431      "output_type": "execute_result"
    432     }
    433    ],
    434    "source": [
    435     "skewness(hfi).sort_values()"
    436    ]
    437   },
    438   {
    439    "cell_type": "markdown",
    440    "metadata": {},
    441    "source": [
    442     "Just to see if we get the same answer, let's use the skewness function that is built into `scipy.stats`"
    443    ]
    444   },
    445   {
    446    "cell_type": "code",
    447    "execution_count": 5,
    448    "metadata": {},
    449    "outputs": [
    450     {
    451      "data": {
    452       "text/plain": [
    453        "array([-2.63959223,  0.17369864, -1.30084204, -1.16706749, -2.12443538,\n",
    454        "       -1.40915356, -3.94032029,  0.98292188, -0.39022677, -1.32008333,\n",
    455        "       -1.81546975,  0.76797484, -0.36178308])"
    456       ]
    457      },
    458      "execution_count": 5,
    459      "metadata": {},
    460      "output_type": "execute_result"
    461     }
    462    ],
    463    "source": [
    464     "import scipy.stats\n",
    465     "scipy.stats.skew(hfi)"
    466    ]
    467   },
    468   {
    469    "cell_type": "markdown",
    470    "metadata": {},
    471    "source": [
    472     "So, let's add that to our `edhec_risk_kit.py`.\n",
    473     "\n",
    474     "Finally, let's look at the skewness that you would expect from a sequence of returns drawn from a normal distribution. Let's use the random normal generator from numpy and generate the same number of returns as we have for the hedge fund data."
    475    ]
    476   },
    477   {
    478    "cell_type": "code",
    479    "execution_count": 6,
    480    "metadata": {},
    481    "outputs": [
    482     {
    483      "data": {
    484       "text/plain": [
    485        "(263, 13)"
    486       ]
    487      },
    488      "execution_count": 6,
    489      "metadata": {},
    490      "output_type": "execute_result"
    491     }
    492    ],
    493    "source": [
    494     "hfi.shape"
    495    ]
    496   },
    497   {
    498    "cell_type": "code",
    499    "execution_count": 7,
    500    "metadata": {},
    501    "outputs": [],
    502    "source": [
    503     "import numpy as np\n",
    504     "normal_rets = np.random.normal(0, 0.15, (263, 1))"
    505    ]
    506   },
    507   {
    508    "cell_type": "code",
    509    "execution_count": 8,
    510    "metadata": {},
    511    "outputs": [
    512     {
    513      "data": {
    514       "text/plain": [
    515        "(-0.0015807457835784748, 0.1506275384850896)"
    516       ]
    517      },
    518      "execution_count": 8,
    519      "metadata": {},
    520      "output_type": "execute_result"
    521     }
    522    ],
    523    "source": [
    524     "normal_rets.mean(), normal_rets.std()"
    525    ]
    526   },
    527   {
    528    "cell_type": "code",
    529    "execution_count": 9,
    530    "metadata": {},
    531    "outputs": [
    532     {
    533      "data": {
    534       "text/plain": [
    535        "-0.17547810773813705"
    536       ]
    537      },
    538      "execution_count": 9,
    539      "metadata": {},
    540      "output_type": "execute_result"
    541     }
    542    ],
    543    "source": [
    544     "erk.skewness(normal_rets)"
    545    ]
    546   },
    547   {
    548    "cell_type": "markdown",
    549    "metadata": {},
    550    "source": [
    551     "## Kurtosis\n",
    552     "\n",
    553     "Intuitively, the kurtosis measures the \"fatness\" of the tails of the distribution. The normal distribution has a kurtosis of 3 and so if the kurtosis of your returns is less than 3 then it tends to have thinner tails, and if the kurtosis is greater than 3 then the distribution has fatter tails.\n",
    554     "\n",
    555     "Kurtosis is given by:\n",
    556     "\n",
    557     "$$ K(R) = \\frac{E[ (R-E(R))^4 ]}{\\sigma_R^4} $$\n",
    558     "\n",
    559     "This is very similar to the skewness, so we can just copy and paste it and then edit it to compute the 4th rather than the 3rd power (as was the case for skewness).\n"
    560    ]
    561   },
    562   {
    563    "cell_type": "code",
    564    "execution_count": 10,
    565    "metadata": {},
    566    "outputs": [
    567     {
    568      "data": {
    569       "text/plain": [
    570        "Convertible Arbitrage     23.280834\n",
    571        "CTA Global                 2.952960\n",
    572        "Distressed Securities      7.889983\n",
    573        "Emerging Markets           9.250788\n",
    574        "Equity Market Neutral     17.218555\n",
    575        "Event Driven               8.035828\n",
    576        "Fixed Income Arbitrage    29.842199\n",
    577        "Global Macro               5.741679\n",
    578        "Long/Short Equity          4.523893\n",
    579        "Merger Arbitrage           8.738950\n",
    580        "Relative Value            12.121208\n",
    581        "Short Selling              6.117772\n",
    582        "Funds Of Funds             7.070153\n",
    583        "dtype: float64"
    584       ]
    585      },
    586      "execution_count": 10,
    587      "metadata": {},
    588      "output_type": "execute_result"
    589     }
    590    ],
    591    "source": [
    592     "erk.kurtosis(hfi)"
    593    ]
    594   },
    595   {
    596    "cell_type": "markdown",
    597    "metadata": {},
    598    "source": [
    599     "Let's compare it with `scipy.stats` ..."
    600    ]
    601   },
    602   {
    603    "cell_type": "code",
    604    "execution_count": 11,
    605    "metadata": {},
    606    "outputs": [
    607     {
    608      "data": {
    609       "text/plain": [
    610        "array([20.28083446, -0.04703963,  4.88998336,  6.25078841, 14.21855526,\n",
    611        "        5.03582817, 26.84219928,  2.74167945,  1.52389258,  5.73894979,\n",
    612        "        9.12120787,  3.11777175,  4.07015278])"
    613       ]
    614      },
    615      "execution_count": 11,
    616      "metadata": {},
    617      "output_type": "execute_result"
    618     }
    619    ],
    620    "source": [
    621     "scipy.stats.kurtosis(hfi)"
    622    ]
    623   },
    624   {
    625    "cell_type": "markdown",
    626    "metadata": {},
    627    "source": [
    628     "Note that these numbers are all lower by 3 from the number we have computed. That's because, as we said above, the expected kurtosis of a normally distributed series of numbers is 3, and `scipy.stats` is returning the _Excess Kurtosis_. We can see this by applying it on the random normal numbers we generated:"
    629    ]
    630   },
    631   {
    632    "cell_type": "code",
    633    "execution_count": 12,
    634    "metadata": {},
    635    "outputs": [
    636     {
    637      "data": {
    638       "text/plain": [
    639        "array([-0.04077066])"
    640       ]
    641      },
    642      "execution_count": 12,
    643      "metadata": {},
    644      "output_type": "execute_result"
    645     }
    646    ],
    647    "source": [
    648     "scipy.stats.kurtosis(normal_rets)"
    649    ]
    650   },
    651   {
    652    "cell_type": "code",
    653    "execution_count": 13,
    654    "metadata": {},
    655    "outputs": [
    656     {
    657      "data": {
    658       "text/plain": [
    659        "2.959229340906525"
    660       ]
    661      },
    662      "execution_count": 13,
    663      "metadata": {},
    664      "output_type": "execute_result"
    665     }
    666    ],
    667    "source": [
    668     "erk.kurtosis(normal_rets)"
    669    ]
    670   },
    671   {
    672    "cell_type": "markdown",
    673    "metadata": {},
    674    "source": [
    675     "## Running the Jarque-Bera Test for Normality\n",
    676     "\n",
    677     "The `scipy.stats` module contains a function that runs the _Jarque-Bera_ test on a sequence of numbers. Let's apply that to the normally generated returns:"
    678    ]
    679   },
    680   {
    681    "cell_type": "code",
    682    "execution_count": 14,
    683    "metadata": {},
    684    "outputs": [
    685     {
    686      "data": {
    687       "text/plain": [
    688        "(1.3679562754084507, 0.5046056036499179)"
    689       ]
    690      },
    691      "execution_count": 14,
    692      "metadata": {},
    693      "output_type": "execute_result"
    694     }
    695    ],
    696    "source": [
    697     "scipy.stats.jarque_bera(normal_rets)"
    698    ]
    699   },
    700   {
    701    "cell_type": "markdown",
    702    "metadata": {},
    703    "source": [
    704     "The first number is the test statistic and the second number is the one we want. It represents the p-value for the hypothesis test. If you want to run the test at a 1% level of significance, you want this number to be greater than 0.01 to accept the hypothesis that the data is normally distributed, and if that number is less than 0.01 then you must reject the hypothesis of normality.\n",
    705     "\n",
    706     "In this case, since we got a number higher than 0.01, we cannot reject the hypothesis that the numbers are normally distributed. Now, let's try this on our different hedge fund indices."
    707    ]
    708   },
    709   {
    710    "cell_type": "code",
    711    "execution_count": 15,
    712    "metadata": {},
    713    "outputs": [
    714     {
    715      "data": {
    716       "text/plain": [
    717        "(25656.585999171326, 0.0)"
    718       ]
    719      },
    720      "execution_count": 15,
    721      "metadata": {},
    722      "output_type": "execute_result"
    723     }
    724    ],
    725    "source": [
    726     "scipy.stats.jarque_bera(hfi)"
    727    ]
    728   },
    729   {
    730    "cell_type": "markdown",
    731    "metadata": {},
    732    "source": [
    733     "Why didn't we get the results for the individual indices? Because the implementation of the test isn't smart enough to realize that we want to treat each column as a separate set of returns. We can write our own wrapper for it to fix that, so let's start by writing a simple wrapper, and adding this code to our python file:\n",
    734     "\n",
    735     "```python\n",
    736     "import scipy.stats\n",
    737     "def is_normal(r, level=0.01):\n",
    738     "    \"\"\"\n",
    739     "    Runs the Jarque-Bera normality test on a Series of returns\n",
    740     "    The significance level defaults to 1%\n",
    741     "    Gives True when the p-value exceeds the level, False otherwise\n",
    742     "    \"\"\"\n",
    743     "    jb_result = scipy.stats.jarque_bera(r)\n",
    744     "    return jb_result[1] > level\n",
    745     "```"
    746    ]
    747   },
    748   {
    749    "cell_type": "code",
    750    "execution_count": 16,
    751    "metadata": {},
    752    "outputs": [
    753     {
    754      "data": {
    755       "text/plain": [
    756        "True"
    757       ]
    758      },
    759      "execution_count": 16,
    760      "metadata": {},
    761      "output_type": "execute_result"
    762     }
    763    ],
    764    "source": [
    765     "erk.is_normal(normal_rets)"
    766    ]
    767   },
    768   {
    769    "cell_type": "markdown",
    770    "metadata": {},
    771    "source": [
    772     "There are a few different ways to handle the problem. The first is to use the `.aggregate` method on a dataframe, that takes a function as an argument and applies that function to each column:"
    773    ]
    774   },
    775   {
    776    "cell_type": "code",
    777    "execution_count": 17,
    778    "metadata": {},
    779    "outputs": [
    780     {
    781      "data": {
    782       "text/plain": [
    783        "Convertible Arbitrage     False\n",
    784        "CTA Global                 True\n",
    785        "Distressed Securities     False\n",
    786        "Emerging Markets          False\n",
    787        "Equity Market Neutral     False\n",
    788        "Event Driven              False\n",
    789        "Fixed Income Arbitrage    False\n",
    790        "Global Macro              False\n",
    791        "Long/Short Equity         False\n",
    792        "Merger Arbitrage          False\n",
    793        "Relative Value            False\n",
    794        "Short Selling             False\n",
    795        "Funds Of Funds            False\n",
    796        "dtype: bool"
    797       ]
    798      },
    799      "execution_count": 17,
    800      "metadata": {},
    801      "output_type": "execute_result"
    802     }
    803    ],
    804    "source": [
    805     "hfi.aggregate(erk.is_normal)"
    806    ]
    807   },
    808   {
    809    "cell_type": "markdown",
    810    "metadata": {},
    811    "source": [
    812     "However, we can fix this in our wrapper so that we have a uniform interface to test normality:\n",
    813     "\n",
    814     "```python\n",
    815     "import scipy.stats\n",
    816     "def is_normal(r, level=0.01):\n",
    817     "    \"\"\"\n",
    818     "    Applies the Jarque-Bera test to determine if a Series is normal or not\n",
    819     "    Test is applied at the 1% level by default\n",
    820     "    Returns True if the hypothesis of normality is accepted, False otherwise\n",
    821     "    \"\"\"\n",
    822     "    if isinstance(r, pd.DataFrame):\n",
    823     "        return r.aggregate(is_normal, level=level)\n",
    824     "    else:\n",
    825     "        statistic, p_value = scipy.stats.jarque_bera(r)\n",
    826     "        return p_value > level\n",
    827     "```\n"
    828    ]
    829   },
    830   {
    831    "cell_type": "code",
    832    "execution_count": 18,
    833    "metadata": {},
    834    "outputs": [
    835     {
    836      "data": {
    837       "text/plain": [
    838        "True"
    839       ]
    840      },
    841      "execution_count": 18,
    842      "metadata": {},
    843      "output_type": "execute_result"
    844     }
    845    ],
    846    "source": [
    847     "import pandas as pd\n",
    848     "isinstance(hfi, pd.DataFrame)"
    849    ]
    850   },
    851   {
    852    "cell_type": "code",
    853    "execution_count": 19,
    854    "metadata": {},
    855    "outputs": [
    856     {
    857      "data": {
    858       "text/plain": [
    859        "True"
    860       ]
    861      },
    862      "execution_count": 19,
    863      "metadata": {},
    864      "output_type": "execute_result"
    865     }
    866    ],
    867    "source": [
    868     "erk.is_normal(normal_rets)"
    869    ]
    870   },
    871   {
    872    "cell_type": "markdown",
    873    "metadata": {},
    874    "source": [
    875     "## Testing CRSP SmallCap and LargeCap Returns for Normality\n",
    876     "\n",
    877     "Let's see whether any of the returns we've been studying so far pass the normality test."
    878    ]
    879   },
    880   {
    881    "cell_type": "code",
    882    "execution_count": 20,
    883    "metadata": {},
    884    "outputs": [
    885     {
    886      "data": {
    887       "text/plain": [
    888        "SmallCap    4.410739\n",
    889        "LargeCap    0.233445\n",
    890        "dtype: float64"
    891       ]
    892      },
    893      "execution_count": 20,
    894      "metadata": {},
    895      "output_type": "execute_result"
    896     }
    897    ],
    898    "source": [
    899     "ffme = erk.get_ffme_returns()\n",
    900     "erk.skewness(ffme)"
    901    ]
    902   },
    903   {
    904    "cell_type": "code",
    905    "execution_count": 21,
    906    "metadata": {},
    907    "outputs": [
    908     {
    909      "data": {
    910       "text/plain": [
    911        "SmallCap    46.845008\n",
    912        "LargeCap    10.694654\n",
    913        "dtype: float64"
    914       ]
    915      },
    916      "execution_count": 21,
    917      "metadata": {},
    918      "output_type": "execute_result"
    919     }
    920    ],
    921    "source": [
    922     "erk.kurtosis(ffme)"
    923    ]
    924   },
    925   {
    926    "cell_type": "code",
    927    "execution_count": 22,
    928    "metadata": {},
    929    "outputs": [
    930     {
    931      "data": {
    932       "text/plain": [
    933        "SmallCap    False\n",
    934        "LargeCap    False\n",
    935        "dtype: bool"
    936       ]
    937      },
    938      "execution_count": 22,
    939      "metadata": {},
    940      "output_type": "execute_result"
    941     }
    942    ],
    943    "source": [
    944     "erk.is_normal(ffme)"
    945    ]
    946   },
    947   {
    948    "cell_type": "code",
    949    "execution_count": null,
    950    "metadata": {},
    951    "outputs": [],
    952    "source": []
    953   }
    954  ],
    955  "metadata": {
    956   "kernelspec": {
    957    "display_name": "Python 3",
    958    "language": "python",
    959    "name": "python3"
    960   },
    961   "language_info": {
    962    "codemirror_mode": {
    963     "name": "ipython",
    964     "version": 3
    965    },
    966    "file_extension": ".py",
    967    "mimetype": "text/x-python",
    968    "name": "python",
    969    "nbconvert_exporter": "python",
    970    "pygments_lexer": "ipython3",
    971    "version": "3.8.8"
    972   }
    973  },
    974  "nbformat": 4,
    975  "nbformat_minor": 2
    976 }