ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

lab_201.ipynb

(38958B)


      1 {
      2  "cells": [
      3   {
      4    "cell_type": "markdown",
      5    "metadata": {},
      6    "source": [
      7     "# Factor Analysis using the CAPM and Fama-French Factor models\n",
      8     "\n",
      9     "The main idea in Factor Analysis is to take a set of observed returns and decompose it into a set of explanatory returns.\n",
     10     "\n",
     11     "We'll follow _Asset Management_ (Ang 2014, Oxford University Press) Chapter 10 and analyze the returns of Berkshire Hathaway.\n",
     12     "\n",
     13     "First, we'll need the returns of Berkshire Hathaway, which are contained in `data/brka_d_ret.csv`. Read it in as follows:"
     14    ]
     15   },
     16   {
     17    "cell_type": "code",
     18    "execution_count": 1,
     19    "metadata": {},
     20    "outputs": [
     21     {
     22      "data": {
     23       "text/html": [
     24        "<div>\n",
     25        "<style scoped>\n",
     26        "    .dataframe tbody tr th:only-of-type {\n",
     27        "        vertical-align: middle;\n",
     28        "    }\n",
     29        "\n",
     30        "    .dataframe tbody tr th {\n",
     31        "        vertical-align: top;\n",
     32        "    }\n",
     33        "\n",
     34        "    .dataframe thead th {\n",
     35        "        text-align: right;\n",
     36        "    }\n",
     37        "</style>\n",
     38        "<table border=\"1\" class=\"dataframe\">\n",
     39        "  <thead>\n",
     40        "    <tr style=\"text-align: right;\">\n",
     41        "      <th></th>\n",
     42        "      <th>BRKA</th>\n",
     43        "    </tr>\n",
     44        "    <tr>\n",
     45        "      <th>DATE</th>\n",
     46        "      <th></th>\n",
     47        "    </tr>\n",
     48        "  </thead>\n",
     49        "  <tbody>\n",
     50        "    <tr>\n",
     51        "      <th>1990-01-02</th>\n",
     52        "      <td>-0.005764</td>\n",
     53        "    </tr>\n",
     54        "    <tr>\n",
     55        "      <th>1990-01-03</th>\n",
     56        "      <td>0.000000</td>\n",
     57        "    </tr>\n",
     58        "    <tr>\n",
     59        "      <th>1990-01-04</th>\n",
     60        "      <td>0.005797</td>\n",
     61        "    </tr>\n",
     62        "    <tr>\n",
     63        "      <th>1990-01-05</th>\n",
     64        "      <td>-0.005764</td>\n",
     65        "    </tr>\n",
     66        "    <tr>\n",
     67        "      <th>1990-01-08</th>\n",
     68        "      <td>0.000000</td>\n",
     69        "    </tr>\n",
     70        "  </tbody>\n",
     71        "</table>\n",
     72        "</div>"
     73       ],
     74       "text/plain": [
     75        "                BRKA\n",
     76        "DATE                \n",
     77        "1990-01-02 -0.005764\n",
     78        "1990-01-03  0.000000\n",
     79        "1990-01-04  0.005797\n",
     80        "1990-01-05 -0.005764\n",
     81        "1990-01-08  0.000000"
     82       ]
     83      },
     84      "execution_count": 1,
     85      "metadata": {},
     86      "output_type": "execute_result"
     87     }
     88    ],
     89    "source": [
     90     "import pandas as pd\n",
     91     "\n",
     92     "brka_d = pd.read_csv(\"data/brka_d_ret.csv\", parse_dates=True, index_col=0)\n",
     93     "brka_d.head()"
     94    ]
     95   },
     96   {
     97    "cell_type": "code",
     98    "execution_count": 2,
     99    "metadata": {},
    100    "outputs": [
    101     {
    102      "data": {
    103       "text/html": [
    104        "<div>\n",
    105        "<style scoped>\n",
    106        "    .dataframe tbody tr th:only-of-type {\n",
    107        "        vertical-align: middle;\n",
    108        "    }\n",
    109        "\n",
    110        "    .dataframe tbody tr th {\n",
    111        "        vertical-align: top;\n",
    112        "    }\n",
    113        "\n",
    114        "    .dataframe thead th {\n",
    115        "        text-align: right;\n",
    116        "    }\n",
    117        "</style>\n",
    118        "<table border=\"1\" class=\"dataframe\">\n",
    119        "  <thead>\n",
    120        "    <tr style=\"text-align: right;\">\n",
    121        "      <th></th>\n",
    122        "      <th>BRKA</th>\n",
    123        "    </tr>\n",
    124        "    <tr>\n",
    125        "      <th>DATE</th>\n",
    126        "      <th></th>\n",
    127        "    </tr>\n",
    128        "  </thead>\n",
    129        "  <tbody>\n",
    130        "    <tr>\n",
    131        "      <th>2018-12-24</th>\n",
    132        "      <td>-0.018611</td>\n",
    133        "    </tr>\n",
    134        "    <tr>\n",
    135        "      <th>2018-12-26</th>\n",
    136        "      <td>0.043200</td>\n",
    137        "    </tr>\n",
    138        "    <tr>\n",
    139        "      <th>2018-12-27</th>\n",
    140        "      <td>0.012379</td>\n",
    141        "    </tr>\n",
    142        "    <tr>\n",
    143        "      <th>2018-12-28</th>\n",
    144        "      <td>0.013735</td>\n",
    145        "    </tr>\n",
    146        "    <tr>\n",
    147        "      <th>2018-12-31</th>\n",
    148        "      <td>0.011236</td>\n",
    149        "    </tr>\n",
    150        "  </tbody>\n",
    151        "</table>\n",
    152        "</div>"
    153       ],
    154       "text/plain": [
    155        "                BRKA\n",
    156        "DATE                \n",
    157        "2018-12-24 -0.018611\n",
    158        "2018-12-26  0.043200\n",
    159        "2018-12-27  0.012379\n",
    160        "2018-12-28  0.013735\n",
    161        "2018-12-31  0.011236"
    162       ]
    163      },
    164      "execution_count": 2,
    165      "metadata": {},
    166      "output_type": "execute_result"
    167     }
    168    ],
    169    "source": [
    170     "brka_d.tail()"
    171    ]
    172   },
    173   {
    174    "cell_type": "markdown",
    175    "metadata": {},
    176    "source": [
    177     "Next, we need to convert these to monthly returns. The simplest way to do so is by using the `.resample` method, which allows you to run an aggregation function on each group of returns in a time series. We'll give it the grouping rule of 'M' which means _monthly_ (consult the `pandas` documentation for other codes).\n",
    178     "\n",
    179     "We want to compound the returns, and we already have the `compound` function in our toolkit, so let's load that up now, and then apply it to the daily returns."
    180    ]
    181   },
    182   {
    183    "cell_type": "code",
    184    "execution_count": 3,
    185    "metadata": {},
    186    "outputs": [
    187     {
    188      "data": {
    189       "text/html": [
    190        "<div>\n",
    191        "<style scoped>\n",
    192        "    .dataframe tbody tr th:only-of-type {\n",
    193        "        vertical-align: middle;\n",
    194        "    }\n",
    195        "\n",
    196        "    .dataframe tbody tr th {\n",
    197        "        vertical-align: top;\n",
    198        "    }\n",
    199        "\n",
    200        "    .dataframe thead th {\n",
    201        "        text-align: right;\n",
    202        "    }\n",
    203        "</style>\n",
    204        "<table border=\"1\" class=\"dataframe\">\n",
    205        "  <thead>\n",
    206        "    <tr style=\"text-align: right;\">\n",
    207        "      <th></th>\n",
    208        "      <th>BRKA</th>\n",
    209        "    </tr>\n",
    210        "    <tr>\n",
    211        "      <th>DATE</th>\n",
    212        "      <th></th>\n",
    213        "    </tr>\n",
    214        "  </thead>\n",
    215        "  <tbody>\n",
    216        "    <tr>\n",
    217        "      <th>1990-01</th>\n",
    218        "      <td>-0.140634</td>\n",
    219        "    </tr>\n",
    220        "    <tr>\n",
    221        "      <th>1990-02</th>\n",
    222        "      <td>-0.030852</td>\n",
    223        "    </tr>\n",
    224        "    <tr>\n",
    225        "      <th>1990-03</th>\n",
    226        "      <td>-0.069204</td>\n",
    227        "    </tr>\n",
    228        "    <tr>\n",
    229        "      <th>1990-04</th>\n",
    230        "      <td>-0.003717</td>\n",
    231        "    </tr>\n",
    232        "    <tr>\n",
    233        "      <th>1990-05</th>\n",
    234        "      <td>0.067164</td>\n",
    235        "    </tr>\n",
    236        "  </tbody>\n",
    237        "</table>\n",
    238        "</div>"
    239       ],
    240       "text/plain": [
    241        "             BRKA\n",
    242        "DATE             \n",
    243        "1990-01 -0.140634\n",
    244        "1990-02 -0.030852\n",
    245        "1990-03 -0.069204\n",
    246        "1990-04 -0.003717\n",
    247        "1990-05  0.067164"
    248       ]
    249      },
    250      "execution_count": 3,
    251      "metadata": {},
    252      "output_type": "execute_result"
    253     }
    254    ],
    255    "source": [
    256     "import edhec_risk_kit_201 as erk\n",
    257     "\n",
    258     "%load_ext autoreload\n",
    259     "%autoreload 2\n",
    260     "\n",
    261     "brka_m = brka_d.resample('M').apply(erk.compound).to_period('M')\n",
    262     "brka_m.head()"
    263    ]
    264   },
    265   {
    266    "cell_type": "code",
    267    "execution_count": 4,
    268    "metadata": {},
    269    "outputs": [],
    270    "source": [
    271     "brka_m.to_csv(\"brka_m.csv\") # for possible future use!"
    272    ]
    273   },
    274   {
    275    "cell_type": "markdown",
    276    "metadata": {},
    277    "source": [
    278     "Next, we need to load the explanatory variables, which is the Fama-French monthly returns data set. Load that as follows:"
    279    ]
    280   },
    281   {
    282    "cell_type": "code",
    283    "execution_count": 5,
    284    "metadata": {},
    285    "outputs": [
    286     {
    287      "data": {
    288       "text/html": [
    289        "<div>\n",
    290        "<style scoped>\n",
    291        "    .dataframe tbody tr th:only-of-type {\n",
    292        "        vertical-align: middle;\n",
    293        "    }\n",
    294        "\n",
    295        "    .dataframe tbody tr th {\n",
    296        "        vertical-align: top;\n",
    297        "    }\n",
    298        "\n",
    299        "    .dataframe thead th {\n",
    300        "        text-align: right;\n",
    301        "    }\n",
    302        "</style>\n",
    303        "<table border=\"1\" class=\"dataframe\">\n",
    304        "  <thead>\n",
    305        "    <tr style=\"text-align: right;\">\n",
    306        "      <th></th>\n",
    307        "      <th>Mkt-RF</th>\n",
    308        "      <th>SMB</th>\n",
    309        "      <th>HML</th>\n",
    310        "      <th>RF</th>\n",
    311        "    </tr>\n",
    312        "  </thead>\n",
    313        "  <tbody>\n",
    314        "    <tr>\n",
    315        "      <th>1926-07</th>\n",
    316        "      <td>0.0296</td>\n",
    317        "      <td>-0.0230</td>\n",
    318        "      <td>-0.0287</td>\n",
    319        "      <td>0.0022</td>\n",
    320        "    </tr>\n",
    321        "    <tr>\n",
    322        "      <th>1926-08</th>\n",
    323        "      <td>0.0264</td>\n",
    324        "      <td>-0.0140</td>\n",
    325        "      <td>0.0419</td>\n",
    326        "      <td>0.0025</td>\n",
    327        "    </tr>\n",
    328        "    <tr>\n",
    329        "      <th>1926-09</th>\n",
    330        "      <td>0.0036</td>\n",
    331        "      <td>-0.0132</td>\n",
    332        "      <td>0.0001</td>\n",
    333        "      <td>0.0023</td>\n",
    334        "    </tr>\n",
    335        "    <tr>\n",
    336        "      <th>1926-10</th>\n",
    337        "      <td>-0.0324</td>\n",
    338        "      <td>0.0004</td>\n",
    339        "      <td>0.0051</td>\n",
    340        "      <td>0.0032</td>\n",
    341        "    </tr>\n",
    342        "    <tr>\n",
    343        "      <th>1926-11</th>\n",
    344        "      <td>0.0253</td>\n",
    345        "      <td>-0.0020</td>\n",
    346        "      <td>-0.0035</td>\n",
    347        "      <td>0.0031</td>\n",
    348        "    </tr>\n",
    349        "  </tbody>\n",
    350        "</table>\n",
    351        "</div>"
    352       ],
    353       "text/plain": [
    354        "         Mkt-RF     SMB     HML      RF\n",
    355        "1926-07  0.0296 -0.0230 -0.0287  0.0022\n",
    356        "1926-08  0.0264 -0.0140  0.0419  0.0025\n",
    357        "1926-09  0.0036 -0.0132  0.0001  0.0023\n",
    358        "1926-10 -0.0324  0.0004  0.0051  0.0032\n",
    359        "1926-11  0.0253 -0.0020 -0.0035  0.0031"
    360       ]
    361      },
    362      "execution_count": 5,
    363      "metadata": {},
    364      "output_type": "execute_result"
    365     }
    366    ],
    367    "source": [
    368     "fff = erk.get_fff_returns()\n",
    369     "fff.head()"
    370    ]
    371   },
    372   {
    373    "cell_type": "markdown",
    374    "metadata": {},
    375    "source": [
    376     "Next, we need to decompose the observed BRKA returns from January 1990 to May 2012, as in Ang (2014), into the portion that's due to the market and the rest that is not due to the market, using the CAPM as the explanatory model.\n",
    377     "\n",
    378     "i.e.\n",
    379     "\n",
    380     "$$ R_{brka,t} - R_{f,t} = \\alpha + \\beta(R_{mkt,t} - R_{f,t}) + \\epsilon_t $$\n",
    381     "\n",
    382     "We can use `statsmodels.api` for the linear regression as follows:"
    383    ]
    384   },
    385   {
    386    "cell_type": "code",
    387    "execution_count": 6,
    388    "metadata": {},
    389    "outputs": [],
    390    "source": [
    391     "import statsmodels.api as sm\n",
    392     "import numpy as np\n",
    393     "brka_excess = brka_m[\"1990\":\"2012-05\"] - fff.loc[\"1990\":\"2012-05\", ['RF']].values\n",
    394     "mkt_excess = fff.loc[\"1990\":\"2012-05\",['Mkt-RF']]\n",
    395     "exp_var = mkt_excess.copy()\n",
    396     "exp_var[\"Constant\"] = 1\n",
    397     "lm = sm.OLS(brka_excess, exp_var).fit()"
    398    ]
    399   },
    400   {
    401    "cell_type": "code",
    402    "execution_count": 7,
    403    "metadata": {},
    404    "outputs": [
    405     {
    406      "data": {
    407       "text/html": [
    408        "<table class=\"simpletable\">\n",
    409        "<caption>OLS Regression Results</caption>\n",
    410        "<tr>\n",
    411        "  <th>Dep. Variable:</th>          <td>BRKA</td>       <th>  R-squared:         </th> <td>   0.154</td>\n",
    412        "</tr>\n",
    413        "<tr>\n",
    414        "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.150</td>\n",
    415        "</tr>\n",
    416        "<tr>\n",
    417        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   48.45</td>\n",
    418        "</tr>\n",
    419        "<tr>\n",
    420        "  <th>Date:</th>             <td>Tue, 13 Aug 2019</td> <th>  Prob (F-statistic):</th> <td>2.62e-11</td>\n",
    421        "</tr>\n",
    422        "<tr>\n",
    423        "  <th>Time:</th>                 <td>09:32:44</td>     <th>  Log-Likelihood:    </th> <td>  388.47</td>\n",
    424        "</tr>\n",
    425        "<tr>\n",
    426        "  <th>No. Observations:</th>      <td>   269</td>      <th>  AIC:               </th> <td>  -772.9</td>\n",
    427        "</tr>\n",
    428        "<tr>\n",
    429        "  <th>Df Residuals:</th>          <td>   267</td>      <th>  BIC:               </th> <td>  -765.7</td>\n",
    430        "</tr>\n",
    431        "<tr>\n",
    432        "  <th>Df Model:</th>              <td>     1</td>      <th>                     </th>     <td> </td>   \n",
    433        "</tr>\n",
    434        "<tr>\n",
    435        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
    436        "</tr>\n",
    437        "</table>\n",
    438        "<table class=\"simpletable\">\n",
    439        "<tr>\n",
    440        "      <td></td>        <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
    441        "</tr>\n",
    442        "<tr>\n",
    443        "  <th>Mkt-RF</th>   <td>    0.5402</td> <td>    0.078</td> <td>    6.961</td> <td> 0.000</td> <td>    0.387</td> <td>    0.693</td>\n",
    444        "</tr>\n",
    445        "<tr>\n",
    446        "  <th>Constant</th> <td>    0.0061</td> <td>    0.004</td> <td>    1.744</td> <td> 0.082</td> <td>   -0.001</td> <td>    0.013</td>\n",
    447        "</tr>\n",
    448        "</table>\n",
    449        "<table class=\"simpletable\">\n",
    450        "<tr>\n",
    451        "  <th>Omnibus:</th>       <td>45.698</td> <th>  Durbin-Watson:     </th> <td>   2.079</td>\n",
    452        "</tr>\n",
    453        "<tr>\n",
    454        "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td> 102.573</td>\n",
    455        "</tr>\n",
    456        "<tr>\n",
    457        "  <th>Skew:</th>          <td> 0.825</td> <th>  Prob(JB):          </th> <td>5.33e-23</td>\n",
    458        "</tr>\n",
    459        "<tr>\n",
    460        "  <th>Kurtosis:</th>      <td> 5.535</td> <th>  Cond. No.          </th> <td>    22.2</td>\n",
    461        "</tr>\n",
    462        "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
    463       ],
    464       "text/plain": [
    465        "<class 'statsmodels.iolib.summary.Summary'>\n",
    466        "\"\"\"\n",
    467        "                            OLS Regression Results                            \n",
    468        "==============================================================================\n",
    469        "Dep. Variable:                   BRKA   R-squared:                       0.154\n",
    470        "Model:                            OLS   Adj. R-squared:                  0.150\n",
    471        "Method:                 Least Squares   F-statistic:                     48.45\n",
    472        "Date:                Tue, 13 Aug 2019   Prob (F-statistic):           2.62e-11\n",
    473        "Time:                        09:32:44   Log-Likelihood:                 388.47\n",
    474        "No. Observations:                 269   AIC:                            -772.9\n",
    475        "Df Residuals:                     267   BIC:                            -765.7\n",
    476        "Df Model:                           1                                         \n",
    477        "Covariance Type:            nonrobust                                         \n",
    478        "==============================================================================\n",
    479        "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
    480        "------------------------------------------------------------------------------\n",
    481        "Mkt-RF         0.5402      0.078      6.961      0.000       0.387       0.693\n",
    482        "Constant       0.0061      0.004      1.744      0.082      -0.001       0.013\n",
    483        "==============================================================================\n",
    484        "Omnibus:                       45.698   Durbin-Watson:                   2.079\n",
    485        "Prob(Omnibus):                  0.000   Jarque-Bera (JB):              102.573\n",
    486        "Skew:                           0.825   Prob(JB):                     5.33e-23\n",
    487        "Kurtosis:                       5.535   Cond. No.                         22.2\n",
    488        "==============================================================================\n",
    489        "\n",
    490        "Warnings:\n",
    491        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
    492        "\"\"\""
    493       ]
    494      },
    495      "execution_count": 7,
    496      "metadata": {},
    497      "output_type": "execute_result"
    498     }
    499    ],
    500    "source": [
    501     "lm.summary()"
    502    ]
    503   },
    504   {
    505    "cell_type": "markdown",
    506    "metadata": {},
    507    "source": [
    508     "### The CAPM benchmark interpretation\n",
    509     "\n",
    510     "This implies that the CAPM benchmark consists of 46 cents in T-Bills and 54 cents in the market, i.e. each dollar in the Berkshire Hathaway portfolio is equivalent to 46 cents in T-Bills and 54 cents in the market. Relative to this, Berkshire Hathaway is adding (i.e. has an $\\alpha$ of) 0.61% _(per month!)_, although the degree of statistical significance is not very high.\n",
    511     "\n",
    512     "Now, let's add in some additional explanatory variables, namely Value and Size."
    513    ]
    514   },
    515   {
    516    "cell_type": "code",
    517    "execution_count": 8,
    518    "metadata": {},
    519    "outputs": [
    520     {
    521      "data": {
    522       "text/html": [
    523        "<div>\n",
    524        "<style scoped>\n",
    525        "    .dataframe tbody tr th:only-of-type {\n",
    526        "        vertical-align: middle;\n",
    527        "    }\n",
    528        "\n",
    529        "    .dataframe tbody tr th {\n",
    530        "        vertical-align: top;\n",
    531        "    }\n",
    532        "\n",
    533        "    .dataframe thead th {\n",
    534        "        text-align: right;\n",
    535        "    }\n",
    536        "</style>\n",
    537        "<table border=\"1\" class=\"dataframe\">\n",
    538        "  <thead>\n",
    539        "    <tr style=\"text-align: right;\">\n",
    540        "      <th></th>\n",
    541        "      <th>Mkt-RF</th>\n",
    542        "      <th>Constant</th>\n",
    543        "      <th>Value</th>\n",
    544        "      <th>Size</th>\n",
    545        "    </tr>\n",
    546        "  </thead>\n",
    547        "  <tbody>\n",
    548        "    <tr>\n",
    549        "      <th>1990-01</th>\n",
    550        "      <td>-0.0785</td>\n",
    551        "      <td>1</td>\n",
    552        "      <td>0.0087</td>\n",
    553        "      <td>-0.0129</td>\n",
    554        "    </tr>\n",
    555        "    <tr>\n",
    556        "      <th>1990-02</th>\n",
    557        "      <td>0.0111</td>\n",
    558        "      <td>1</td>\n",
    559        "      <td>0.0061</td>\n",
    560        "      <td>0.0103</td>\n",
    561        "    </tr>\n",
    562        "    <tr>\n",
    563        "      <th>1990-03</th>\n",
    564        "      <td>0.0183</td>\n",
    565        "      <td>1</td>\n",
    566        "      <td>-0.0290</td>\n",
    567        "      <td>0.0152</td>\n",
    568        "    </tr>\n",
    569        "    <tr>\n",
    570        "      <th>1990-04</th>\n",
    571        "      <td>-0.0336</td>\n",
    572        "      <td>1</td>\n",
    573        "      <td>-0.0255</td>\n",
    574        "      <td>-0.0050</td>\n",
    575        "    </tr>\n",
    576        "    <tr>\n",
    577        "      <th>1990-05</th>\n",
    578        "      <td>0.0842</td>\n",
    579        "      <td>1</td>\n",
    580        "      <td>-0.0374</td>\n",
    581        "      <td>-0.0257</td>\n",
    582        "    </tr>\n",
    583        "  </tbody>\n",
    584        "</table>\n",
    585        "</div>"
    586       ],
    587       "text/plain": [
    588        "         Mkt-RF  Constant   Value    Size\n",
    589        "1990-01 -0.0785         1  0.0087 -0.0129\n",
    590        "1990-02  0.0111         1  0.0061  0.0103\n",
    591        "1990-03  0.0183         1 -0.0290  0.0152\n",
    592        "1990-04 -0.0336         1 -0.0255 -0.0050\n",
    593        "1990-05  0.0842         1 -0.0374 -0.0257"
    594       ]
    595      },
    596      "execution_count": 8,
    597      "metadata": {},
    598      "output_type": "execute_result"
    599     }
    600    ],
    601    "source": [
    602     "exp_var[\"Value\"] = fff.loc[\"1990\":\"2012-05\",['HML']]\n",
    603     "exp_var[\"Size\"] = fff.loc[\"1990\":\"2012-05\",['SMB']]\n",
    604     "exp_var.head()"
    605    ]
    606   },
    607   {
    608    "cell_type": "code",
    609    "execution_count": 9,
    610    "metadata": {},
    611    "outputs": [
    612     {
    613      "data": {
    614       "text/html": [
    615        "<table class=\"simpletable\">\n",
    616        "<caption>OLS Regression Results</caption>\n",
    617        "<tr>\n",
    618        "  <th>Dep. Variable:</th>          <td>BRKA</td>       <th>  R-squared:         </th> <td>   0.290</td>\n",
    619        "</tr>\n",
    620        "<tr>\n",
    621        "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.282</td>\n",
    622        "</tr>\n",
    623        "<tr>\n",
    624        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   36.06</td>\n",
    625        "</tr>\n",
    626        "<tr>\n",
    627        "  <th>Date:</th>             <td>Tue, 13 Aug 2019</td> <th>  Prob (F-statistic):</th> <td>1.41e-19</td>\n",
    628        "</tr>\n",
    629        "<tr>\n",
    630        "  <th>Time:</th>                 <td>09:32:44</td>     <th>  Log-Likelihood:    </th> <td>  412.09</td>\n",
    631        "</tr>\n",
    632        "<tr>\n",
    633        "  <th>No. Observations:</th>      <td>   269</td>      <th>  AIC:               </th> <td>  -816.2</td>\n",
    634        "</tr>\n",
    635        "<tr>\n",
    636        "  <th>Df Residuals:</th>          <td>   265</td>      <th>  BIC:               </th> <td>  -801.8</td>\n",
    637        "</tr>\n",
    638        "<tr>\n",
    639        "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
    640        "</tr>\n",
    641        "<tr>\n",
    642        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
    643        "</tr>\n",
    644        "</table>\n",
    645        "<table class=\"simpletable\">\n",
    646        "<tr>\n",
    647        "      <td></td>        <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
    648        "</tr>\n",
    649        "<tr>\n",
    650        "  <th>Mkt-RF</th>   <td>    0.6761</td> <td>    0.074</td> <td>    9.155</td> <td> 0.000</td> <td>    0.531</td> <td>    0.821</td>\n",
    651        "</tr>\n",
    652        "<tr>\n",
    653        "  <th>Constant</th> <td>    0.0055</td> <td>    0.003</td> <td>    1.679</td> <td> 0.094</td> <td>   -0.001</td> <td>    0.012</td>\n",
    654        "</tr>\n",
    655        "<tr>\n",
    656        "  <th>Value</th>    <td>    0.3814</td> <td>    0.109</td> <td>    3.508</td> <td> 0.001</td> <td>    0.167</td> <td>    0.595</td>\n",
    657        "</tr>\n",
    658        "<tr>\n",
    659        "  <th>Size</th>     <td>   -0.5023</td> <td>    0.101</td> <td>   -4.962</td> <td> 0.000</td> <td>   -0.702</td> <td>   -0.303</td>\n",
    660        "</tr>\n",
    661        "</table>\n",
    662        "<table class=\"simpletable\">\n",
    663        "<tr>\n",
    664        "  <th>Omnibus:</th>       <td>42.261</td> <th>  Durbin-Watson:     </th> <td>   2.146</td>\n",
    665        "</tr>\n",
    666        "<tr>\n",
    667        "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  67.954</td>\n",
    668        "</tr>\n",
    669        "<tr>\n",
    670        "  <th>Skew:</th>          <td> 0.904</td> <th>  Prob(JB):          </th> <td>1.75e-15</td>\n",
    671        "</tr>\n",
    672        "<tr>\n",
    673        "  <th>Kurtosis:</th>      <td> 4.671</td> <th>  Cond. No.          </th> <td>    37.2</td>\n",
    674        "</tr>\n",
    675        "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
    676       ],
    677       "text/plain": [
    678        "<class 'statsmodels.iolib.summary.Summary'>\n",
    679        "\"\"\"\n",
    680        "                            OLS Regression Results                            \n",
    681        "==============================================================================\n",
    682        "Dep. Variable:                   BRKA   R-squared:                       0.290\n",
    683        "Model:                            OLS   Adj. R-squared:                  0.282\n",
    684        "Method:                 Least Squares   F-statistic:                     36.06\n",
    685        "Date:                Tue, 13 Aug 2019   Prob (F-statistic):           1.41e-19\n",
    686        "Time:                        09:32:44   Log-Likelihood:                 412.09\n",
    687        "No. Observations:                 269   AIC:                            -816.2\n",
    688        "Df Residuals:                     265   BIC:                            -801.8\n",
    689        "Df Model:                           3                                         \n",
    690        "Covariance Type:            nonrobust                                         \n",
    691        "==============================================================================\n",
    692        "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
    693        "------------------------------------------------------------------------------\n",
    694        "Mkt-RF         0.6761      0.074      9.155      0.000       0.531       0.821\n",
    695        "Constant       0.0055      0.003      1.679      0.094      -0.001       0.012\n",
    696        "Value          0.3814      0.109      3.508      0.001       0.167       0.595\n",
    697        "Size          -0.5023      0.101     -4.962      0.000      -0.702      -0.303\n",
    698        "==============================================================================\n",
    699        "Omnibus:                       42.261   Durbin-Watson:                   2.146\n",
    700        "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               67.954\n",
    701        "Skew:                           0.904   Prob(JB):                     1.75e-15\n",
    702        "Kurtosis:                       4.671   Cond. No.                         37.2\n",
    703        "==============================================================================\n",
    704        "\n",
    705        "Warnings:\n",
    706        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
    707        "\"\"\""
    708       ]
    709      },
    710      "execution_count": 9,
    711      "metadata": {},
    712      "output_type": "execute_result"
    713     }
    714    ],
    715    "source": [
    716     "lm = sm.OLS(brka_excess, exp_var).fit()\n",
    717     "lm.summary()"
    718    ]
    719   },
    720   {
    721    "cell_type": "markdown",
    722    "metadata": {},
    723    "source": [
    724     "### The Fama-French Benchmark Interpretation\n",
    725     "\n",
    726     "The alpha has fallen from 0.61% to about 0.55% per month. The loading on the market has moved up from 0.54 to 0.67, which means that adding these new explanatory factors did change things. If we had added irrelevant variables, the loading on the market would be unaffected.\n",
    727     "\n",
    728     "We can interpret the positive loading on Value as saying that Hathaway has a significant Value tilt - which should not be a shock to anyone who follows Buffett. Additionally, the negative loading on Size suggests that Hathaway tends to invest in large companies, not small companies.\n",
    729     "\n",
    730     "In other words, Hathaway appears to be a Large Value investor. Of course, you knew this if you followed the company, but the point here is that the numbers reveal it!\n",
    731     "\n",
    732     "The new way to interpret each dollar invested in Hathaway is: 67 cents in the market, 33 cents in Bills, 38 cents in Value stocks and short 38 cents in Growth stocks, short 50 cents in SmallCap stocks and long 50 cents in LargeCap stocks. If you did all this, you would still end up underperforming Hathaway by about 55 basis points per month.\n",
    733     "\n",
    734     "We can now add the following code to the toolkit:\n",
    735     "\n",
    736     "```python\n",
    737     "import statsmodels.api as sm\n",
    738     "def regress(dependent_variable, explanatory_variables, alpha=True):\n",
    739     "    \"\"\"\n",
    740     "    Runs a linear regression to decompose the dependent variable into the explanatory variables\n",
    741     "    returns an object of type statsmodels' RegressionResults on which you can call\n",
    742     "       .summary() to print a full summary\n",
    743     "       .params for the coefficients\n",
    744     "       .tvalues and .pvalues for the significance levels\n",
    745     "       .rsquared_adj and .rsquared for quality of fit\n",
    746     "    \"\"\"\n",
    747     "    if alpha:\n",
    748     "        explanatory_variables = explanatory_variables.copy()\n",
    749     "        explanatory_variables[\"Alpha\"] = 1\n",
    750     "    \n",
    751     "    lm = sm.OLS(dependent_variable, explanatory_variables).fit()\n",
    752     "    return lm\n",
    753     "```\n",
    754     "\n",
    755     "\n",
    756     "## Exercise to the Student\n",
    757     "\n",
    758     "I used this particular period because of the example in Ang (2014). However, I have provided data going up to 2018. Have the results held up? Are Buffett's tilts consistent over time?"
    759    ]
    760   },
    761   {
    762    "cell_type": "code",
    763    "execution_count": 10,
    764    "metadata": {},
    765    "outputs": [],
    766    "source": [
    767     "result = erk.regress(brka_excess, mkt_excess)"
    768    ]
    769   },
    770   {
    771    "cell_type": "code",
    772    "execution_count": 11,
    773    "metadata": {},
    774    "outputs": [
    775     {
    776      "data": {
    777       "text/plain": [
    778        "Mkt-RF    0.540175\n",
    779        "Alpha     0.006133\n",
    780        "dtype: float64"
    781       ]
    782      },
    783      "execution_count": 11,
    784      "metadata": {},
    785      "output_type": "execute_result"
    786     }
    787    ],
    788    "source": [
    789     "result.params"
    790    ]
    791   },
    792   {
    793    "cell_type": "code",
    794    "execution_count": 12,
    795    "metadata": {},
    796    "outputs": [
    797     {
    798      "data": {
    799       "text/plain": [
    800        "Mkt-RF    6.960550\n",
    801        "Alpha     1.744449\n",
    802        "dtype: float64"
    803       ]
    804      },
    805      "execution_count": 12,
    806      "metadata": {},
    807      "output_type": "execute_result"
    808     }
    809    ],
    810    "source": [
    811     "result.tvalues"
    812    ]
    813   },
    814   {
    815    "cell_type": "code",
    816    "execution_count": 13,
    817    "metadata": {},
    818    "outputs": [
    819     {
    820      "data": {
    821       "text/plain": [
    822        "Mkt-RF    2.622873e-11\n",
    823        "Alpha     8.223148e-02\n",
    824        "dtype: float64"
    825       ]
    826      },
    827      "execution_count": 13,
    828      "metadata": {},
    829      "output_type": "execute_result"
    830     }
    831    ],
    832    "source": [
    833     "result.pvalues"
    834    ]
    835   },
    836   {
    837    "cell_type": "code",
    838    "execution_count": 14,
    839    "metadata": {},
    840    "outputs": [
    841     {
    842      "data": {
    843       "text/plain": [
    844        "0.15041804337083986"
    845       ]
    846      },
    847      "execution_count": 14,
    848      "metadata": {},
    849      "output_type": "execute_result"
    850     }
    851    ],
    852    "source": [
    853     "result.rsquared_adj"
    854    ]
    855   },
    856   {
    857    "cell_type": "code",
    858    "execution_count": 15,
    859    "metadata": {},
    860    "outputs": [
    861     {
    862      "data": {
    863       "text/html": [
    864        "<div>\n",
    865        "<style scoped>\n",
    866        "    .dataframe tbody tr th:only-of-type {\n",
    867        "        vertical-align: middle;\n",
    868        "    }\n",
    869        "\n",
    870        "    .dataframe tbody tr th {\n",
    871        "        vertical-align: top;\n",
    872        "    }\n",
    873        "\n",
    874        "    .dataframe thead th {\n",
    875        "        text-align: right;\n",
    876        "    }\n",
    877        "</style>\n",
    878        "<table border=\"1\" class=\"dataframe\">\n",
    879        "  <thead>\n",
    880        "    <tr style=\"text-align: right;\">\n",
    881        "      <th></th>\n",
    882        "      <th>Mkt-RF</th>\n",
    883        "      <th>Constant</th>\n",
    884        "      <th>Value</th>\n",
    885        "      <th>Size</th>\n",
    886        "    </tr>\n",
    887        "  </thead>\n",
    888        "  <tbody>\n",
    889        "    <tr>\n",
    890        "      <th>1990-01</th>\n",
    891        "      <td>-0.0785</td>\n",
    892        "      <td>1</td>\n",
    893        "      <td>0.0087</td>\n",
    894        "      <td>-0.0129</td>\n",
    895        "    </tr>\n",
    896        "    <tr>\n",
    897        "      <th>1990-02</th>\n",
    898        "      <td>0.0111</td>\n",
    899        "      <td>1</td>\n",
    900        "      <td>0.0061</td>\n",
    901        "      <td>0.0103</td>\n",
    902        "    </tr>\n",
    903        "    <tr>\n",
    904        "      <th>1990-03</th>\n",
    905        "      <td>0.0183</td>\n",
    906        "      <td>1</td>\n",
    907        "      <td>-0.0290</td>\n",
    908        "      <td>0.0152</td>\n",
    909        "    </tr>\n",
    910        "    <tr>\n",
    911        "      <th>1990-04</th>\n",
    912        "      <td>-0.0336</td>\n",
    913        "      <td>1</td>\n",
    914        "      <td>-0.0255</td>\n",
    915        "      <td>-0.0050</td>\n",
    916        "    </tr>\n",
    917        "    <tr>\n",
    918        "      <th>1990-05</th>\n",
    919        "      <td>0.0842</td>\n",
    920        "      <td>1</td>\n",
    921        "      <td>-0.0374</td>\n",
    922        "      <td>-0.0257</td>\n",
    923        "    </tr>\n",
    924        "  </tbody>\n",
    925        "</table>\n",
    926        "</div>"
    927       ],
    928       "text/plain": [
    929        "         Mkt-RF  Constant   Value    Size\n",
    930        "1990-01 -0.0785         1  0.0087 -0.0129\n",
    931        "1990-02  0.0111         1  0.0061  0.0103\n",
    932        "1990-03  0.0183         1 -0.0290  0.0152\n",
    933        "1990-04 -0.0336         1 -0.0255 -0.0050\n",
    934        "1990-05  0.0842         1 -0.0374 -0.0257"
    935       ]
    936      },
    937      "execution_count": 15,
    938      "metadata": {},
    939      "output_type": "execute_result"
    940     }
    941    ],
    942    "source": [
    943     "exp_var.head()"
    944    ]
    945   },
    946   {
    947    "cell_type": "code",
    948    "execution_count": 16,
    949    "metadata": {},
    950    "outputs": [
    951     {
    952      "data": {
    953       "text/html": [
    954        "<table class=\"simpletable\">\n",
    955        "<caption>OLS Regression Results</caption>\n",
    956        "<tr>\n",
    957        "  <th>Dep. Variable:</th>          <td>BRKA</td>       <th>  R-squared:         </th> <td>   0.290</td>\n",
    958        "</tr>\n",
    959        "<tr>\n",
    960        "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.282</td>\n",
    961        "</tr>\n",
    962        "<tr>\n",
    963        "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   36.06</td>\n",
    964        "</tr>\n",
    965        "<tr>\n",
    966        "  <th>Date:</th>             <td>Tue, 13 Aug 2019</td> <th>  Prob (F-statistic):</th> <td>1.41e-19</td>\n",
    967        "</tr>\n",
    968        "<tr>\n",
    969        "  <th>Time:</th>                 <td>09:32:44</td>     <th>  Log-Likelihood:    </th> <td>  412.09</td>\n",
    970        "</tr>\n",
    971        "<tr>\n",
    972        "  <th>No. Observations:</th>      <td>   269</td>      <th>  AIC:               </th> <td>  -816.2</td>\n",
    973        "</tr>\n",
    974        "<tr>\n",
    975        "  <th>Df Residuals:</th>          <td>   265</td>      <th>  BIC:               </th> <td>  -801.8</td>\n",
    976        "</tr>\n",
    977        "<tr>\n",
    978        "  <th>Df Model:</th>              <td>     3</td>      <th>                     </th>     <td> </td>   \n",
    979        "</tr>\n",
    980        "<tr>\n",
    981        "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>   \n",
    982        "</tr>\n",
    983        "</table>\n",
    984        "<table class=\"simpletable\">\n",
    985        "<tr>\n",
    986        "      <td></td>        <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
    987        "</tr>\n",
    988        "<tr>\n",
    989        "  <th>Mkt-RF</th>   <td>    0.6761</td> <td>    0.074</td> <td>    9.155</td> <td> 0.000</td> <td>    0.531</td> <td>    0.821</td>\n",
    990        "</tr>\n",
    991        "<tr>\n",
    992        "  <th>Constant</th> <td>    0.0055</td> <td>    0.003</td> <td>    1.679</td> <td> 0.094</td> <td>   -0.001</td> <td>    0.012</td>\n",
    993        "</tr>\n",
    994        "<tr>\n",
    995        "  <th>Value</th>    <td>    0.3814</td> <td>    0.109</td> <td>    3.508</td> <td> 0.001</td> <td>    0.167</td> <td>    0.595</td>\n",
    996        "</tr>\n",
    997        "<tr>\n",
    998        "  <th>Size</th>     <td>   -0.5023</td> <td>    0.101</td> <td>   -4.962</td> <td> 0.000</td> <td>   -0.702</td> <td>   -0.303</td>\n",
    999        "</tr>\n",
   1000        "</table>\n",
   1001        "<table class=\"simpletable\">\n",
   1002        "<tr>\n",
   1003        "  <th>Omnibus:</th>       <td>42.261</td> <th>  Durbin-Watson:     </th> <td>   2.146</td>\n",
   1004        "</tr>\n",
   1005        "<tr>\n",
   1006        "  <th>Prob(Omnibus):</th> <td> 0.000</td> <th>  Jarque-Bera (JB):  </th> <td>  67.954</td>\n",
   1007        "</tr>\n",
   1008        "<tr>\n",
   1009        "  <th>Skew:</th>          <td> 0.904</td> <th>  Prob(JB):          </th> <td>1.75e-15</td>\n",
   1010        "</tr>\n",
   1011        "<tr>\n",
   1012        "  <th>Kurtosis:</th>      <td> 4.671</td> <th>  Cond. No.          </th> <td>    37.2</td>\n",
   1013        "</tr>\n",
   1014        "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
   1015       ],
   1016       "text/plain": [
   1017        "<class 'statsmodels.iolib.summary.Summary'>\n",
   1018        "\"\"\"\n",
   1019        "                            OLS Regression Results                            \n",
   1020        "==============================================================================\n",
   1021        "Dep. Variable:                   BRKA   R-squared:                       0.290\n",
   1022        "Model:                            OLS   Adj. R-squared:                  0.282\n",
   1023        "Method:                 Least Squares   F-statistic:                     36.06\n",
   1024        "Date:                Tue, 13 Aug 2019   Prob (F-statistic):           1.41e-19\n",
   1025        "Time:                        09:32:44   Log-Likelihood:                 412.09\n",
   1026        "No. Observations:                 269   AIC:                            -816.2\n",
   1027        "Df Residuals:                     265   BIC:                            -801.8\n",
   1028        "Df Model:                           3                                         \n",
   1029        "Covariance Type:            nonrobust                                         \n",
   1030        "==============================================================================\n",
   1031        "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
   1032        "------------------------------------------------------------------------------\n",
   1033        "Mkt-RF         0.6761      0.074      9.155      0.000       0.531       0.821\n",
   1034        "Constant       0.0055      0.003      1.679      0.094      -0.001       0.012\n",
   1035        "Value          0.3814      0.109      3.508      0.001       0.167       0.595\n",
   1036        "Size          -0.5023      0.101     -4.962      0.000      -0.702      -0.303\n",
   1037        "==============================================================================\n",
   1038        "Omnibus:                       42.261   Durbin-Watson:                   2.146\n",
   1039        "Prob(Omnibus):                  0.000   Jarque-Bera (JB):               67.954\n",
   1040        "Skew:                           0.904   Prob(JB):                     1.75e-15\n",
   1041        "Kurtosis:                       4.671   Cond. No.                         37.2\n",
   1042        "==============================================================================\n",
   1043        "\n",
   1044        "Warnings:\n",
   1045        "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
   1046        "\"\"\""
   1047       ]
   1048      },
   1049      "execution_count": 16,
   1050      "metadata": {},
   1051      "output_type": "execute_result"
   1052     }
   1053    ],
   1054    "source": [
   1055     "erk.regress(brka_excess, exp_var, alpha=False).summary()"
   1056    ]
   1057   },
   1058   {
   1059    "cell_type": "code",
   1060    "execution_count": null,
   1061    "metadata": {},
   1062    "outputs": [],
   1063    "source": []
   1064   },
   1065   {
   1066    "cell_type": "code",
   1067    "execution_count": null,
   1068    "metadata": {},
   1069    "outputs": [],
   1070    "source": []
   1071   }
   1072  ],
   1073  "metadata": {
   1074   "kernelspec": {
   1075    "display_name": "Python 3",
   1076    "language": "python",
   1077    "name": "python3"
   1078   },
   1079   "language_info": {
   1080    "codemirror_mode": {
   1081     "name": "ipython",
   1082     "version": 3
   1083    },
   1084    "file_extension": ".py",
   1085    "mimetype": "text/x-python",
   1086    "name": "python",
   1087    "nbconvert_exporter": "python",
   1088    "pygments_lexer": "ipython3",
   1089    "version": "3.8.8"
   1090   }
   1091  },
   1092  "nbformat": 4,
   1093  "nbformat_minor": 2
   1094 }