ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_110.py

(9949B)
      1 import pandas as pd
      2 import numpy as np
      3 
      4 def get_ffme_returns():
      5     """
      6     Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
      7     """
      8     me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
      9                        header=0, index_col=0, na_values=-99.99)
     10     rets = me_m[['Lo 10', 'Hi 10']]
     11     rets.columns = ['SmallCap', 'LargeCap']
     12     rets = rets/100
     13     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     14     return rets
     15 
     16 
     17 def get_hfi_returns():
     18     """
     19     Load and format the EDHEC Hedge Fund Index Returns
     20     """
     21     hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
     22                       header=0, index_col=0, parse_dates=True)
     23     hfi = hfi/100
     24     hfi.index = hfi.index.to_period('M')
     25     return hfi
     26 
     27 def get_ind_returns():
     28     """
     29     Load and format the Ken French 30 Industry Portfolios Value Weighted Monthly Returns
     30     """
     31     ind = pd.read_csv("data/ind30_m_vw_rets.csv", header=0, index_col=0)/100
     32     ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
     33     ind.columns = ind.columns.str.strip()
     34     return ind
     35 
     36 
     37 def skewness(r):
     38     """
     39     Alternative to scipy.stats.skew()
     40     Computes the skewness of the supplied Series or DataFrame
     41     Returns a float or a Series
     42     """
     43     demeaned_r = r - r.mean()
     44     # use the population standard deviation, so set dof=0
     45     sigma_r = r.std(ddof=0)
     46     exp = (demeaned_r**3).mean()
     47     return exp/sigma_r**3
     48 
     49 
     50 def kurtosis(r):
     51     """
     52     Alternative to scipy.stats.kurtosis()
     53     Computes the kurtosis of the supplied Series or DataFrame
     54     Returns a float or a Series
     55     """
     56     demeaned_r = r - r.mean()
     57     # use the population standard deviation, so set dof=0
     58     sigma_r = r.std(ddof=0)
     59     exp = (demeaned_r**4).mean()
     60     return exp/sigma_r**4
     61 
     62 
     63 def annualize_rets(r, periods_per_year):
     64     """
     65     Annualizes a set of returns
     66     We should infer the periods per year
     67     but that is currently left as an exercise
     68     to the reader :-)
     69     """
     70     compounded_growth = (1+r).prod()
     71     n_periods = r.shape[0]
     72     return compounded_growth**(periods_per_year/n_periods)-1
     73 
     74 
     75 def annualize_vol(r, periods_per_year):
     76     """
     77     Annualizes the vol of a set of returns
     78     We should infer the periods per year
     79     but that is currently left as an exercise
     80     to the reader :-)
     81     """
     82     return r.std()*(periods_per_year**0.5)
     83 
     84 
     85 def sharpe_ratio(r, riskfree_rate, periods_per_year):
     86     """
     87     Computes the annualized sharpe ratio of a set of returns
     88     """
     89     # convert the annual riskfree rate to per period
     90     rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
     91     excess_ret = r - rf_per_period
     92     ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
     93     ann_vol = annualize_vol(r, periods_per_year)
     94     return ann_ex_ret/ann_vol
     95 
     96 
     97 import scipy.stats
     98 def is_normal(r, level=0.01):
     99     """
    100     Applies the Jarque-Bera test to determine if a Series is normal or not
    101     Test is applied at the 1% level by default
    102     Returns True if the hypothesis of normality is accepted, False otherwise
    103     """
    104     if isinstance(r, pd.DataFrame):
    105         return r.aggregate(is_normal)
    106     else:
    107         statistic, p_value = scipy.stats.jarque_bera(r)
    108         return p_value > level
    109 
    110 
    111 def drawdown(return_series: pd.Series):
    112     """Takes a time series of asset returns.
    113        returns a DataFrame with columns for
    114        the wealth index, 
    115        the previous peaks, and 
    116        the percentage drawdown
    117     """
    118     wealth_index = 1000*(1+return_series).cumprod()
    119     previous_peaks = wealth_index.cummax()
    120     drawdowns = (wealth_index - previous_peaks)/previous_peaks
    121     return pd.DataFrame({"Wealth": wealth_index, 
    122                          "Previous Peak": previous_peaks, 
    123                          "Drawdown": drawdowns})
    124 
    125 
    126 def semideviation(r):
    127     """
    128     Returns the semideviation aka negative semideviation of r
    129     r must be a Series or a DataFrame, else raises a TypeError
    130     """
    131     if isinstance(r, pd.Series):
    132         is_negative = r < 0
    133         return r[is_negative].std(ddof=0)
    134     elif isinstance(r, pd.DataFrame):
    135         return r.aggregate(semideviation)
    136     else:
    137         raise TypeError("Expected r to be a Series or DataFrame")
    138 
    139 
    140 def var_historic(r, level=5):
    141     """
    142     Returns the historic Value at Risk at a specified level
    143     i.e. returns the number such that "level" percent of the returns
    144     fall below that number, and the (100-level) percent are above
    145     """
    146     if isinstance(r, pd.DataFrame):
    147         return r.aggregate(var_historic, level=level)
    148     elif isinstance(r, pd.Series):
    149         return -np.percentile(r, level)
    150     else:
    151         raise TypeError("Expected r to be a Series or DataFrame")
    152 
    153 
    154 def cvar_historic(r, level=5):
    155     """
    156     Computes the Conditional VaR of Series or DataFrame
    157     """
    158     if isinstance(r, pd.Series):
    159         is_beyond = r <= var_historic(r, level=level)
    160         return -r[is_beyond].mean()
    161     elif isinstance(r, pd.DataFrame):
    162         return r.aggregate(cvar_historic, level=level)
    163     else:
    164         raise TypeError("Expected r to be a Series or DataFrame")
    165 
    166 
    167 from scipy.stats import norm
    168 def var_gaussian(r, level=5, modified=False):
    169     """
    170     Returns the Parametric Gauusian VaR of a Series or DataFrame
    171     If "modified" is True, then the modified VaR is returned,
    172     using the Cornish-Fisher modification
    173     """
    174     # compute the Z score assuming it was Gaussian
    175     z = norm.ppf(level/100)
    176     if modified:
    177         # modify the Z score based on observed skewness and kurtosis
    178         s = skewness(r)
    179         k = kurtosis(r)
    180         z = (z +
    181                 (z**2 - 1)*s/6 +
    182                 (z**3 -3*z)*(k-3)/24 -
    183                 (2*z**3 - 5*z)*(s**2)/36
    184             )
    185     return -(r.mean() + z*r.std(ddof=0))
    186 
    187 
    188 def portfolio_return(weights, returns):
    189     """
    190     Computes the return on a portfolio from constituent returns and weights
    191     weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
    192     """
    193     return weights.T @ returns
    194 
    195 
    196 def portfolio_vol(weights, covmat):
    197     """
    198     Computes the vol of a portfolio from a covariance matrix and constituent weights
    199     weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
    200     """
    201     return (weights.T @ covmat @ weights)**0.5
    202 
    203 
    204 def plot_ef2(n_points, er, cov):
    205     """
    206     Plots the 2-asset efficient frontier
    207     """
    208     if er.shape[0] != 2 or er.shape[0] != 2:
    209         raise ValueError("plot_ef2 can only plot 2-asset frontiers")
    210     weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
    211     rets = [portfolio_return(w, er) for w in weights]
    212     vols = [portfolio_vol(w, cov) for w in weights]
    213     ef = pd.DataFrame({
    214         "Returns": rets, 
    215         "Volatility": vols
    216     })
    217     return ef.plot.line(x="Volatility", y="Returns", style=".-")
    218 
    219 
    220 from scipy.optimize import minimize
    221 
    222 def minimize_vol(target_return, er, cov):
    223     """
    224     Returns the optimal weights that achieve the target return
    225     given a set of expected returns and a covariance matrix
    226     """
    227     n = er.shape[0]
    228     init_guess = np.repeat(1/n, n)
    229     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    230     # construct the constraints
    231     weights_sum_to_1 = {'type': 'eq',
    232                         'fun': lambda weights: np.sum(weights) - 1
    233     }
    234     return_is_target = {'type': 'eq',
    235                         'args': (er,),
    236                         'fun': lambda weights, er: target_return - portfolio_return(weights,er)
    237     }
    238     weights = minimize(portfolio_vol, init_guess,
    239                        args=(cov,), method='SLSQP',
    240                        options={'disp': False},
    241                        constraints=(weights_sum_to_1,return_is_target),
    242                        bounds=bounds)
    243     return weights.x
    244 
    245 
    246 def msr(riskfree_rate, er, cov):
    247     """
    248     Returns the weights of the portfolio that gives you the maximum sharpe ratio
    249     given the riskfree rate and expected returns and a covariance matrix
    250     """
    251     n = er.shape[0]
    252     init_guess = np.repeat(1/n, n)
    253     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    254     # construct the constraints
    255     weights_sum_to_1 = {'type': 'eq',
    256                         'fun': lambda weights: np.sum(weights) - 1
    257     }
    258     def neg_sharpe(weights, riskfree_rate, er, cov):
    259         """
    260         Returns the negative of the sharpe ratio
    261         of the given portfolio
    262         """
    263         r = portfolio_return(weights, er)
    264         vol = portfolio_vol(weights, cov)
    265         return -(r - riskfree_rate)/vol
    266     
    267     weights = minimize(neg_sharpe, init_guess,
    268                        args=(riskfree_rate, er, cov), method='SLSQP',
    269                        options={'disp': False},
    270                        constraints=(weights_sum_to_1,),
    271                        bounds=bounds)
    272     return weights.x
    273 
    274 
    275 def optimal_weights(n_points, er, cov):
    276     """
    277     Returns a list of weights that represent a grid of n_points on the efficient frontier
    278     """
    279     target_rs = np.linspace(er.min(), er.max(), n_points)
    280     weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
    281     return weights
    282 
    283 
    284 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0):
    285     """
    286     Plots the multi-asset efficient frontier
    287     """
    288     weights = optimal_weights(n_points, er, cov)
    289     rets = [portfolio_return(w, er) for w in weights]
    290     vols = [portfolio_vol(w, cov) for w in weights]
    291     ef = pd.DataFrame({
    292         "Returns": rets, 
    293         "Volatility": vols
    294     })
    295     ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
    296     if show_cml:
    297         ax.set_xlim(left = 0)
    298         # get MSR
    299         w_msr = msr(riskfree_rate, er, cov)
    300         r_msr = portfolio_return(w_msr, er)
    301         vol_msr = portfolio_vol(w_msr, cov)
    302         # add CML
    303         cml_x = [0, vol_msr]
    304         cml_y = [riskfree_rate, r_msr]
    305         ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=12)
    306     return ax