ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_118.py

(12094B)
      1 llimport pandas as pd
      2 import numpy as np
      3 
      4 def get_ffme_returns():
      5     """
      6     Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
      7     """
      8     me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
      9                        header=0, index_col=0, na_values=-99.99)
     10     rets = me_m[['Lo 10', 'Hi 10']]
     11     rets.columns = ['SmallCap', 'LargeCap']
     12     rets = rets/100
     13     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     14     return rets
     15 
     16 
     17 def get_hfi_returns():
     18     """
     19     Load and format the EDHEC Hedge Fund Index Returns
     20     """
     21     hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
     22                       header=0, index_col=0, parse_dates=True)
     23     hfi = hfi/100
     24     hfi.index = hfi.index.to_period('M')
     25     return hfi
     26 
     27 def get_ind_file(filetype):
     28     """
     29     Load and format the Ken French 30 Industry Portfolios files
     30     """
     31     known_types = ["returns", "nfirms", "size"]
     32     if filetype not in known_types:
     33         raise ValueError(f"filetype must be one of:{','.join(known_types)}")
     34     if filetype is "returns":
     35         name = "vw_rets"
     36         divisor = 100
     37     elif filetype is "nfirms":
     38         name = "nfirms"
     39         divisor = 1
     40     elif filetype is "size":
     41         name = "size"
     42         divisor = 1
     43                          
     44     ind = pd.read_csv(f"data/ind30_m_{name}.csv", header=0, index_col=0)/divisor
     45     ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
     46     ind.columns = ind.columns.str.strip()
     47     return ind
     48 
     49 def get_ind_returns():
     50     """
     51     Load and format the Ken French 30 Industry Portfolios Value Weighted Monthly Returns
     52     """
     53     return get_ind_file("returns")
     54 
     55 def get_ind_nfirms():
     56     """
     57     Load and format the Ken French 30 Industry Portfolios Average number of Firms
     58     """
     59     return get_ind_file("nfirms")
     60 
     61 def get_ind_size():
     62     """
     63     Load and format the Ken French 30 Industry Portfolios Average size (market cap)
     64     """
     65     return get_ind_file("size")
     66 
     67                          
     68 def get_total_market_index_returns():
     69     """
     70     Load the 30 industry portfolio data and derive the returns of a capweighted total market index
     71     """
     72     ind_nfirms = get_ind_nfirms()
     73     ind_size = get_ind_size()
     74     ind_return = get_ind_returns()
     75     ind_mktcap = ind_nfirms * ind_size
     76     total_mktcap = ind_mktcap.sum(axis=1)
     77     ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
     78     total_market_return = (ind_capweight * ind_return).sum(axis="columns")
     79     return total_market_return
     80                          
     81 def skewness(r):
     82     """
     83     Alternative to scipy.stats.skew()
     84     Computes the skewness of the supplied Series or DataFrame
     85     Returns a float or a Series
     86     """
     87     demeaned_r = r - r.mean()
     88     # use the population standard deviation, so set dof=0
     89     sigma_r = r.std(ddof=0)
     90     exp = (demeaned_r**3).mean()
     91     return exp/sigma_r**3
     92 
     93 
     94 def kurtosis(r):
     95     """
     96     Alternative to scipy.stats.kurtosis()
     97     Computes the kurtosis of the supplied Series or DataFrame
     98     Returns a float or a Series
     99     """
    100     demeaned_r = r - r.mean()
    101     # use the population standard deviation, so set dof=0
    102     sigma_r = r.std(ddof=0)
    103     exp = (demeaned_r**4).mean()
    104     return exp/sigma_r**4
    105 
    106 
    107 def annualize_rets(r, periods_per_year):
    108     """
    109     Annualizes a set of returns
    110     We should infer the periods per year
    111     but that is currently left as an exercise
    112     to the reader :-)
    113     """
    114     compounded_growth = (1+r).prod()
    115     n_periods = r.shape[0]
    116     return compounded_growth**(periods_per_year/n_periods)-1
    117 
    118 
    119 def annualize_vol(r, periods_per_year):
    120     """
    121     Annualizes the vol of a set of returns
    122     We should infer the periods per year
    123     but that is currently left as an exercise
    124     to the reader :-)
    125     """
    126     return r.std()*(periods_per_year**0.5)
    127 
    128 
    129 def sharpe_ratio(r, riskfree_rate, periods_per_year):
    130     """
    131     Computes the annualized sharpe ratio of a set of returns
    132     """
    133     # convert the annual riskfree rate to per period
    134     rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
    135     excess_ret = r - rf_per_period
    136     ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
    137     ann_vol = annualize_vol(r, periods_per_year)
    138     return ann_ex_ret/ann_vol
    139 
    140 
    141 import scipy.stats
    142 def is_normal(r, level=0.01):
    143     """
    144     Applies the Jarque-Bera test to determine if a Series is normal or not
    145     Test is applied at the 1% level by default
    146     Returns True if the hypothesis of normality is accepted, False otherwise
    147     """
    148     if isinstance(r, pd.DataFrame):
    149         return r.aggregate(is_normal)
    150     else:
    151         statistic, p_value = scipy.stats.jarque_bera(r)
    152         return p_value > level
    153 
    154 
    155 def drawdown(return_series: pd.Series):
    156     """Takes a time series of asset returns.
    157        returns a DataFrame with columns for
    158        the wealth index, 
    159        the previous peaks, and 
    160        the percentage drawdown
    161     """
    162     wealth_index = 1000*(1+return_series).cumprod()
    163     previous_peaks = wealth_index.cummax()
    164     drawdowns = (wealth_index - previous_peaks)/previous_peaks
    165     return pd.DataFrame({"Wealth": wealth_index, 
    166                          "Previous Peak": previous_peaks, 
    167                          "Drawdown": drawdowns})
    168 
    169 
    170 def semideviation(r):
    171     """
    172     Returns the semideviation aka negative semideviation of r
    173     r must be a Series or a DataFrame, else raises a TypeError
    174     """
    175     if isinstance(r, pd.Series):
    176         is_negative = r < 0
    177         return r[is_negative].std(ddof=0)
    178     elif isinstance(r, pd.DataFrame):
    179         return r.aggregate(semideviation)
    180     else:
    181         raise TypeError("Expected r to be a Series or DataFrame")
    182 
    183 
    184 def var_historic(r, level=5):
    185     """
    186     Returns the historic Value at Risk at a specified level
    187     i.e. returns the number such that "level" percent of the returns
    188     fall below that number, and the (100-level) percent are above
    189     """
    190     if isinstance(r, pd.DataFrame):
    191         return r.aggregate(var_historic, level=level)
    192     elif isinstance(r, pd.Series):
    193         return -np.percentile(r, level)
    194     else:
    195         raise TypeError("Expected r to be a Series or DataFrame")
    196 
    197 
    198 def cvar_historic(r, level=5):
    199     """
    200     Computes the Conditional VaR of Series or DataFrame
    201     """
    202     if isinstance(r, pd.Series):
    203         is_beyond = r <= -var_historic(r, level=level)
    204         return -r[is_beyond].mean()
    205     elif isinstance(r, pd.DataFrame):
    206         return r.aggregate(cvar_historic, level=level)
    207     else:
    208         raise TypeError("Expected r to be a Series or DataFrame")
    209 
    210 
    211 from scipy.stats import norm
    212 def var_gaussian(r, level=5, modified=False):
    213     """
    214     Returns the Parametric Gauusian VaR of a Series or DataFrame
    215     If "modified" is True, then the modified VaR is returned,
    216     using the Cornish-Fisher modification
    217     """
    218     # compute the Z score assuming it was Gaussian
    219     z = norm.ppf(level/100)
    220     if modified:
    221         # modify the Z score based on observed skewness and kurtosis
    222         s = skewness(r)
    223         k = kurtosis(r)
    224         z = (z +
    225                 (z**2 - 1)*s/6 +
    226                 (z**3 -3*z)*(k-3)/24 -
    227                 (2*z**3 - 5*z)*(s**2)/36
    228             )
    229     return -(r.mean() + z*r.std(ddof=0))
    230 
    231 
    232 def portfolio_return(weights, returns):
    233     """
    234     Computes the return on a portfolio from constituent returns and weights
    235     weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
    236     """
    237     return weights.T @ returns
    238 
    239 
    240 def portfolio_vol(weights, covmat):
    241     """
    242     Computes the vol of a portfolio from a covariance matrix and constituent weights
    243     weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
    244     """
    245     return (weights.T @ covmat @ weights)**0.5
    246 
    247 
    248 def plot_ef2(n_points, er, cov):
    249     """
    250     Plots the 2-asset efficient frontier
    251     """
    252     if er.shape[0] != 2 or er.shape[0] != 2:
    253         raise ValueError("plot_ef2 can only plot 2-asset frontiers")
    254     weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
    255     rets = [portfolio_return(w, er) for w in weights]
    256     vols = [portfolio_vol(w, cov) for w in weights]
    257     ef = pd.DataFrame({
    258         "Returns": rets, 
    259         "Volatility": vols
    260     })
    261     return ef.plot.line(x="Volatility", y="Returns", style=".-")
    262 
    263 
    264 from scipy.optimize import minimize
    265 
    266 def minimize_vol(target_return, er, cov):
    267     """
    268     Returns the optimal weights that achieve the target return
    269     given a set of expected returns and a covariance matrix
    270     """
    271     n = er.shape[0]
    272     init_guess = np.repeat(1/n, n)
    273     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    274     # construct the constraints
    275     weights_sum_to_1 = {'type': 'eq',
    276                         'fun': lambda weights: np.sum(weights) - 1
    277     }
    278     return_is_target = {'type': 'eq',
    279                         'args': (er,),
    280                         'fun': lambda weights, er: target_return - portfolio_return(weights,er)
    281     }
    282     weights = minimize(portfolio_vol, init_guess,
    283                        args=(cov,), method='SLSQP',
    284                        options={'disp': False},
    285                        constraints=(weights_sum_to_1,return_is_target),
    286                        bounds=bounds)
    287     return weights.x
    288 
    289 
    290 def msr(riskfree_rate, er, cov):
    291     """
    292     Returns the weights of the portfolio that gives you the maximum sharpe ratio
    293     given the riskfree rate and expected returns and a covariance matrix
    294     """
    295     n = er.shape[0]
    296     init_guess = np.repeat(1/n, n)
    297     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    298     # construct the constraints
    299     weights_sum_to_1 = {'type': 'eq',
    300                         'fun': lambda weights: np.sum(weights) - 1
    301     }
    302     def neg_sharpe(weights, riskfree_rate, er, cov):
    303         """
    304         Returns the negative of the sharpe ratio
    305         of the given portfolio
    306         """
    307         r = portfolio_return(weights, er)
    308         vol = portfolio_vol(weights, cov)
    309         return -(r - riskfree_rate)/vol
    310     
    311     weights = minimize(neg_sharpe, init_guess,
    312                        args=(riskfree_rate, er, cov), method='SLSQP',
    313                        options={'disp': False},
    314                        constraints=(weights_sum_to_1,),
    315                        bounds=bounds)
    316     return weights.x
    317 
    318 
    319 def gmv(cov):
    320     """
    321     Returns the weights of the Global Minimum Volatility portfolio
    322     given a covariance matrix
    323     """
    324     n = cov.shape[0]
    325     return msr(0, np.repeat(1, n), cov)
    326 
    327 
    328 def optimal_weights(n_points, er, cov):
    329     """
    330     Returns a list of weights that represent a grid of n_points on the efficient frontier
    331     """
    332     target_rs = np.linspace(er.min(), er.max(), n_points)
    333     weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
    334     return weights
    335 
    336 
    337 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
    338     """
    339     Plots the multi-asset efficient frontier
    340     """
    341     weights = optimal_weights(n_points, er, cov)
    342     rets = [portfolio_return(w, er) for w in weights]
    343     vols = [portfolio_vol(w, cov) for w in weights]
    344     ef = pd.DataFrame({
    345         "Returns": rets, 
    346         "Volatility": vols
    347     })
    348     ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
    349     if show_cml:
    350         ax.set_xlim(left = 0)
    351         # get MSR
    352         w_msr = msr(riskfree_rate, er, cov)
    353         r_msr = portfolio_return(w_msr, er)
    354         vol_msr = portfolio_vol(w_msr, cov)
    355         # add CML
    356         cml_x = [0, vol_msr]
    357         cml_y = [riskfree_rate, r_msr]
    358         ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
    359     if show_ew:
    360         n = er.shape[0]
    361         w_ew = np.repeat(1/n, n)
    362         r_ew = portfolio_return(w_ew, er)
    363         vol_ew = portfolio_vol(w_ew, cov)
    364         # add EW
    365         ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
    366     if show_gmv:
    367         w_gmv = gmv(cov)
    368         r_gmv = portfolio_return(w_gmv, er)
    369         vol_gmv = portfolio_vol(w_gmv, cov)
    370         # add EW
    371         ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
    372         
    373         return ax