ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_206.py

(25414B)
      1 import pandas as pd
      2 import numpy as np
      3 
      4 def get_ffme_returns():
      5     """
      6     Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
      7     """
      8     me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
      9                        header=0, index_col=0, na_values=-99.99)
     10     rets = me_m[['Lo 10', 'Hi 10']]
     11     rets.columns = ['SmallCap', 'LargeCap']
     12     rets = rets/100
     13     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     14     return rets
     15 
     16 def get_fff_returns():
     17     """
     18     Load the Fama-French Research Factor Monthly Dataset
     19     """
     20     rets = pd.read_csv("data/F-F_Research_Data_Factors_m.csv",
     21                        header=0, index_col=0, na_values=-99.99)/100
     22     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     23     return rets
     24 
     25 
     26 def get_hfi_returns():
     27     """
     28     Load and format the EDHEC Hedge Fund Index Returns
     29     """
     30     hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
     31                       header=0, index_col=0, parse_dates=True)
     32     hfi = hfi/100
     33     hfi.index = hfi.index.to_period('M')
     34     return hfi
     35 
     36 def get_ind_file(filetype, weighting="vw", n_inds=30):
     37     """
     38     Load and format the Ken French Industry Portfolios files
     39     Variant is a tuple of (weighting, size) where:
     40         weighting is one of "ew", "vw"
     41         number of inds is 30 or 49
     42     """    
     43     if filetype is "returns":
     44         name = f"{weighting}_rets" 
     45         divisor = 100
     46     elif filetype is "nfirms":
     47         name = "nfirms"
     48         divisor = 1
     49     elif filetype is "size":
     50         name = "size"
     51         divisor = 1
     52     else:
     53         raise ValueError(f"filetype must be one of: returns, nfirms, size")
     54     
     55     ind = pd.read_csv(f"data/ind{n_inds}_m_{name}.csv", header=0, index_col=0, na_values=-99.99)/divisor
     56     ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
     57     ind.columns = ind.columns.str.strip()
     58     return ind
     59 
     60 def get_ind_returns(weighting="vw", n_inds=30):
     61     """
     62     Load and format the Ken French Industry Portfolios Monthly Returns
     63     """
     64     return get_ind_file("returns", weighting=weighting, n_inds=n_inds)
     65 
     66 def get_ind_nfirms(n_inds=30):
     67     """
     68     Load and format the Ken French 30 Industry Portfolios Average number of Firms
     69     """
     70     return get_ind_file("nfirms", n_inds=n_inds)
     71 
     72 def get_ind_size(n_inds=30):
     73     """
     74     Load and format the Ken French 30 Industry Portfolios Average size (market cap)
     75     """
     76     return get_ind_file("size", n_inds=n_inds)
     77 
     78 
     79 def get_ind_market_caps(n_inds=30, weights=False):
     80     """
     81     Load the industry portfolio data and derive the market caps
     82     """
     83     ind_nfirms = get_ind_nfirms(n_inds=n_inds)
     84     ind_size = get_ind_size(n_inds=n_inds)
     85     ind_mktcap = ind_nfirms * ind_size
     86     if weights:
     87         total_mktcap = ind_mktcap.sum(axis=1)
     88         ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
     89         return ind_capweight
     90     #else
     91     return ind_mktcap
     92 
     93 def get_total_market_index_returns(n_inds=30):
     94     """
     95     Load the 30 industry portfolio data and derive the returns of a capweighted total market index
     96     """
     97     ind_capweight = get_ind_market_caps(n_inds=n_inds)
     98     ind_return = get_ind_returns(weighting="vw", n_inds=n_inds)
     99     total_market_return = (ind_capweight * ind_return).sum(axis="columns")
    100     return total_market_return
    101                          
    102 def skewness(r):
    103     """
    104     Alternative to scipy.stats.skew()
    105     Computes the skewness of the supplied Series or DataFrame
    106     Returns a float or a Series
    107     """
    108     demeaned_r = r - r.mean()
    109     # use the population standard deviation, so set dof=0
    110     sigma_r = r.std(ddof=0)
    111     exp = (demeaned_r**3).mean()
    112     return exp/sigma_r**3
    113 
    114 
    115 def kurtosis(r):
    116     """
    117     Alternative to scipy.stats.kurtosis()
    118     Computes the kurtosis of the supplied Series or DataFrame
    119     Returns a float or a Series
    120     """
    121     demeaned_r = r - r.mean()
    122     # use the population standard deviation, so set dof=0
    123     sigma_r = r.std(ddof=0)
    124     exp = (demeaned_r**4).mean()
    125     return exp/sigma_r**4
    126 
    127 
    128 def compound(r):
    129     """
    130     returns the result of compounding the set of returns in r
    131     """
    132     return np.expm1(np.log1p(r).sum())
    133 
    134                          
    135 def annualize_rets(r, periods_per_year):
    136     """
    137     Annualizes a set of returns
    138     We should infer the periods per year
    139     but that is currently left as an exercise
    140     to the reader :-)
    141     """
    142     compounded_growth = (1+r).prod()
    143     n_periods = r.shape[0]
    144     return compounded_growth**(periods_per_year/n_periods)-1
    145 
    146 
    147 def annualize_vol(r, periods_per_year):
    148     """
    149     Annualizes the vol of a set of returns
    150     We should infer the periods per year
    151     but that is currently left as an exercise
    152     to the reader :-)
    153     """
    154     return r.std()*(periods_per_year**0.5)
    155 
    156 
    157 def sharpe_ratio(r, riskfree_rate, periods_per_year):
    158     """
    159     Computes the annualized sharpe ratio of a set of returns
    160     """
    161     # convert the annual riskfree rate to per period
    162     rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
    163     excess_ret = r - rf_per_period
    164     ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
    165     ann_vol = annualize_vol(r, periods_per_year)
    166     return ann_ex_ret/ann_vol
    167 
    168 
    169 import scipy.stats
    170 def is_normal(r, level=0.01):
    171     """
    172     Applies the Jarque-Bera test to determine if a Series is normal or not
    173     Test is applied at the 1% level by default
    174     Returns True if the hypothesis of normality is accepted, False otherwise
    175     """
    176     if isinstance(r, pd.DataFrame):
    177         return r.aggregate(is_normal)
    178     else:
    179         statistic, p_value = scipy.stats.jarque_bera(r)
    180         return p_value > level
    181 
    182 
    183 def drawdown(return_series: pd.Series):
    184     """Takes a time series of asset returns.
    185        returns a DataFrame with columns for
    186        the wealth index, 
    187        the previous peaks, and 
    188        the percentage drawdown
    189     """
    190     wealth_index = 1000*(1+return_series).cumprod()
    191     previous_peaks = wealth_index.cummax()
    192     drawdowns = (wealth_index - previous_peaks)/previous_peaks
    193     return pd.DataFrame({"Wealth": wealth_index, 
    194                          "Previous Peak": previous_peaks, 
    195                          "Drawdown": drawdowns})
    196 
    197 
    198 def semideviation(r):
    199     """
    200     Returns the semideviation aka negative semideviation of r
    201     r must be a Series or a DataFrame, else raises a TypeError
    202     """
    203     if isinstance(r, pd.Series):
    204         is_negative = r < 0
    205         return r[is_negative].std(ddof=0)
    206     elif isinstance(r, pd.DataFrame):
    207         return r.aggregate(semideviation)
    208     else:
    209         raise TypeError("Expected r to be a Series or DataFrame")
    210 
    211 
    212 def var_historic(r, level=5):
    213     """
    214     Returns the historic Value at Risk at a specified level
    215     i.e. returns the number such that "level" percent of the returns
    216     fall below that number, and the (100-level) percent are above
    217     """
    218     if isinstance(r, pd.DataFrame):
    219         return r.aggregate(var_historic, level=level)
    220     elif isinstance(r, pd.Series):
    221         return -np.percentile(r, level)
    222     else:
    223         raise TypeError("Expected r to be a Series or DataFrame")
    224 
    225 
    226 def cvar_historic(r, level=5):
    227     """
    228     Computes the Conditional VaR of Series or DataFrame
    229     """
    230     if isinstance(r, pd.Series):
    231         is_beyond = r <= -var_historic(r, level=level)
    232         return -r[is_beyond].mean()
    233     elif isinstance(r, pd.DataFrame):
    234         return r.aggregate(cvar_historic, level=level)
    235     else:
    236         raise TypeError("Expected r to be a Series or DataFrame")
    237 
    238 
    239 from scipy.stats import norm
    240 def var_gaussian(r, level=5, modified=False):
    241     """
    242     Returns the Parametric Gauusian VaR of a Series or DataFrame
    243     If "modified" is True, then the modified VaR is returned,
    244     using the Cornish-Fisher modification
    245     """
    246     # compute the Z score assuming it was Gaussian
    247     z = norm.ppf(level/100)
    248     if modified:
    249         # modify the Z score based on observed skewness and kurtosis
    250         s = skewness(r)
    251         k = kurtosis(r)
    252         z = (z +
    253                 (z**2 - 1)*s/6 +
    254                 (z**3 -3*z)*(k-3)/24 -
    255                 (2*z**3 - 5*z)*(s**2)/36
    256             )
    257     return -(r.mean() + z*r.std(ddof=0))
    258 
    259 
    260 def portfolio_return(weights, returns):
    261     """
    262     Computes the return on a portfolio from constituent returns and weights
    263     weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
    264     """
    265     return weights.T @ returns
    266 
    267 
    268 def portfolio_vol(weights, covmat):
    269     """
    270     Computes the vol of a portfolio from a covariance matrix and constituent weights
    271     weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
    272     """
    273     vol = (weights.T @ covmat @ weights)**0.5
    274     return vol 
    275 
    276 
    277 def plot_ef2(n_points, er, cov):
    278     """
    279     Plots the 2-asset efficient frontier
    280     """
    281     if er.shape[0] != 2 or er.shape[0] != 2:
    282         raise ValueError("plot_ef2 can only plot 2-asset frontiers")
    283     weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
    284     rets = [portfolio_return(w, er) for w in weights]
    285     vols = [portfolio_vol(w, cov) for w in weights]
    286     ef = pd.DataFrame({
    287         "Returns": rets, 
    288         "Volatility": vols
    289     })
    290     return ef.plot.line(x="Volatility", y="Returns", style=".-")
    291 
    292 
    293 from scipy.optimize import minimize
    294 
    295 def minimize_vol(target_return, er, cov):
    296     """
    297     Returns the optimal weights that achieve the target return
    298     given a set of expected returns and a covariance matrix
    299     """
    300     n = er.shape[0]
    301     init_guess = np.repeat(1/n, n)
    302     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    303     # construct the constraints
    304     weights_sum_to_1 = {'type': 'eq',
    305                         'fun': lambda weights: np.sum(weights) - 1
    306     }
    307     return_is_target = {'type': 'eq',
    308                         'args': (er,),
    309                         'fun': lambda weights, er: target_return - portfolio_return(weights,er)
    310     }
    311     weights = minimize(portfolio_vol, init_guess,
    312                        args=(cov,), method='SLSQP',
    313                        options={'disp': False},
    314                        constraints=(weights_sum_to_1,return_is_target),
    315                        bounds=bounds)
    316     return weights.x
    317 
    318 
    319 def tracking_error(r_a, r_b):
    320     """
    321     Returns the Tracking Error between the two return series
    322     """
    323     return np.sqrt(((r_a - r_b)**2).sum())
    324 
    325                          
    326 def msr(riskfree_rate, er, cov):
    327     """
    328     Returns the weights of the portfolio that gives you the maximum sharpe ratio
    329     given the riskfree rate and expected returns and a covariance matrix
    330     """
    331     n = er.shape[0]
    332     init_guess = np.repeat(1/n, n)
    333     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    334     # construct the constraints
    335     weights_sum_to_1 = {'type': 'eq',
    336                         'fun': lambda weights: np.sum(weights) - 1
    337     }
    338     def neg_sharpe(weights, riskfree_rate, er, cov):
    339         """
    340         Returns the negative of the sharpe ratio
    341         of the given portfolio
    342         """
    343         r = portfolio_return(weights, er)
    344         vol = portfolio_vol(weights, cov)
    345         return -(r - riskfree_rate)/vol
    346     
    347     weights = minimize(neg_sharpe, init_guess,
    348                        args=(riskfree_rate, er, cov), method='SLSQP',
    349                        options={'disp': False},
    350                        constraints=(weights_sum_to_1,),
    351                        bounds=bounds)
    352     return weights.x
    353 
    354 
    355 def gmv(cov):
    356     """
    357     Returns the weights of the Global Minimum Volatility portfolio
    358     given a covariance matrix
    359     """
    360     n = cov.shape[0]
    361     return msr(0, np.repeat(1, n), cov)
    362 
    363 
    364 def optimal_weights(n_points, er, cov):
    365     """
    366     Returns a list of weights that represent a grid of n_points on the efficient frontier
    367     """
    368     target_rs = np.linspace(er.min(), er.max(), n_points)
    369     weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
    370     return weights
    371 
    372 
    373 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
    374     """
    375     Plots the multi-asset efficient frontier
    376     """
    377     weights = optimal_weights(n_points, er, cov)
    378     rets = [portfolio_return(w, er) for w in weights]
    379     vols = [portfolio_vol(w, cov) for w in weights]
    380     ef = pd.DataFrame({
    381         "Returns": rets, 
    382         "Volatility": vols
    383     })
    384     ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
    385     if show_cml:
    386         ax.set_xlim(left = 0)
    387         # get MSR
    388         w_msr = msr(riskfree_rate, er, cov)
    389         r_msr = portfolio_return(w_msr, er)
    390         vol_msr = portfolio_vol(w_msr, cov)
    391         # add CML
    392         cml_x = [0, vol_msr]
    393         cml_y = [riskfree_rate, r_msr]
    394         ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
    395     if show_ew:
    396         n = er.shape[0]
    397         w_ew = np.repeat(1/n, n)
    398         r_ew = portfolio_return(w_ew, er)
    399         vol_ew = portfolio_vol(w_ew, cov)
    400         # add EW
    401         ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
    402     if show_gmv:
    403         w_gmv = gmv(cov)
    404         r_gmv = portfolio_return(w_gmv, er)
    405         vol_gmv = portfolio_vol(w_gmv, cov)
    406         # add EW
    407         ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
    408         
    409         return ax
    410 
    411                          
    412 def run_cppi(risky_r, safe_r=None, m=3, start=1000, floor=0.8, riskfree_rate=0.03, drawdown=None):
    413     """
    414     Run a backtest of the CPPI strategy, given a set of returns for the risky asset
    415     Returns a dictionary containing: Asset Value History, Risk Budget History, Risky Weight History
    416     """
    417     # set up the CPPI parameters
    418     dates = risky_r.index
    419     n_steps = len(dates)
    420     account_value = start
    421     floor_value = start*floor
    422     peak = account_value
    423     if isinstance(risky_r, pd.Series): 
    424         risky_r = pd.DataFrame(risky_r, columns=["R"])
    425 
    426     if safe_r is None:
    427         safe_r = pd.DataFrame().reindex_like(risky_r)
    428         safe_r.values[:] = riskfree_rate/12 # fast way to set all values to a number
    429     # set up some DataFrames for saving intermediate values
    430     account_history = pd.DataFrame().reindex_like(risky_r)
    431     risky_w_history = pd.DataFrame().reindex_like(risky_r)
    432     cushion_history = pd.DataFrame().reindex_like(risky_r)
    433     floorval_history = pd.DataFrame().reindex_like(risky_r)
    434     peak_history = pd.DataFrame().reindex_like(risky_r)
    435 
    436     for step in range(n_steps):
    437         if drawdown is not None:
    438             peak = np.maximum(peak, account_value)
    439             floor_value = peak*(1-drawdown)
    440         cushion = (account_value - floor_value)/account_value
    441         risky_w = m*cushion
    442         risky_w = np.minimum(risky_w, 1)
    443         risky_w = np.maximum(risky_w, 0)
    444         safe_w = 1-risky_w
    445         risky_alloc = account_value*risky_w
    446         safe_alloc = account_value*safe_w
    447         # recompute the new account value at the end of this step
    448         account_value = risky_alloc*(1+risky_r.iloc[step]) + safe_alloc*(1+safe_r.iloc[step])
    449         # save the histories for analysis and plotting
    450         cushion_history.iloc[step] = cushion
    451         risky_w_history.iloc[step] = risky_w
    452         account_history.iloc[step] = account_value
    453         floorval_history.iloc[step] = floor_value
    454         peak_history.iloc[step] = peak
    455     risky_wealth = start*(1+risky_r).cumprod()
    456     backtest_result = {
    457         "Wealth": account_history,
    458         "Risky Wealth": risky_wealth, 
    459         "Risk Budget": cushion_history,
    460         "Risky Allocation": risky_w_history,
    461         "m": m,
    462         "start": start,
    463         "floor": floor,
    464         "risky_r":risky_r,
    465         "safe_r": safe_r,
    466         "drawdown": drawdown,
    467         "peak": peak_history,
    468         "floor": floorval_history
    469     }
    470     return backtest_result
    471 
    472 
    473 def summary_stats(r, riskfree_rate=0.03):
    474     """
    475     Return a DataFrame that contains aggregated summary stats for the returns in the columns of r
    476     """
    477     ann_r = r.aggregate(annualize_rets, periods_per_year=12)
    478     ann_vol = r.aggregate(annualize_vol, periods_per_year=12)
    479     ann_sr = r.aggregate(sharpe_ratio, riskfree_rate=riskfree_rate, periods_per_year=12)
    480     dd = r.aggregate(lambda r: drawdown(r).Drawdown.min())
    481     skew = r.aggregate(skewness)
    482     kurt = r.aggregate(kurtosis)
    483     cf_var5 = r.aggregate(var_gaussian, modified=True)
    484     hist_cvar5 = r.aggregate(cvar_historic)
    485     return pd.DataFrame({
    486         "Annualized Return": ann_r,
    487         "Annualized Vol": ann_vol,
    488         "Skewness": skew,
    489         "Kurtosis": kurt,
    490         "Cornish-Fisher VaR (5%)": cf_var5,
    491         "Historic CVaR (5%)": hist_cvar5,
    492         "Sharpe Ratio": ann_sr,
    493         "Max Drawdown": dd
    494     })
    495 
    496                          
    497 def gbm(n_years = 10, n_scenarios=1000, mu=0.07, sigma=0.15, steps_per_year=12, s_0=100.0, prices=True):
    498     """
    499     Evolution of Geometric Brownian Motion trajectories, such as for Stock Prices through Monte Carlo
    500     :param n_years:  The number of years to generate data for
    501     :param n_paths: The number of scenarios/trajectories
    502     :param mu: Annualized Drift, e.g. Market Return
    503     :param sigma: Annualized Volatility
    504     :param steps_per_year: granularity of the simulation
    505     :param s_0: initial value
    506     :return: a numpy array of n_paths columns and n_years*steps_per_year rows
    507     """
    508     # Derive per-step Model Parameters from User Specifications
    509     dt = 1/steps_per_year
    510     n_steps = int(n_years*steps_per_year) + 1
    511     # the standard way ...
    512     # rets_plus_1 = np.random.normal(loc=mu*dt+1, scale=sigma*np.sqrt(dt), size=(n_steps, n_scenarios))
    513     # without discretization error ...
    514     rets_plus_1 = np.random.normal(loc=(1+mu)**dt, scale=(sigma*np.sqrt(dt)), size=(n_steps, n_scenarios))
    515     rets_plus_1[0] = 1
    516     ret_val = s_0*pd.DataFrame(rets_plus_1).cumprod() if prices else rets_plus_1-1
    517     return ret_val
    518 
    519                          
    520 import statsmodels.api as sm
    521 def regress(dependent_variable, explanatory_variables, alpha=True):
    522     """
    523     Runs a linear regression to decompose the dependent variable into the explanatory variables
    524     returns an object of type statsmodel's RegressionResults on which you can call
    525        .summary() to print a full summary
    526        .params for the coefficients
    527        .tvalues and .pvalues for the significance levels
    528        .rsquared_adj and .rsquared for quality of fit
    529     """
    530     if alpha:
    531         explanatory_variables = explanatory_variables.copy()
    532         explanatory_variables["Alpha"] = 1
    533     
    534     lm = sm.OLS(dependent_variable, explanatory_variables).fit()
    535     return lm
    536 
    537 def portfolio_tracking_error(weights, ref_r, bb_r):
    538     """
    539     returns the tracking error between the reference returns
    540     and a portfolio of building block returns held with given weights
    541     """
    542     return tracking_error(ref_r, (weights*bb_r).sum(axis=1))
    543                          
    544 def style_analysis(dependent_variable, explanatory_variables):
    545     """
    546     Returns the optimal weights that minimizes the Tracking error between
    547     a portfolio of the explanatory variables and the dependent variable
    548     """
    549     n = explanatory_variables.shape[1]
    550     init_guess = np.repeat(1/n, n)
    551     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    552     # construct the constraints
    553     weights_sum_to_1 = {'type': 'eq',
    554                         'fun': lambda weights: np.sum(weights) - 1
    555     }
    556     solution = minimize(portfolio_tracking_error, init_guess,
    557                        args=(dependent_variable, explanatory_variables,), method='SLSQP',
    558                        options={'disp': False},
    559                        constraints=(weights_sum_to_1,),
    560                        bounds=bounds)
    561     weights = pd.Series(solution.x, index=explanatory_variables.columns)
    562     return weights
    563 
    564 
    565 def ff_analysis(r, factors):
    566     """
    567     Returns the loadings  of r on the Fama French Factors
    568     which can be read in using get_fff_returns()
    569     the index of r must be a (not necessarily proper) subset of the index of factors
    570     r is either a Series or a DataFrame
    571     """
    572     if isinstance(r, pd.Series):
    573         dependent_variable = r
    574         explanatory_variables = factors.loc[r.index]
    575         tilts = regress(dependent_variable, explanatory_variables).params
    576     elif isinstance(r, pd.DataFrame):
    577         tilts = pd.DataFrame({col: ff_analysis(r[col], factors) for col in r.columns})
    578     else:
    579         raise TypeError("r must be a Series or a DataFrame")
    580     return tilts
    581 
    582 def weight_ew(r, cap_weights=None, max_cw_mult=None, microcap_threshold=None, **kwargs):
    583     """
    584     Returns the weights of the EW portfolio based on the asset returns "r" as a DataFrame
    585     If supplied a set of capweights and a capweight tether, it is applied and reweighted 
    586     """
    587     n = len(r.columns)
    588     ew = pd.Series(1/n, index=r.columns)
    589     if cap_weights is not None:
    590         cw = cap_weights.loc[r.index[0]] # starting cap weight
    591         ## exclude microcaps
    592         if microcap_threshold is not None and microcap_threshold > 0:
    593             microcap = cw < microcap_threshold
    594             ew[microcap] = 0
    595             ew = ew/ew.sum()
    596         #limit weight to a multiple of capweight
    597         if max_cw_mult is not None and max_cw_mult > 0:
    598             ew = np.minimum(ew, cw*max_cw_mult)
    599             ew = ew/ew.sum() #reweight
    600     return ew
    601 
    602 def weight_cw(r, cap_weights, **kwargs):
    603     """
    604     Returns the weights of the CW portfolio based on the time series of capweights
    605     """
    606     w = cap_weights.loc[r.index[1]]
    607     return w/w.sum()
    608 
    609 def backtest_ws(r, estimation_window=60, weighting=weight_ew, verbose=False, **kwargs):
    610     """
    611     Backtests a given weighting scheme, given some parameters:
    612     r : asset returns to use to build the portfolio
    613     estimation_window: the window to use to estimate parameters
    614     weighting: the weighting scheme to use, must be a function that takes "r", and a variable number of keyword-value arguments
    615     """
    616     n_periods = r.shape[0]
    617     # return windows
    618     windows = [(start, start+estimation_window) for start in range(n_periods-estimation_window)]
    619     weights = [weighting(r.iloc[win[0]:win[1]], **kwargs) for win in windows]
    620     # convert List of weights to DataFrame
    621     weights = pd.DataFrame(weights, index=r.iloc[estimation_window:].index, columns=r.columns)
    622     returns = (weights * r).sum(axis="columns",  min_count=1) #mincount is to generate NAs if all inputs are NAs
    623     return returns
    624 
    625 def sample_cov(r, **kwargs):
    626     """
    627     Returns the sample covariance of the supplied returns
    628     """
    629     return r.cov()
    630 
    631 def weight_gmv(r, cov_estimator=sample_cov, **kwargs):
    632     """
    633     Produces the weights of the GMV portfolio given a covariance matrix of the returns 
    634     """
    635     est_cov = cov_estimator(r, **kwargs)
    636     return gmv(est_cov)
    637 
    638 def cc_cov(r, **kwargs):
    639     """
    640     Estimates a covariance matrix by using the Elton/Gruber Constant Correlation model
    641     """
    642     rhos = r.corr()
    643     n = rhos.shape[0]
    644     # this is a symmetric matrix with diagonals all 1 - so the mean correlation is ...
    645     rho_bar = (rhos.values.sum()-n)/(n*(n-1))
    646     ccor = np.full_like(rhos, rho_bar)
    647     np.fill_diagonal(ccor, 1.)
    648     sd = r.std()
    649     return pd.DataFrame(ccor * np.outer(sd, sd), index=r.columns, columns=r.columns)
    650 
    651 def shrinkage_cov(r, delta=0.5, **kwargs):
    652     """
    653     Covariance estimator that shrinks between the Sample Covariance and the Constant Correlation Estimators
    654     """
    655     prior = cc_cov(r, **kwargs)
    656     sample = sample_cov(r, **kwargs)
    657     return delta*prior + (1-delta)*sample
    658 
    659 def risk_contribution(w,cov):
    660     """
    661     Compute the contributions to risk of the constituents of a portfolio, given a set of portfolio weights and a covariance matrix
    662     """
    663     total_portfolio_var = portfolio_vol(w,cov)**2
    664     # Marginal contribution of each constituent
    665     marginal_contrib = cov@w
    666     risk_contrib = np.multiply(marginal_contrib,w.T)/total_portfolio_var
    667     return risk_contrib
    668 
    669 def target_risk_contributions(target_risk, cov):
    670     """
    671     Returns the weights of the portfolio that gives you the weights such
    672     that the contributions to portfolio risk are as close as possible to
    673     the target_risk, given the covariance matrix
    674     """
    675     n = cov.shape[0]
    676     init_guess = np.repeat(1/n, n)
    677     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    678     # construct the constraints
    679     weights_sum_to_1 = {'type': 'eq',
    680                         'fun': lambda weights: np.sum(weights) - 1
    681     }
    682     def msd_risk(weights, target_risk, cov):
    683         """
    684         Returns the Mean Squared Difference in risk contributions
    685         between weights and target_risk
    686         """
    687         w_contribs = risk_contribution(weights, cov)
    688         return ((w_contribs-target_risk)**2).sum()
    689     
    690     weights = minimize(msd_risk, init_guess,
    691                        args=(target_risk, cov), method='SLSQP',
    692                        options={'disp': False},
    693                        constraints=(weights_sum_to_1,),
    694                        bounds=bounds)
    695     return weights.x
    696 
    697 def equal_risk_contributions(cov):
    698     """
    699     Returns the weights of the portfolio that equalizes the contributions
    700     of the constituents based on the given covariance matrix
    701     """
    702     n = cov.shape[0]
    703     return target_risk_contributions(target_risk=np.repeat(1/n,n), cov=cov)
    704 
    705 def weight_erc(r, cov_estimator=sample_cov, **kwargs):
    706     """
    707     Produces the weights of the ERC portfolio given a covariance matrix of the returns 
    708     """
    709     est_cov = cov_estimator(r, **kwargs)
    710     return equal_risk_contributions(est_cov)