ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_205.py

(23410B)
      1 import pandas as pd
      2 import numpy as np
      3 
      4 def get_ffme_returns():
      5     """
      6     Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
      7     """
      8     me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
      9                        header=0, index_col=0, na_values=-99.99)
     10     rets = me_m[['Lo 10', 'Hi 10']]
     11     rets.columns = ['SmallCap', 'LargeCap']
     12     rets = rets/100
     13     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     14     return rets
     15 
     16 def get_fff_returns():
     17     """
     18     Load the Fama-French Research Factor Monthly Dataset
     19     """
     20     rets = pd.read_csv("data/F-F_Research_Data_Factors_m.csv",
     21                        header=0, index_col=0, na_values=-99.99)/100
     22     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     23     return rets
     24 
     25 
     26 def get_hfi_returns():
     27     """
     28     Load and format the EDHEC Hedge Fund Index Returns
     29     """
     30     hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
     31                       header=0, index_col=0, parse_dates=True)
     32     hfi = hfi/100
     33     hfi.index = hfi.index.to_period('M')
     34     return hfi
     35 
     36 def get_ind_file(filetype, weighting="vw", n_inds=30):
     37     """
     38     Load and format the Ken French Industry Portfolios files
     39     Variant is a tuple of (weighting, size) where:
     40         weighting is one of "ew", "vw"
     41         number of inds is 30 or 49
     42     """    
     43     if filetype is "returns":
     44         name = f"{weighting}_rets" 
     45         divisor = 100
     46     elif filetype is "nfirms":
     47         name = "nfirms"
     48         divisor = 1
     49     elif filetype is "size":
     50         name = "size"
     51         divisor = 1
     52     else:
     53         raise ValueError(f"filetype must be one of: returns, nfirms, size")
     54     
     55     ind = pd.read_csv(f"data/ind{n_inds}_m_{name}.csv", header=0, index_col=0, na_values=-99.99)/divisor
     56     ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
     57     ind.columns = ind.columns.str.strip()
     58     return ind
     59 
     60 def get_ind_returns(weighting="vw", n_inds=30):
     61     """
     62     Load and format the Ken French Industry Portfolios Monthly Returns
     63     """
     64     return get_ind_file("returns", weighting=weighting, n_inds=n_inds)
     65 
     66 def get_ind_nfirms(n_inds=30):
     67     """
     68     Load and format the Ken French 30 Industry Portfolios Average number of Firms
     69     """
     70     return get_ind_file("nfirms", n_inds=n_inds)
     71 
     72 def get_ind_size(n_inds=30):
     73     """
     74     Load and format the Ken French 30 Industry Portfolios Average size (market cap)
     75     """
     76     return get_ind_file("size", n_inds=n_inds)
     77 
     78 
     79 def get_ind_market_caps(n_inds=30, weights=False):
     80     """
     81     Load the industry portfolio data and derive the market caps
     82     """
     83     ind_nfirms = get_ind_nfirms(n_inds=n_inds)
     84     ind_size = get_ind_size(n_inds=n_inds)
     85     ind_mktcap = ind_nfirms * ind_size
     86     if weights:
     87         total_mktcap = ind_mktcap.sum(axis=1)
     88         ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
     89         return ind_capweight
     90     #else
     91     return ind_mktcap
     92 
     93 def get_total_market_index_returns(n_inds=30):
     94     """
     95     Load the 30 industry portfolio data and derive the returns of a capweighted total market index
     96     """
     97     ind_capweight = get_ind_market_caps(n_inds=n_inds)
     98     ind_return = get_ind_returns(weighting="vw", n_inds=n_inds)
     99     total_market_return = (ind_capweight * ind_return).sum(axis="columns")
    100     return total_market_return
    101                          
    102 def skewness(r):
    103     """
    104     Alternative to scipy.stats.skew()
    105     Computes the skewness of the supplied Series or DataFrame
    106     Returns a float or a Series
    107     """
    108     demeaned_r = r - r.mean()
    109     # use the population standard deviation, so set dof=0
    110     sigma_r = r.std(ddof=0)
    111     exp = (demeaned_r**3).mean()
    112     return exp/sigma_r**3
    113 
    114 
    115 def kurtosis(r):
    116     """
    117     Alternative to scipy.stats.kurtosis()
    118     Computes the kurtosis of the supplied Series or DataFrame
    119     Returns a float or a Series
    120     """
    121     demeaned_r = r - r.mean()
    122     # use the population standard deviation, so set dof=0
    123     sigma_r = r.std(ddof=0)
    124     exp = (demeaned_r**4).mean()
    125     return exp/sigma_r**4
    126 
    127 
    128 def compound(r):
    129     """
    130     returns the result of compounding the set of returns in r
    131     """
    132     return np.expm1(np.log1p(r).sum())
    133 
    134                          
    135 def annualize_rets(r, periods_per_year):
    136     """
    137     Annualizes a set of returns
    138     We should infer the periods per year
    139     but that is currently left as an exercise
    140     to the reader :-)
    141     """
    142     compounded_growth = (1+r).prod()
    143     n_periods = r.shape[0]
    144     return compounded_growth**(periods_per_year/n_periods)-1
    145 
    146 
    147 def annualize_vol(r, periods_per_year):
    148     """
    149     Annualizes the vol of a set of returns
    150     We should infer the periods per year
    151     but that is currently left as an exercise
    152     to the reader :-)
    153     """
    154     return r.std()*(periods_per_year**0.5)
    155 
    156 
    157 def sharpe_ratio(r, riskfree_rate, periods_per_year):
    158     """
    159     Computes the annualized sharpe ratio of a set of returns
    160     """
    161     # convert the annual riskfree rate to per period
    162     rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
    163     excess_ret = r - rf_per_period
    164     ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
    165     ann_vol = annualize_vol(r, periods_per_year)
    166     return ann_ex_ret/ann_vol
    167 
    168 
    169 import scipy.stats
    170 def is_normal(r, level=0.01):
    171     """
    172     Applies the Jarque-Bera test to determine if a Series is normal or not
    173     Test is applied at the 1% level by default
    174     Returns True if the hypothesis of normality is accepted, False otherwise
    175     """
    176     if isinstance(r, pd.DataFrame):
    177         return r.aggregate(is_normal)
    178     else:
    179         statistic, p_value = scipy.stats.jarque_bera(r)
    180         return p_value > level
    181 
    182 
    183 def drawdown(return_series: pd.Series):
    184     """Takes a time series of asset returns.
    185        returns a DataFrame with columns for
    186        the wealth index, 
    187        the previous peaks, and 
    188        the percentage drawdown
    189     """
    190     wealth_index = 1000*(1+return_series).cumprod()
    191     previous_peaks = wealth_index.cummax()
    192     drawdowns = (wealth_index - previous_peaks)/previous_peaks
    193     return pd.DataFrame({"Wealth": wealth_index, 
    194                          "Previous Peak": previous_peaks, 
    195                          "Drawdown": drawdowns})
    196 
    197 
    198 def semideviation(r):
    199     """
    200     Returns the semideviation aka negative semideviation of r
    201     r must be a Series or a DataFrame, else raises a TypeError
    202     """
    203     if isinstance(r, pd.Series):
    204         is_negative = r < 0
    205         return r[is_negative].std(ddof=0)
    206     elif isinstance(r, pd.DataFrame):
    207         return r.aggregate(semideviation)
    208     else:
    209         raise TypeError("Expected r to be a Series or DataFrame")
    210 
    211 
    212 def var_historic(r, level=5):
    213     """
    214     Returns the historic Value at Risk at a specified level
    215     i.e. returns the number such that "level" percent of the returns
    216     fall below that number, and the (100-level) percent are above
    217     """
    218     if isinstance(r, pd.DataFrame):
    219         return r.aggregate(var_historic, level=level)
    220     elif isinstance(r, pd.Series):
    221         return -np.percentile(r, level)
    222     else:
    223         raise TypeError("Expected r to be a Series or DataFrame")
    224 
    225 
    226 def cvar_historic(r, level=5):
    227     """
    228     Computes the Conditional VaR of Series or DataFrame
    229     """
    230     if isinstance(r, pd.Series):
    231         is_beyond = r <= -var_historic(r, level=level)
    232         return -r[is_beyond].mean()
    233     elif isinstance(r, pd.DataFrame):
    234         return r.aggregate(cvar_historic, level=level)
    235     else:
    236         raise TypeError("Expected r to be a Series or DataFrame")
    237 
    238 
    239 from scipy.stats import norm
    240 def var_gaussian(r, level=5, modified=False):
    241     """
    242     Returns the Parametric Gauusian VaR of a Series or DataFrame
    243     If "modified" is True, then the modified VaR is returned,
    244     using the Cornish-Fisher modification
    245     """
    246     # compute the Z score assuming it was Gaussian
    247     z = norm.ppf(level/100)
    248     if modified:
    249         # modify the Z score based on observed skewness and kurtosis
    250         s = skewness(r)
    251         k = kurtosis(r)
    252         z = (z +
    253                 (z**2 - 1)*s/6 +
    254                 (z**3 -3*z)*(k-3)/24 -
    255                 (2*z**3 - 5*z)*(s**2)/36
    256             )
    257     return -(r.mean() + z*r.std(ddof=0))
    258 
    259 
    260 def portfolio_return(weights, returns):
    261     """
    262     Computes the return on a portfolio from constituent returns and weights
    263     weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
    264     """
    265     return weights.T @ returns
    266 
    267 
    268 def portfolio_vol(weights, covmat):
    269     """
    270     Computes the vol of a portfolio from a covariance matrix and constituent weights
    271     weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
    272     """
    273     vol = (weights.T @ covmat @ weights)**0.5
    274     return vol 
    275 
    276 
    277 def plot_ef2(n_points, er, cov):
    278     """
    279     Plots the 2-asset efficient frontier
    280     """
    281     if er.shape[0] != 2 or er.shape[0] != 2:
    282         raise ValueError("plot_ef2 can only plot 2-asset frontiers")
    283     weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
    284     rets = [portfolio_return(w, er) for w in weights]
    285     vols = [portfolio_vol(w, cov) for w in weights]
    286     ef = pd.DataFrame({
    287         "Returns": rets, 
    288         "Volatility": vols
    289     })
    290     return ef.plot.line(x="Volatility", y="Returns", style=".-")
    291 
    292 
    293 from scipy.optimize import minimize
    294 
    295 def minimize_vol(target_return, er, cov):
    296     """
    297     Returns the optimal weights that achieve the target return
    298     given a set of expected returns and a covariance matrix
    299     """
    300     n = er.shape[0]
    301     init_guess = np.repeat(1/n, n)
    302     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    303     # construct the constraints
    304     weights_sum_to_1 = {'type': 'eq',
    305                         'fun': lambda weights: np.sum(weights) - 1
    306     }
    307     return_is_target = {'type': 'eq',
    308                         'args': (er,),
    309                         'fun': lambda weights, er: target_return - portfolio_return(weights,er)
    310     }
    311     weights = minimize(portfolio_vol, init_guess,
    312                        args=(cov,), method='SLSQP',
    313                        options={'disp': False},
    314                        constraints=(weights_sum_to_1,return_is_target),
    315                        bounds=bounds)
    316     return weights.x
    317 
    318 
    319 def tracking_error(r_a, r_b):
    320     """
    321     Returns the Tracking Error between the two return series
    322     """
    323     return np.sqrt(((r_a - r_b)**2).sum())
    324 
    325                          
    326 def msr(riskfree_rate, er, cov):
    327     """
    328     Returns the weights of the portfolio that gives you the maximum sharpe ratio
    329     given the riskfree rate and expected returns and a covariance matrix
    330     """
    331     n = er.shape[0]
    332     init_guess = np.repeat(1/n, n)
    333     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    334     # construct the constraints
    335     weights_sum_to_1 = {'type': 'eq',
    336                         'fun': lambda weights: np.sum(weights) - 1
    337     }
    338     def neg_sharpe(weights, riskfree_rate, er, cov):
    339         """
    340         Returns the negative of the sharpe ratio
    341         of the given portfolio
    342         """
    343         r = portfolio_return(weights, er)
    344         vol = portfolio_vol(weights, cov)
    345         return -(r - riskfree_rate)/vol
    346     
    347     weights = minimize(neg_sharpe, init_guess,
    348                        args=(riskfree_rate, er, cov), method='SLSQP',
    349                        options={'disp': False},
    350                        constraints=(weights_sum_to_1,),
    351                        bounds=bounds)
    352     return weights.x
    353 
    354 
    355 def gmv(cov):
    356     """
    357     Returns the weights of the Global Minimum Volatility portfolio
    358     given a covariance matrix
    359     """
    360     n = cov.shape[0]
    361     return msr(0, np.repeat(1, n), cov)
    362 
    363 
    364 def optimal_weights(n_points, er, cov):
    365     """
    366     Returns a list of weights that represent a grid of n_points on the efficient frontier
    367     """
    368     target_rs = np.linspace(er.min(), er.max(), n_points)
    369     weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
    370     return weights
    371 
    372 
    373 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
    374     """
    375     Plots the multi-asset efficient frontier
    376     """
    377     weights = optimal_weights(n_points, er, cov)
    378     rets = [portfolio_return(w, er) for w in weights]
    379     vols = [portfolio_vol(w, cov) for w in weights]
    380     ef = pd.DataFrame({
    381         "Returns": rets, 
    382         "Volatility": vols
    383     })
    384     ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
    385     if show_cml:
    386         ax.set_xlim(left = 0)
    387         # get MSR
    388         w_msr = msr(riskfree_rate, er, cov)
    389         r_msr = portfolio_return(w_msr, er)
    390         vol_msr = portfolio_vol(w_msr, cov)
    391         # add CML
    392         cml_x = [0, vol_msr]
    393         cml_y = [riskfree_rate, r_msr]
    394         ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
    395     if show_ew:
    396         n = er.shape[0]
    397         w_ew = np.repeat(1/n, n)
    398         r_ew = portfolio_return(w_ew, er)
    399         vol_ew = portfolio_vol(w_ew, cov)
    400         # add EW
    401         ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
    402     if show_gmv:
    403         w_gmv = gmv(cov)
    404         r_gmv = portfolio_return(w_gmv, er)
    405         vol_gmv = portfolio_vol(w_gmv, cov)
    406         # add EW
    407         ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
    408         
    409         return ax
    410 
    411                          
    412 def run_cppi(risky_r, safe_r=None, m=3, start=1000, floor=0.8, riskfree_rate=0.03, drawdown=None):
    413     """
    414     Run a backtest of the CPPI strategy, given a set of returns for the risky asset
    415     Returns a dictionary containing: Asset Value History, Risk Budget History, Risky Weight History
    416     """
    417     # set up the CPPI parameters
    418     dates = risky_r.index
    419     n_steps = len(dates)
    420     account_value = start
    421     floor_value = start*floor
    422     peak = account_value
    423     if isinstance(risky_r, pd.Series): 
    424         risky_r = pd.DataFrame(risky_r, columns=["R"])
    425 
    426     if safe_r is None:
    427         safe_r = pd.DataFrame().reindex_like(risky_r)
    428         safe_r.values[:] = riskfree_rate/12 # fast way to set all values to a number
    429     # set up some DataFrames for saving intermediate values
    430     account_history = pd.DataFrame().reindex_like(risky_r)
    431     risky_w_history = pd.DataFrame().reindex_like(risky_r)
    432     cushion_history = pd.DataFrame().reindex_like(risky_r)
    433     floorval_history = pd.DataFrame().reindex_like(risky_r)
    434     peak_history = pd.DataFrame().reindex_like(risky_r)
    435 
    436     for step in range(n_steps):
    437         if drawdown is not None:
    438             peak = np.maximum(peak, account_value)
    439             floor_value = peak*(1-drawdown)
    440         cushion = (account_value - floor_value)/account_value
    441         risky_w = m*cushion
    442         risky_w = np.minimum(risky_w, 1)
    443         risky_w = np.maximum(risky_w, 0)
    444         safe_w = 1-risky_w
    445         risky_alloc = account_value*risky_w
    446         safe_alloc = account_value*safe_w
    447         # recompute the new account value at the end of this step
    448         account_value = risky_alloc*(1+risky_r.iloc[step]) + safe_alloc*(1+safe_r.iloc[step])
    449         # save the histories for analysis and plotting
    450         cushion_history.iloc[step] = cushion
    451         risky_w_history.iloc[step] = risky_w
    452         account_history.iloc[step] = account_value
    453         floorval_history.iloc[step] = floor_value
    454         peak_history.iloc[step] = peak
    455     risky_wealth = start*(1+risky_r).cumprod()
    456     backtest_result = {
    457         "Wealth": account_history,
    458         "Risky Wealth": risky_wealth, 
    459         "Risk Budget": cushion_history,
    460         "Risky Allocation": risky_w_history,
    461         "m": m,
    462         "start": start,
    463         "floor": floor,
    464         "risky_r":risky_r,
    465         "safe_r": safe_r,
    466         "drawdown": drawdown,
    467         "peak": peak_history,
    468         "floor": floorval_history
    469     }
    470     return backtest_result
    471 
    472 
    473 def summary_stats(r, riskfree_rate=0.03):
    474     """
    475     Return a DataFrame that contains aggregated summary stats for the returns in the columns of r
    476     """
    477     ann_r = r.aggregate(annualize_rets, periods_per_year=12)
    478     ann_vol = r.aggregate(annualize_vol, periods_per_year=12)
    479     ann_sr = r.aggregate(sharpe_ratio, riskfree_rate=riskfree_rate, periods_per_year=12)
    480     dd = r.aggregate(lambda r: drawdown(r).Drawdown.min())
    481     skew = r.aggregate(skewness)
    482     kurt = r.aggregate(kurtosis)
    483     cf_var5 = r.aggregate(var_gaussian, modified=True)
    484     hist_cvar5 = r.aggregate(cvar_historic)
    485     return pd.DataFrame({
    486         "Annualized Return": ann_r,
    487         "Annualized Vol": ann_vol,
    488         "Skewness": skew,
    489         "Kurtosis": kurt,
    490         "Cornish-Fisher VaR (5%)": cf_var5,
    491         "Historic CVaR (5%)": hist_cvar5,
    492         "Sharpe Ratio": ann_sr,
    493         "Max Drawdown": dd
    494     })
    495 
    496                          
    497 def gbm(n_years = 10, n_scenarios=1000, mu=0.07, sigma=0.15, steps_per_year=12, s_0=100.0, prices=True):
    498     """
    499     Evolution of Geometric Brownian Motion trajectories, such as for Stock Prices through Monte Carlo
    500     :param n_years:  The number of years to generate data for
    501     :param n_paths: The number of scenarios/trajectories
    502     :param mu: Annualized Drift, e.g. Market Return
    503     :param sigma: Annualized Volatility
    504     :param steps_per_year: granularity of the simulation
    505     :param s_0: initial value
    506     :return: a numpy array of n_paths columns and n_years*steps_per_year rows
    507     """
    508     # Derive per-step Model Parameters from User Specifications
    509     dt = 1/steps_per_year
    510     n_steps = int(n_years*steps_per_year) + 1
    511     # the standard way ...
    512     # rets_plus_1 = np.random.normal(loc=mu*dt+1, scale=sigma*np.sqrt(dt), size=(n_steps, n_scenarios))
    513     # without discretization error ...
    514     rets_plus_1 = np.random.normal(loc=(1+mu)**dt, scale=(sigma*np.sqrt(dt)), size=(n_steps, n_scenarios))
    515     rets_plus_1[0] = 1
    516     ret_val = s_0*pd.DataFrame(rets_plus_1).cumprod() if prices else rets_plus_1-1
    517     return ret_val
    518 
    519                          
    520 import statsmodels.api as sm
    521 def regress(dependent_variable, explanatory_variables, alpha=True):
    522     """
    523     Runs a linear regression to decompose the dependent variable into the explanatory variables
    524     returns an object of type statsmodel's RegressionResults on which you can call
    525        .summary() to print a full summary
    526        .params for the coefficients
    527        .tvalues and .pvalues for the significance levels
    528        .rsquared_adj and .rsquared for quality of fit
    529     """
    530     if alpha:
    531         explanatory_variables = explanatory_variables.copy()
    532         explanatory_variables["Alpha"] = 1
    533     
    534     lm = sm.OLS(dependent_variable, explanatory_variables).fit()
    535     return lm
    536 
    537 def portfolio_tracking_error(weights, ref_r, bb_r):
    538     """
    539     returns the tracking error between the reference returns
    540     and a portfolio of building block returns held with given weights
    541     """
    542     return tracking_error(ref_r, (weights*bb_r).sum(axis=1))
    543                          
    544 def style_analysis(dependent_variable, explanatory_variables):
    545     """
    546     Returns the optimal weights that minimizes the Tracking error between
    547     a portfolio of the explanatory variables and the dependent variable
    548     """
    549     n = explanatory_variables.shape[1]
    550     init_guess = np.repeat(1/n, n)
    551     bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
    552     # construct the constraints
    553     weights_sum_to_1 = {'type': 'eq',
    554                         'fun': lambda weights: np.sum(weights) - 1
    555     }
    556     solution = minimize(portfolio_tracking_error, init_guess,
    557                        args=(dependent_variable, explanatory_variables,), method='SLSQP',
    558                        options={'disp': False},
    559                        constraints=(weights_sum_to_1,),
    560                        bounds=bounds)
    561     weights = pd.Series(solution.x, index=explanatory_variables.columns)
    562     return weights
    563 
    564 
    565 def ff_analysis(r, factors):
    566     """
    567     Returns the loadings  of r on the Fama French Factors
    568     which can be read in using get_fff_returns()
    569     the index of r must be a (not necessarily proper) subset of the index of factors
    570     r is either a Series or a DataFrame
    571     """
    572     if isinstance(r, pd.Series):
    573         dependent_variable = r
    574         explanatory_variables = factors.loc[r.index]
    575         tilts = regress(dependent_variable, explanatory_variables).params
    576     elif isinstance(r, pd.DataFrame):
    577         tilts = pd.DataFrame({col: ff_analysis(r[col], factors) for col in r.columns})
    578     else:
    579         raise TypeError("r must be a Series or a DataFrame")
    580     return tilts
    581 
    582 def weight_ew(r, cap_weights=None, max_cw_mult=None, microcap_threshold=None, **kwargs):
    583     """
    584     Returns the weights of the EW portfolio based on the asset returns "r" as a DataFrame
    585     If supplied a set of capweights and a capweight tether, it is applied and reweighted 
    586     """
    587     n = len(r.columns)
    588     ew = pd.Series(1/n, index=r.columns)
    589     if cap_weights is not None:
    590         cw = cap_weights.loc[r.index[0]] # starting cap weight
    591         ## exclude microcaps
    592         if microcap_threshold is not None and microcap_threshold > 0:
    593             microcap = cw < microcap_threshold
    594             ew[microcap] = 0
    595             ew = ew/ew.sum()
    596         #limit weight to a multiple of capweight
    597         if max_cw_mult is not None and max_cw_mult > 0:
    598             ew = np.minimum(ew, cw*max_cw_mult)
    599             ew = ew/ew.sum() #reweight
    600     return ew
    601 
    602 def weight_cw(r, cap_weights, **kwargs):
    603     """
    604     Returns the weights of the CW portfolio based on the time series of capweights
    605     """
    606     w = cap_weights.loc[r.index[1]]
    607     return w/w.sum()
    608 
    609 def backtest_ws(r, estimation_window=60, weighting=weight_ew, verbose=False, **kwargs):
    610     """
    611     Backtests a given weighting scheme, given some parameters:
    612     r : asset returns to use to build the portfolio
    613     estimation_window: the window to use to estimate parameters
    614     weighting: the weighting scheme to use, must be a function that takes "r", and a variable number of keyword-value arguments
    615     """
    616     n_periods = r.shape[0]
    617     # return windows
    618     windows = [(start, start+estimation_window) for start in range(n_periods-estimation_window)]
    619     weights = [weighting(r.iloc[win[0]:win[1]], **kwargs) for win in windows]
    620     # convert List of weights to DataFrame
    621     weights = pd.DataFrame(weights, index=r.iloc[estimation_window:].index, columns=r.columns)
    622     returns = (weights * r).sum(axis="columns",  min_count=1) #mincount is to generate NAs if all inputs are NAs
    623     return returns
    624 
    625 def sample_cov(r, **kwargs):
    626     """
    627     Returns the sample covariance of the supplied returns
    628     """
    629     return r.cov()
    630 
    631 def weight_gmv(r, cov_estimator=sample_cov, **kwargs):
    632     """
    633     Produces the weights of the GMV portfolio given a covariance matrix of the returns 
    634     """
    635     est_cov = cov_estimator(r, **kwargs)
    636     return gmv(est_cov)
    637 
    638 def cc_cov(r, **kwargs):
    639     """
    640     Estimates a covariance matrix by using the Elton/Gruber Constant Correlation model
    641     """
    642     rhos = r.corr()
    643     n = rhos.shape[0]
    644     # this is a symmetric matrix with diagonals all 1 - so the mean correlation is ...
    645     rho_bar = (rhos.values.sum()-n)/(n*(n-1))
    646     ccor = np.full_like(rhos, rho_bar)
    647     np.fill_diagonal(ccor, 1.)
    648     sd = r.std()
    649     return pd.DataFrame(ccor * np.outer(sd, sd), index=r.columns, columns=r.columns)
    650 
    651 def shrinkage_cov(r, delta=0.5, **kwargs):
    652     """
    653     Covariance estimator that shrinks between the Sample Covariance and the Constant Correlation Estimators
    654     """
    655     prior = cc_cov(r, **kwargs)
    656     sample = sample_cov(r, **kwargs)
    657     return delta*prior + (1-delta)*sample
    658