ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_206.py
(25414B)
1 import pandas as pd
2 import numpy as np
3
4 def get_ffme_returns():
5 """
6 Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
7 """
8 me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
9 header=0, index_col=0, na_values=-99.99)
10 rets = me_m[['Lo 10', 'Hi 10']]
11 rets.columns = ['SmallCap', 'LargeCap']
12 rets = rets/100
13 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
14 return rets
15
16 def get_fff_returns():
17 """
18 Load the Fama-French Research Factor Monthly Dataset
19 """
20 rets = pd.read_csv("data/F-F_Research_Data_Factors_m.csv",
21 header=0, index_col=0, na_values=-99.99)/100
22 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
23 return rets
24
25
26 def get_hfi_returns():
27 """
28 Load and format the EDHEC Hedge Fund Index Returns
29 """
30 hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
31 header=0, index_col=0, parse_dates=True)
32 hfi = hfi/100
33 hfi.index = hfi.index.to_period('M')
34 return hfi
35
36 def get_ind_file(filetype, weighting="vw", n_inds=30):
37 """
38 Load and format the Ken French Industry Portfolios files
39 Variant is a tuple of (weighting, size) where:
40 weighting is one of "ew", "vw"
41 number of inds is 30 or 49
42 """
43 if filetype is "returns":
44 name = f"{weighting}_rets"
45 divisor = 100
46 elif filetype is "nfirms":
47 name = "nfirms"
48 divisor = 1
49 elif filetype is "size":
50 name = "size"
51 divisor = 1
52 else:
53 raise ValueError(f"filetype must be one of: returns, nfirms, size")
54
55 ind = pd.read_csv(f"data/ind{n_inds}_m_{name}.csv", header=0, index_col=0, na_values=-99.99)/divisor
56 ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
57 ind.columns = ind.columns.str.strip()
58 return ind
59
60 def get_ind_returns(weighting="vw", n_inds=30):
61 """
62 Load and format the Ken French Industry Portfolios Monthly Returns
63 """
64 return get_ind_file("returns", weighting=weighting, n_inds=n_inds)
65
66 def get_ind_nfirms(n_inds=30):
67 """
68 Load and format the Ken French 30 Industry Portfolios Average number of Firms
69 """
70 return get_ind_file("nfirms", n_inds=n_inds)
71
72 def get_ind_size(n_inds=30):
73 """
74 Load and format the Ken French 30 Industry Portfolios Average size (market cap)
75 """
76 return get_ind_file("size", n_inds=n_inds)
77
78
79 def get_ind_market_caps(n_inds=30, weights=False):
80 """
81 Load the industry portfolio data and derive the market caps
82 """
83 ind_nfirms = get_ind_nfirms(n_inds=n_inds)
84 ind_size = get_ind_size(n_inds=n_inds)
85 ind_mktcap = ind_nfirms * ind_size
86 if weights:
87 total_mktcap = ind_mktcap.sum(axis=1)
88 ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
89 return ind_capweight
90 #else
91 return ind_mktcap
92
93 def get_total_market_index_returns(n_inds=30):
94 """
95 Load the 30 industry portfolio data and derive the returns of a capweighted total market index
96 """
97 ind_capweight = get_ind_market_caps(n_inds=n_inds)
98 ind_return = get_ind_returns(weighting="vw", n_inds=n_inds)
99 total_market_return = (ind_capweight * ind_return).sum(axis="columns")
100 return total_market_return
101
102 def skewness(r):
103 """
104 Alternative to scipy.stats.skew()
105 Computes the skewness of the supplied Series or DataFrame
106 Returns a float or a Series
107 """
108 demeaned_r = r - r.mean()
109 # use the population standard deviation, so set dof=0
110 sigma_r = r.std(ddof=0)
111 exp = (demeaned_r**3).mean()
112 return exp/sigma_r**3
113
114
115 def kurtosis(r):
116 """
117 Alternative to scipy.stats.kurtosis()
118 Computes the kurtosis of the supplied Series or DataFrame
119 Returns a float or a Series
120 """
121 demeaned_r = r - r.mean()
122 # use the population standard deviation, so set dof=0
123 sigma_r = r.std(ddof=0)
124 exp = (demeaned_r**4).mean()
125 return exp/sigma_r**4
126
127
128 def compound(r):
129 """
130 returns the result of compounding the set of returns in r
131 """
132 return np.expm1(np.log1p(r).sum())
133
134
135 def annualize_rets(r, periods_per_year):
136 """
137 Annualizes a set of returns
138 We should infer the periods per year
139 but that is currently left as an exercise
140 to the reader :-)
141 """
142 compounded_growth = (1+r).prod()
143 n_periods = r.shape[0]
144 return compounded_growth**(periods_per_year/n_periods)-1
145
146
147 def annualize_vol(r, periods_per_year):
148 """
149 Annualizes the vol of a set of returns
150 We should infer the periods per year
151 but that is currently left as an exercise
152 to the reader :-)
153 """
154 return r.std()*(periods_per_year**0.5)
155
156
157 def sharpe_ratio(r, riskfree_rate, periods_per_year):
158 """
159 Computes the annualized sharpe ratio of a set of returns
160 """
161 # convert the annual riskfree rate to per period
162 rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
163 excess_ret = r - rf_per_period
164 ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
165 ann_vol = annualize_vol(r, periods_per_year)
166 return ann_ex_ret/ann_vol
167
168
169 import scipy.stats
170 def is_normal(r, level=0.01):
171 """
172 Applies the Jarque-Bera test to determine if a Series is normal or not
173 Test is applied at the 1% level by default
174 Returns True if the hypothesis of normality is accepted, False otherwise
175 """
176 if isinstance(r, pd.DataFrame):
177 return r.aggregate(is_normal)
178 else:
179 statistic, p_value = scipy.stats.jarque_bera(r)
180 return p_value > level
181
182
183 def drawdown(return_series: pd.Series):
184 """Takes a time series of asset returns.
185 returns a DataFrame with columns for
186 the wealth index,
187 the previous peaks, and
188 the percentage drawdown
189 """
190 wealth_index = 1000*(1+return_series).cumprod()
191 previous_peaks = wealth_index.cummax()
192 drawdowns = (wealth_index - previous_peaks)/previous_peaks
193 return pd.DataFrame({"Wealth": wealth_index,
194 "Previous Peak": previous_peaks,
195 "Drawdown": drawdowns})
196
197
198 def semideviation(r):
199 """
200 Returns the semideviation aka negative semideviation of r
201 r must be a Series or a DataFrame, else raises a TypeError
202 """
203 if isinstance(r, pd.Series):
204 is_negative = r < 0
205 return r[is_negative].std(ddof=0)
206 elif isinstance(r, pd.DataFrame):
207 return r.aggregate(semideviation)
208 else:
209 raise TypeError("Expected r to be a Series or DataFrame")
210
211
212 def var_historic(r, level=5):
213 """
214 Returns the historic Value at Risk at a specified level
215 i.e. returns the number such that "level" percent of the returns
216 fall below that number, and the (100-level) percent are above
217 """
218 if isinstance(r, pd.DataFrame):
219 return r.aggregate(var_historic, level=level)
220 elif isinstance(r, pd.Series):
221 return -np.percentile(r, level)
222 else:
223 raise TypeError("Expected r to be a Series or DataFrame")
224
225
226 def cvar_historic(r, level=5):
227 """
228 Computes the Conditional VaR of Series or DataFrame
229 """
230 if isinstance(r, pd.Series):
231 is_beyond = r <= -var_historic(r, level=level)
232 return -r[is_beyond].mean()
233 elif isinstance(r, pd.DataFrame):
234 return r.aggregate(cvar_historic, level=level)
235 else:
236 raise TypeError("Expected r to be a Series or DataFrame")
237
238
239 from scipy.stats import norm
240 def var_gaussian(r, level=5, modified=False):
241 """
242 Returns the Parametric Gauusian VaR of a Series or DataFrame
243 If "modified" is True, then the modified VaR is returned,
244 using the Cornish-Fisher modification
245 """
246 # compute the Z score assuming it was Gaussian
247 z = norm.ppf(level/100)
248 if modified:
249 # modify the Z score based on observed skewness and kurtosis
250 s = skewness(r)
251 k = kurtosis(r)
252 z = (z +
253 (z**2 - 1)*s/6 +
254 (z**3 -3*z)*(k-3)/24 -
255 (2*z**3 - 5*z)*(s**2)/36
256 )
257 return -(r.mean() + z*r.std(ddof=0))
258
259
260 def portfolio_return(weights, returns):
261 """
262 Computes the return on a portfolio from constituent returns and weights
263 weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
264 """
265 return weights.T @ returns
266
267
268 def portfolio_vol(weights, covmat):
269 """
270 Computes the vol of a portfolio from a covariance matrix and constituent weights
271 weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
272 """
273 vol = (weights.T @ covmat @ weights)**0.5
274 return vol
275
276
277 def plot_ef2(n_points, er, cov):
278 """
279 Plots the 2-asset efficient frontier
280 """
281 if er.shape[0] != 2 or er.shape[0] != 2:
282 raise ValueError("plot_ef2 can only plot 2-asset frontiers")
283 weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
284 rets = [portfolio_return(w, er) for w in weights]
285 vols = [portfolio_vol(w, cov) for w in weights]
286 ef = pd.DataFrame({
287 "Returns": rets,
288 "Volatility": vols
289 })
290 return ef.plot.line(x="Volatility", y="Returns", style=".-")
291
292
293 from scipy.optimize import minimize
294
295 def minimize_vol(target_return, er, cov):
296 """
297 Returns the optimal weights that achieve the target return
298 given a set of expected returns and a covariance matrix
299 """
300 n = er.shape[0]
301 init_guess = np.repeat(1/n, n)
302 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
303 # construct the constraints
304 weights_sum_to_1 = {'type': 'eq',
305 'fun': lambda weights: np.sum(weights) - 1
306 }
307 return_is_target = {'type': 'eq',
308 'args': (er,),
309 'fun': lambda weights, er: target_return - portfolio_return(weights,er)
310 }
311 weights = minimize(portfolio_vol, init_guess,
312 args=(cov,), method='SLSQP',
313 options={'disp': False},
314 constraints=(weights_sum_to_1,return_is_target),
315 bounds=bounds)
316 return weights.x
317
318
319 def tracking_error(r_a, r_b):
320 """
321 Returns the Tracking Error between the two return series
322 """
323 return np.sqrt(((r_a - r_b)**2).sum())
324
325
326 def msr(riskfree_rate, er, cov):
327 """
328 Returns the weights of the portfolio that gives you the maximum sharpe ratio
329 given the riskfree rate and expected returns and a covariance matrix
330 """
331 n = er.shape[0]
332 init_guess = np.repeat(1/n, n)
333 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
334 # construct the constraints
335 weights_sum_to_1 = {'type': 'eq',
336 'fun': lambda weights: np.sum(weights) - 1
337 }
338 def neg_sharpe(weights, riskfree_rate, er, cov):
339 """
340 Returns the negative of the sharpe ratio
341 of the given portfolio
342 """
343 r = portfolio_return(weights, er)
344 vol = portfolio_vol(weights, cov)
345 return -(r - riskfree_rate)/vol
346
347 weights = minimize(neg_sharpe, init_guess,
348 args=(riskfree_rate, er, cov), method='SLSQP',
349 options={'disp': False},
350 constraints=(weights_sum_to_1,),
351 bounds=bounds)
352 return weights.x
353
354
355 def gmv(cov):
356 """
357 Returns the weights of the Global Minimum Volatility portfolio
358 given a covariance matrix
359 """
360 n = cov.shape[0]
361 return msr(0, np.repeat(1, n), cov)
362
363
364 def optimal_weights(n_points, er, cov):
365 """
366 Returns a list of weights that represent a grid of n_points on the efficient frontier
367 """
368 target_rs = np.linspace(er.min(), er.max(), n_points)
369 weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
370 return weights
371
372
373 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
374 """
375 Plots the multi-asset efficient frontier
376 """
377 weights = optimal_weights(n_points, er, cov)
378 rets = [portfolio_return(w, er) for w in weights]
379 vols = [portfolio_vol(w, cov) for w in weights]
380 ef = pd.DataFrame({
381 "Returns": rets,
382 "Volatility": vols
383 })
384 ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
385 if show_cml:
386 ax.set_xlim(left = 0)
387 # get MSR
388 w_msr = msr(riskfree_rate, er, cov)
389 r_msr = portfolio_return(w_msr, er)
390 vol_msr = portfolio_vol(w_msr, cov)
391 # add CML
392 cml_x = [0, vol_msr]
393 cml_y = [riskfree_rate, r_msr]
394 ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
395 if show_ew:
396 n = er.shape[0]
397 w_ew = np.repeat(1/n, n)
398 r_ew = portfolio_return(w_ew, er)
399 vol_ew = portfolio_vol(w_ew, cov)
400 # add EW
401 ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
402 if show_gmv:
403 w_gmv = gmv(cov)
404 r_gmv = portfolio_return(w_gmv, er)
405 vol_gmv = portfolio_vol(w_gmv, cov)
406 # add EW
407 ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
408
409 return ax
410
411
412 def run_cppi(risky_r, safe_r=None, m=3, start=1000, floor=0.8, riskfree_rate=0.03, drawdown=None):
413 """
414 Run a backtest of the CPPI strategy, given a set of returns for the risky asset
415 Returns a dictionary containing: Asset Value History, Risk Budget History, Risky Weight History
416 """
417 # set up the CPPI parameters
418 dates = risky_r.index
419 n_steps = len(dates)
420 account_value = start
421 floor_value = start*floor
422 peak = account_value
423 if isinstance(risky_r, pd.Series):
424 risky_r = pd.DataFrame(risky_r, columns=["R"])
425
426 if safe_r is None:
427 safe_r = pd.DataFrame().reindex_like(risky_r)
428 safe_r.values[:] = riskfree_rate/12 # fast way to set all values to a number
429 # set up some DataFrames for saving intermediate values
430 account_history = pd.DataFrame().reindex_like(risky_r)
431 risky_w_history = pd.DataFrame().reindex_like(risky_r)
432 cushion_history = pd.DataFrame().reindex_like(risky_r)
433 floorval_history = pd.DataFrame().reindex_like(risky_r)
434 peak_history = pd.DataFrame().reindex_like(risky_r)
435
436 for step in range(n_steps):
437 if drawdown is not None:
438 peak = np.maximum(peak, account_value)
439 floor_value = peak*(1-drawdown)
440 cushion = (account_value - floor_value)/account_value
441 risky_w = m*cushion
442 risky_w = np.minimum(risky_w, 1)
443 risky_w = np.maximum(risky_w, 0)
444 safe_w = 1-risky_w
445 risky_alloc = account_value*risky_w
446 safe_alloc = account_value*safe_w
447 # recompute the new account value at the end of this step
448 account_value = risky_alloc*(1+risky_r.iloc[step]) + safe_alloc*(1+safe_r.iloc[step])
449 # save the histories for analysis and plotting
450 cushion_history.iloc[step] = cushion
451 risky_w_history.iloc[step] = risky_w
452 account_history.iloc[step] = account_value
453 floorval_history.iloc[step] = floor_value
454 peak_history.iloc[step] = peak
455 risky_wealth = start*(1+risky_r).cumprod()
456 backtest_result = {
457 "Wealth": account_history,
458 "Risky Wealth": risky_wealth,
459 "Risk Budget": cushion_history,
460 "Risky Allocation": risky_w_history,
461 "m": m,
462 "start": start,
463 "floor": floor,
464 "risky_r":risky_r,
465 "safe_r": safe_r,
466 "drawdown": drawdown,
467 "peak": peak_history,
468 "floor": floorval_history
469 }
470 return backtest_result
471
472
473 def summary_stats(r, riskfree_rate=0.03):
474 """
475 Return a DataFrame that contains aggregated summary stats for the returns in the columns of r
476 """
477 ann_r = r.aggregate(annualize_rets, periods_per_year=12)
478 ann_vol = r.aggregate(annualize_vol, periods_per_year=12)
479 ann_sr = r.aggregate(sharpe_ratio, riskfree_rate=riskfree_rate, periods_per_year=12)
480 dd = r.aggregate(lambda r: drawdown(r).Drawdown.min())
481 skew = r.aggregate(skewness)
482 kurt = r.aggregate(kurtosis)
483 cf_var5 = r.aggregate(var_gaussian, modified=True)
484 hist_cvar5 = r.aggregate(cvar_historic)
485 return pd.DataFrame({
486 "Annualized Return": ann_r,
487 "Annualized Vol": ann_vol,
488 "Skewness": skew,
489 "Kurtosis": kurt,
490 "Cornish-Fisher VaR (5%)": cf_var5,
491 "Historic CVaR (5%)": hist_cvar5,
492 "Sharpe Ratio": ann_sr,
493 "Max Drawdown": dd
494 })
495
496
497 def gbm(n_years = 10, n_scenarios=1000, mu=0.07, sigma=0.15, steps_per_year=12, s_0=100.0, prices=True):
498 """
499 Evolution of Geometric Brownian Motion trajectories, such as for Stock Prices through Monte Carlo
500 :param n_years: The number of years to generate data for
501 :param n_paths: The number of scenarios/trajectories
502 :param mu: Annualized Drift, e.g. Market Return
503 :param sigma: Annualized Volatility
504 :param steps_per_year: granularity of the simulation
505 :param s_0: initial value
506 :return: a numpy array of n_paths columns and n_years*steps_per_year rows
507 """
508 # Derive per-step Model Parameters from User Specifications
509 dt = 1/steps_per_year
510 n_steps = int(n_years*steps_per_year) + 1
511 # the standard way ...
512 # rets_plus_1 = np.random.normal(loc=mu*dt+1, scale=sigma*np.sqrt(dt), size=(n_steps, n_scenarios))
513 # without discretization error ...
514 rets_plus_1 = np.random.normal(loc=(1+mu)**dt, scale=(sigma*np.sqrt(dt)), size=(n_steps, n_scenarios))
515 rets_plus_1[0] = 1
516 ret_val = s_0*pd.DataFrame(rets_plus_1).cumprod() if prices else rets_plus_1-1
517 return ret_val
518
519
520 import statsmodels.api as sm
521 def regress(dependent_variable, explanatory_variables, alpha=True):
522 """
523 Runs a linear regression to decompose the dependent variable into the explanatory variables
524 returns an object of type statsmodel's RegressionResults on which you can call
525 .summary() to print a full summary
526 .params for the coefficients
527 .tvalues and .pvalues for the significance levels
528 .rsquared_adj and .rsquared for quality of fit
529 """
530 if alpha:
531 explanatory_variables = explanatory_variables.copy()
532 explanatory_variables["Alpha"] = 1
533
534 lm = sm.OLS(dependent_variable, explanatory_variables).fit()
535 return lm
536
537 def portfolio_tracking_error(weights, ref_r, bb_r):
538 """
539 returns the tracking error between the reference returns
540 and a portfolio of building block returns held with given weights
541 """
542 return tracking_error(ref_r, (weights*bb_r).sum(axis=1))
543
544 def style_analysis(dependent_variable, explanatory_variables):
545 """
546 Returns the optimal weights that minimizes the Tracking error between
547 a portfolio of the explanatory variables and the dependent variable
548 """
549 n = explanatory_variables.shape[1]
550 init_guess = np.repeat(1/n, n)
551 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
552 # construct the constraints
553 weights_sum_to_1 = {'type': 'eq',
554 'fun': lambda weights: np.sum(weights) - 1
555 }
556 solution = minimize(portfolio_tracking_error, init_guess,
557 args=(dependent_variable, explanatory_variables,), method='SLSQP',
558 options={'disp': False},
559 constraints=(weights_sum_to_1,),
560 bounds=bounds)
561 weights = pd.Series(solution.x, index=explanatory_variables.columns)
562 return weights
563
564
565 def ff_analysis(r, factors):
566 """
567 Returns the loadings of r on the Fama French Factors
568 which can be read in using get_fff_returns()
569 the index of r must be a (not necessarily proper) subset of the index of factors
570 r is either a Series or a DataFrame
571 """
572 if isinstance(r, pd.Series):
573 dependent_variable = r
574 explanatory_variables = factors.loc[r.index]
575 tilts = regress(dependent_variable, explanatory_variables).params
576 elif isinstance(r, pd.DataFrame):
577 tilts = pd.DataFrame({col: ff_analysis(r[col], factors) for col in r.columns})
578 else:
579 raise TypeError("r must be a Series or a DataFrame")
580 return tilts
581
582 def weight_ew(r, cap_weights=None, max_cw_mult=None, microcap_threshold=None, **kwargs):
583 """
584 Returns the weights of the EW portfolio based on the asset returns "r" as a DataFrame
585 If supplied a set of capweights and a capweight tether, it is applied and reweighted
586 """
587 n = len(r.columns)
588 ew = pd.Series(1/n, index=r.columns)
589 if cap_weights is not None:
590 cw = cap_weights.loc[r.index[0]] # starting cap weight
591 ## exclude microcaps
592 if microcap_threshold is not None and microcap_threshold > 0:
593 microcap = cw < microcap_threshold
594 ew[microcap] = 0
595 ew = ew/ew.sum()
596 #limit weight to a multiple of capweight
597 if max_cw_mult is not None and max_cw_mult > 0:
598 ew = np.minimum(ew, cw*max_cw_mult)
599 ew = ew/ew.sum() #reweight
600 return ew
601
602 def weight_cw(r, cap_weights, **kwargs):
603 """
604 Returns the weights of the CW portfolio based on the time series of capweights
605 """
606 w = cap_weights.loc[r.index[1]]
607 return w/w.sum()
608
609 def backtest_ws(r, estimation_window=60, weighting=weight_ew, verbose=False, **kwargs):
610 """
611 Backtests a given weighting scheme, given some parameters:
612 r : asset returns to use to build the portfolio
613 estimation_window: the window to use to estimate parameters
614 weighting: the weighting scheme to use, must be a function that takes "r", and a variable number of keyword-value arguments
615 """
616 n_periods = r.shape[0]
617 # return windows
618 windows = [(start, start+estimation_window) for start in range(n_periods-estimation_window)]
619 weights = [weighting(r.iloc[win[0]:win[1]], **kwargs) for win in windows]
620 # convert List of weights to DataFrame
621 weights = pd.DataFrame(weights, index=r.iloc[estimation_window:].index, columns=r.columns)
622 returns = (weights * r).sum(axis="columns", min_count=1) #mincount is to generate NAs if all inputs are NAs
623 return returns
624
625 def sample_cov(r, **kwargs):
626 """
627 Returns the sample covariance of the supplied returns
628 """
629 return r.cov()
630
631 def weight_gmv(r, cov_estimator=sample_cov, **kwargs):
632 """
633 Produces the weights of the GMV portfolio given a covariance matrix of the returns
634 """
635 est_cov = cov_estimator(r, **kwargs)
636 return gmv(est_cov)
637
638 def cc_cov(r, **kwargs):
639 """
640 Estimates a covariance matrix by using the Elton/Gruber Constant Correlation model
641 """
642 rhos = r.corr()
643 n = rhos.shape[0]
644 # this is a symmetric matrix with diagonals all 1 - so the mean correlation is ...
645 rho_bar = (rhos.values.sum()-n)/(n*(n-1))
646 ccor = np.full_like(rhos, rho_bar)
647 np.fill_diagonal(ccor, 1.)
648 sd = r.std()
649 return pd.DataFrame(ccor * np.outer(sd, sd), index=r.columns, columns=r.columns)
650
651 def shrinkage_cov(r, delta=0.5, **kwargs):
652 """
653 Covariance estimator that shrinks between the Sample Covariance and the Constant Correlation Estimators
654 """
655 prior = cc_cov(r, **kwargs)
656 sample = sample_cov(r, **kwargs)
657 return delta*prior + (1-delta)*sample
658
659 def risk_contribution(w,cov):
660 """
661 Compute the contributions to risk of the constituents of a portfolio, given a set of portfolio weights and a covariance matrix
662 """
663 total_portfolio_var = portfolio_vol(w,cov)**2
664 # Marginal contribution of each constituent
665 marginal_contrib = cov@w
666 risk_contrib = np.multiply(marginal_contrib,w.T)/total_portfolio_var
667 return risk_contrib
668
669 def target_risk_contributions(target_risk, cov):
670 """
671 Returns the weights of the portfolio that gives you the weights such
672 that the contributions to portfolio risk are as close as possible to
673 the target_risk, given the covariance matrix
674 """
675 n = cov.shape[0]
676 init_guess = np.repeat(1/n, n)
677 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
678 # construct the constraints
679 weights_sum_to_1 = {'type': 'eq',
680 'fun': lambda weights: np.sum(weights) - 1
681 }
682 def msd_risk(weights, target_risk, cov):
683 """
684 Returns the Mean Squared Difference in risk contributions
685 between weights and target_risk
686 """
687 w_contribs = risk_contribution(weights, cov)
688 return ((w_contribs-target_risk)**2).sum()
689
690 weights = minimize(msd_risk, init_guess,
691 args=(target_risk, cov), method='SLSQP',
692 options={'disp': False},
693 constraints=(weights_sum_to_1,),
694 bounds=bounds)
695 return weights.x
696
697 def equal_risk_contributions(cov):
698 """
699 Returns the weights of the portfolio that equalizes the contributions
700 of the constituents based on the given covariance matrix
701 """
702 n = cov.shape[0]
703 return target_risk_contributions(target_risk=np.repeat(1/n,n), cov=cov)
704
705 def weight_erc(r, cov_estimator=sample_cov, **kwargs):
706 """
707 Produces the weights of the ERC portfolio given a covariance matrix of the returns
708 """
709 est_cov = cov_estimator(r, **kwargs)
710 return equal_risk_contributions(est_cov)