ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_205.py
(23412B)
1 import pandas as pd
2 import numpy as np
3
4 def get_ffme_returns():
5 """
6 # Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
7 """
8 me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
9 header=0, index_col=0, na_values=-99.99)
10 rets = me_m[['Lo 10', 'Hi 10']]
11 rets.columns = ['SmallCap', 'LargeCap']
12 rets = rets/100
13 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
14 return rets
15
16 def get_fff_returns():
17 """
18 Load the Fama-French Research Factor Monthly Dataset
19 """
20 rets = pd.read_csv("data/F-F_Research_Data_Factors_m.csv",
21 header=0, index_col=0, na_values=-99.99)/100
22 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
23 return rets
24
25
26 def get_hfi_returns():
27 """
28 Load and format the EDHEC Hedge Fund Index Returns
29 """
30 hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
31 header=0, index_col=0, parse_dates=True)
32 hfi = hfi/100
33 hfi.index = hfi.index.to_period('M')
34 return hfi
35
36 def get_ind_file(filetype, weighting="vw", n_inds=30):
37 """
38 Load and format the Ken French Industry Portfolios files
39 Variant is a tuple of (weighting, size) where:
40 weighting is one of "ew", "vw"
41 number of inds is 30 or 49
42 """
43 if filetype is "returns":
44 name = f"{weighting}_rets"
45 divisor = 100
46 elif filetype is "nfirms":
47 name = "nfirms"
48 divisor = 1
49 elif filetype is "size":
50 name = "size"
51 divisor = 1
52 else:
53 raise ValueError(f"filetype must be one of: returns, nfirms, size")
54
55 ind = pd.read_csv(f"data/ind{n_inds}_m_{name}.csv", header=0, index_col=0, na_values=-99.99)/divisor
56 ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
57 ind.columns = ind.columns.str.strip()
58 return ind
59
60 def get_ind_returns(weighting="vw", n_inds=30):
61 """
62 Load and format the Ken French Industry Portfolios Monthly Returns
63 """
64 return get_ind_file("returns", weighting=weighting, n_inds=n_inds)
65
66 def get_ind_nfirms(n_inds=30):
67 """
68 Load and format the Ken French 30 Industry Portfolios Average number of Firms
69 """
70 return get_ind_file("nfirms", n_inds=n_inds)
71
72 def get_ind_size(n_inds=30):
73 """
74 Load and format the Ken French 30 Industry Portfolios Average size (market cap)
75 """
76 return get_ind_file("size", n_inds=n_inds)
77
78
79 def get_ind_market_caps(n_inds=30, weights=False):
80 """
81 Load the industry portfolio data and derive the market caps
82 """
83 ind_nfirms = get_ind_nfirms(n_inds=n_inds)
84 ind_size = get_ind_size(n_inds=n_inds)
85 ind_mktcap = ind_nfirms * ind_size
86 if weights:
87 total_mktcap = ind_mktcap.sum(axis=1)
88 ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
89 return ind_capweight
90 #else
91 return ind_mktcap
92
93 def get_total_market_index_returns(n_inds=30):
94 """
95 Load the 30 industry portfolio data and derive the returns of a capweighted total market index
96 """
97 ind_capweight = get_ind_market_caps(n_inds=n_inds)
98 ind_return = get_ind_returns(weighting="vw", n_inds=n_inds)
99 total_market_return = (ind_capweight * ind_return).sum(axis="columns")
100 return total_market_return
101
102 def skewness(r):
103 """
104 Alternative to scipy.stats.skew()
105 Computes the skewness of the supplied Series or DataFrame
106 Returns a float or a Series
107 """
108 demeaned_r = r - r.mean()
109 # use the population standard deviation, so set dof=0
110 sigma_r = r.std(ddof=0)
111 exp = (demeaned_r**3).mean()
112 return exp/sigma_r**3
113
114
115 def kurtosis(r):
116 """
117 Alternative to scipy.stats.kurtosis()
118 Computes the kurtosis of the supplied Series or DataFrame
119 Returns a float or a Series
120 """
121 demeaned_r = r - r.mean()
122 # use the population standard deviation, so set dof=0
123 sigma_r = r.std(ddof=0)
124 exp = (demeaned_r**4).mean()
125 return exp/sigma_r**4
126
127
128 def compound(r):
129 """
130 returns the result of compounding the set of returns in r
131 """
132 return np.expm1(np.log1p(r).sum())
133
134
135 def annualize_rets(r, periods_per_year):
136 """
137 Annualizes a set of returns
138 We should infer the periods per year
139 but that is currently left as an exercise
140 to the reader :-)
141 """
142 compounded_growth = (1+r).prod()
143 n_periods = r.shape[0]
144 return compounded_growth**(periods_per_year/n_periods)-1
145
146
147 def annualize_vol(r, periods_per_year):
148 """
149 Annualizes the vol of a set of returns
150 We should infer the periods per year
151 but that is currently left as an exercise
152 to the reader :-)
153 """
154 return r.std()*(periods_per_year**0.5)
155
156
157 def sharpe_ratio(r, riskfree_rate, periods_per_year):
158 """
159 Computes the annualized sharpe ratio of a set of returns
160 """
161 # convert the annual riskfree rate to per period
162 rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
163 excess_ret = r - rf_per_period
164 ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
165 ann_vol = annualize_vol(r, periods_per_year)
166 return ann_ex_ret/ann_vol
167
168
169 import scipy.stats
170 def is_normal(r, level=0.01):
171 """
172 Applies the Jarque-Bera test to determine if a Series is normal or not
173 Test is applied at the 1% level by default
174 Returns True if the hypothesis of normality is accepted, False otherwise
175 """
176 if isinstance(r, pd.DataFrame):
177 return r.aggregate(is_normal)
178 else:
179 statistic, p_value = scipy.stats.jarque_bera(r)
180 return p_value > level
181
182
183 def drawdown(return_series: pd.Series):
184 """Takes a time series of asset returns.
185 returns a DataFrame with columns for
186 the wealth index,
187 the previous peaks, and
188 the percentage drawdown
189 """
190 wealth_index = 1000*(1+return_series).cumprod()
191 previous_peaks = wealth_index.cummax()
192 drawdowns = (wealth_index - previous_peaks)/previous_peaks
193 return pd.DataFrame({"Wealth": wealth_index,
194 "Previous Peak": previous_peaks,
195 "Drawdown": drawdowns})
196
197
198 def semideviation(r):
199 """
200 Returns the semideviation aka negative semideviation of r
201 r must be a Series or a DataFrame, else raises a TypeError
202 """
203 if isinstance(r, pd.Series):
204 is_negative = r < 0
205 return r[is_negative].std(ddof=0)
206 elif isinstance(r, pd.DataFrame):
207 return r.aggregate(semideviation)
208 else:
209 raise TypeError("Expected r to be a Series or DataFrame")
210
211
212 def var_historic(r, level=5):
213 """
214 Returns the historic Value at Risk at a specified level
215 i.e. returns the number such that "level" percent of the returns
216 fall below that number, and the (100-level) percent are above
217 """
218 if isinstance(r, pd.DataFrame):
219 return r.aggregate(var_historic, level=level)
220 elif isinstance(r, pd.Series):
221 return -np.percentile(r, level)
222 else:
223 raise TypeError("Expected r to be a Series or DataFrame")
224
225
226 def cvar_historic(r, level=5):
227 """
228 Computes the Conditional VaR of Series or DataFrame
229 """
230 if isinstance(r, pd.Series):
231 is_beyond = r <= -var_historic(r, level=level)
232 return -r[is_beyond].mean()
233 elif isinstance(r, pd.DataFrame):
234 return r.aggregate(cvar_historic, level=level)
235 else:
236 raise TypeError("Expected r to be a Series or DataFrame")
237
238
239 from scipy.stats import norm
240 def var_gaussian(r, level=5, modified=False):
241 """
242 Returns the Parametric Gauusian VaR of a Series or DataFrame
243 If "modified" is True, then the modified VaR is returned,
244 using the Cornish-Fisher modification
245 """
246 # compute the Z score assuming it was Gaussian
247 z = norm.ppf(level/100)
248 if modified:
249 # modify the Z score based on observed skewness and kurtosis
250 s = skewness(r)
251 k = kurtosis(r)
252 z = (z +
253 (z**2 - 1)*s/6 +
254 (z**3 -3*z)*(k-3)/24 -
255 (2*z**3 - 5*z)*(s**2)/36
256 )
257 return -(r.mean() + z*r.std(ddof=0))
258
259
260 def portfolio_return(weights, returns):
261 """
262 Computes the return on a portfolio from constituent returns and weights
263 weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
264 """
265 return weights.T @ returns
266
267
268 def portfolio_vol(weights, covmat):
269 """
270 Computes the vol of a portfolio from a covariance matrix and constituent weights
271 weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
272 """
273 vol = (weights.T @ covmat @ weights)**0.5
274 return vol
275
276
277 def plot_ef2(n_points, er, cov):
278 """
279 Plots the 2-asset efficient frontier
280 """
281 if er.shape[0] != 2 or er.shape[0] != 2:
282 raise ValueError("plot_ef2 can only plot 2-asset frontiers")
283 weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
284 rets = [portfolio_return(w, er) for w in weights]
285 vols = [portfolio_vol(w, cov) for w in weights]
286 ef = pd.DataFrame({
287 "Returns": rets,
288 "Volatility": vols
289 })
290 return ef.plot.line(x="Volatility", y="Returns", style=".-")
291
292
293 from scipy.optimize import minimize
294
295 def minimize_vol(target_return, er, cov):
296 """
297 Returns the optimal weights that achieve the target return
298 given a set of expected returns and a covariance matrix
299 """
300 n = er.shape[0]
301 init_guess = np.repeat(1/n, n)
302 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
303 # construct the constraints
304 weights_sum_to_1 = {'type': 'eq',
305 'fun': lambda weights: np.sum(weights) - 1
306 }
307 return_is_target = {'type': 'eq',
308 'args': (er,),
309 'fun': lambda weights, er: target_return - portfolio_return(weights,er)
310 }
311 weights = minimize(portfolio_vol, init_guess,
312 args=(cov,), method='SLSQP',
313 options={'disp': False},
314 constraints=(weights_sum_to_1,return_is_target),
315 bounds=bounds)
316 return weights.x
317
318
319 def tracking_error(r_a, r_b):
320 """
321 Returns the Tracking Error between the two return series
322 """
323 return np.sqrt(((r_a - r_b)**2).sum())
324
325
326 def msr(riskfree_rate, er, cov):
327 """
328 Returns the weights of the portfolio that gives you the maximum sharpe ratio
329 given the riskfree rate and expected returns and a covariance matrix
330 """
331 n = er.shape[0]
332 init_guess = np.repeat(1/n, n)
333 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
334 # construct the constraints
335 weights_sum_to_1 = {'type': 'eq',
336 'fun': lambda weights: np.sum(weights) - 1
337 }
338 def neg_sharpe(weights, riskfree_rate, er, cov):
339 """
340 Returns the negative of the sharpe ratio
341 of the given portfolio
342 """
343 r = portfolio_return(weights, er)
344 vol = portfolio_vol(weights, cov)
345 return -(r - riskfree_rate)/vol
346
347 weights = minimize(neg_sharpe, init_guess,
348 args=(riskfree_rate, er, cov), method='SLSQP',
349 options={'disp': False},
350 constraints=(weights_sum_to_1,),
351 bounds=bounds)
352 return weights.x
353
354
355 def gmv(cov):
356 """
357 Returns the weights of the Global Minimum Volatility portfolio
358 given a covariance matrix
359 """
360 n = cov.shape[0]
361 return msr(0, np.repeat(1, n), cov)
362
363
364 def optimal_weights(n_points, er, cov):
365 """
366 Returns a list of weights that represent a grid of n_points on the efficient frontier
367 """
368 target_rs = np.linspace(er.min(), er.max(), n_points)
369 weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
370 return weights
371
372
373 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
374 """
375 Plots the multi-asset efficient frontier
376 """
377 weights = optimal_weights(n_points, er, cov)
378 rets = [portfolio_return(w, er) for w in weights]
379 vols = [portfolio_vol(w, cov) for w in weights]
380 ef = pd.DataFrame({
381 "Returns": rets,
382 "Volatility": vols
383 })
384 ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
385 if show_cml:
386 ax.set_xlim(left = 0)
387 # get MSR
388 w_msr = msr(riskfree_rate, er, cov)
389 r_msr = portfolio_return(w_msr, er)
390 vol_msr = portfolio_vol(w_msr, cov)
391 # add CML
392 cml_x = [0, vol_msr]
393 cml_y = [riskfree_rate, r_msr]
394 ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
395 if show_ew:
396 n = er.shape[0]
397 w_ew = np.repeat(1/n, n)
398 r_ew = portfolio_return(w_ew, er)
399 vol_ew = portfolio_vol(w_ew, cov)
400 # add EW
401 ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
402 if show_gmv:
403 w_gmv = gmv(cov)
404 r_gmv = portfolio_return(w_gmv, er)
405 vol_gmv = portfolio_vol(w_gmv, cov)
406 # add EW
407 ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
408
409 return ax
410
411
412 def run_cppi(risky_r, safe_r=None, m=3, start=1000, floor=0.8, riskfree_rate=0.03, drawdown=None):
413 """
414 Run a backtest of the CPPI strategy, given a set of returns for the risky asset
415 Returns a dictionary containing: Asset Value History, Risk Budget History, Risky Weight History
416 """
417 # set up the CPPI parameters
418 dates = risky_r.index
419 n_steps = len(dates)
420 account_value = start
421 floor_value = start*floor
422 peak = account_value
423 if isinstance(risky_r, pd.Series):
424 risky_r = pd.DataFrame(risky_r, columns=["R"])
425
426 if safe_r is None:
427 safe_r = pd.DataFrame().reindex_like(risky_r)
428 safe_r.values[:] = riskfree_rate/12 # fast way to set all values to a number
429 # set up some DataFrames for saving intermediate values
430 account_history = pd.DataFrame().reindex_like(risky_r)
431 risky_w_history = pd.DataFrame().reindex_like(risky_r)
432 cushion_history = pd.DataFrame().reindex_like(risky_r)
433 floorval_history = pd.DataFrame().reindex_like(risky_r)
434 peak_history = pd.DataFrame().reindex_like(risky_r)
435
436 for step in range(n_steps):
437 if drawdown is not None:
438 peak = np.maximum(peak, account_value)
439 floor_value = peak*(1-drawdown)
440 cushion = (account_value - floor_value)/account_value
441 risky_w = m*cushion
442 risky_w = np.minimum(risky_w, 1)
443 risky_w = np.maximum(risky_w, 0)
444 safe_w = 1-risky_w
445 risky_alloc = account_value*risky_w
446 safe_alloc = account_value*safe_w
447 # recompute the new account value at the end of this step
448 account_value = risky_alloc*(1+risky_r.iloc[step]) + safe_alloc*(1+safe_r.iloc[step])
449 # save the histories for analysis and plotting
450 cushion_history.iloc[step] = cushion
451 risky_w_history.iloc[step] = risky_w
452 account_history.iloc[step] = account_value
453 floorval_history.iloc[step] = floor_value
454 peak_history.iloc[step] = peak
455 risky_wealth = start*(1+risky_r).cumprod()
456 backtest_result = {
457 "Wealth": account_history,
458 "Risky Wealth": risky_wealth,
459 "Risk Budget": cushion_history,
460 "Risky Allocation": risky_w_history,
461 "m": m,
462 "start": start,
463 "floor": floor,
464 "risky_r":risky_r,
465 "safe_r": safe_r,
466 "drawdown": drawdown,
467 "peak": peak_history,
468 "floor": floorval_history
469 }
470 return backtest_result
471
472
473 def summary_stats(r, riskfree_rate=0.03):
474 """
475 Return a DataFrame that contains aggregated summary stats for the returns in the columns of r
476 """
477 ann_r = r.aggregate(annualize_rets, periods_per_year=12)
478 ann_vol = r.aggregate(annualize_vol, periods_per_year=12)
479 ann_sr = r.aggregate(sharpe_ratio, riskfree_rate=riskfree_rate, periods_per_year=12)
480 dd = r.aggregate(lambda r: drawdown(r).Drawdown.min())
481 skew = r.aggregate(skewness)
482 kurt = r.aggregate(kurtosis)
483 cf_var5 = r.aggregate(var_gaussian, modified=True)
484 hist_cvar5 = r.aggregate(cvar_historic)
485 return pd.DataFrame({
486 "Annualized Return": ann_r,
487 "Annualized Vol": ann_vol,
488 "Skewness": skew,
489 "Kurtosis": kurt,
490 "Cornish-Fisher VaR (5%)": cf_var5,
491 "Historic CVaR (5%)": hist_cvar5,
492 "Sharpe Ratio": ann_sr,
493 "Max Drawdown": dd
494 })
495
496
497 def gbm(n_years = 10, n_scenarios=1000, mu=0.07, sigma=0.15, steps_per_year=12, s_0=100.0, prices=True):
498 """
499 Evolution of Geometric Brownian Motion trajectories, such as for Stock Prices through Monte Carlo
500 :param n_years: The number of years to generate data for
501 :param n_paths: The number of scenarios/trajectories
502 :param mu: Annualized Drift, e.g. Market Return
503 :param sigma: Annualized Volatility
504 :param steps_per_year: granularity of the simulation
505 :param s_0: initial value
506 :return: a numpy array of n_paths columns and n_years*steps_per_year rows
507 """
508 # Derive per-step Model Parameters from User Specifications
509 dt = 1/steps_per_year
510 n_steps = int(n_years*steps_per_year) + 1
511 # the standard way ...
512 # rets_plus_1 = np.random.normal(loc=mu*dt+1, scale=sigma*np.sqrt(dt), size=(n_steps, n_scenarios))
513 # without discretization error ...
514 rets_plus_1 = np.random.normal(loc=(1+mu)**dt, scale=(sigma*np.sqrt(dt)), size=(n_steps, n_scenarios))
515 rets_plus_1[0] = 1
516 ret_val = s_0*pd.DataFrame(rets_plus_1).cumprod() if prices else rets_plus_1-1
517 return ret_val
518
519
520 import statsmodels.api as sm
521 def regress(dependent_variable, explanatory_variables, alpha=True):
522 """
523 Runs a linear regression to decompose the dependent variable into the explanatory variables
524 returns an object of type statsmodel's RegressionResults on which you can call
525 .summary() to print a full summary
526 .params for the coefficients
527 .tvalues and .pvalues for the significance levels
528 .rsquared_adj and .rsquared for quality of fit
529 """
530 if alpha:
531 explanatory_variables = explanatory_variables.copy()
532 explanatory_variables["Alpha"] = 1
533
534 lm = sm.OLS(dependent_variable, explanatory_variables).fit()
535 return lm
536
537 def portfolio_tracking_error(weights, ref_r, bb_r):
538 """
539 returns the tracking error between the reference returns
540 and a portfolio of building block returns held with given weights
541 """
542 return tracking_error(ref_r, (weights*bb_r).sum(axis=1))
543
544 def style_analysis(dependent_variable, explanatory_variables):
545 """
546 Returns the optimal weights that minimizes the Tracking error between
547 a portfolio of the explanatory variables and the dependent variable
548 """
549 n = explanatory_variables.shape[1]
550 init_guess = np.repeat(1/n, n)
551 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
552 # construct the constraints
553 weights_sum_to_1 = {'type': 'eq',
554 'fun': lambda weights: np.sum(weights) - 1
555 }
556 solution = minimize(portfolio_tracking_error, init_guess,
557 args=(dependent_variable, explanatory_variables,), method='SLSQP',
558 options={'disp': False},
559 constraints=(weights_sum_to_1,),
560 bounds=bounds)
561 weights = pd.Series(solution.x, index=explanatory_variables.columns)
562 return weights
563
564
565 def ff_analysis(r, factors):
566 """
567 Returns the loadings of r on the Fama French Factors
568 which can be read in using get_fff_returns()
569 the index of r must be a (not necessarily proper) subset of the index of factors
570 r is either a Series or a DataFrame
571 """
572 if isinstance(r, pd.Series):
573 dependent_variable = r
574 explanatory_variables = factors.loc[r.index]
575 tilts = regress(dependent_variable, explanatory_variables).params
576 elif isinstance(r, pd.DataFrame):
577 tilts = pd.DataFrame({col: ff_analysis(r[col], factors) for col in r.columns})
578 else:
579 raise TypeError("r must be a Series or a DataFrame")
580 return tilts
581
582 def weight_ew(r, cap_weights=None, max_cw_mult=None, microcap_threshold=None, **kwargs):
583 """
584 Returns the weights of the EW portfolio based on the asset returns "r" as a DataFrame
585 If supplied a set of capweights and a capweight tether, it is applied and reweighted
586 """
587 n = len(r.columns)
588 ew = pd.Series(1/n, index=r.columns)
589 if cap_weights is not None:
590 cw = cap_weights.loc[r.index[0]] # starting cap weight
591 ## exclude microcaps
592 if microcap_threshold is not None and microcap_threshold > 0:
593 microcap = cw < microcap_threshold
594 ew[microcap] = 0
595 ew = ew/ew.sum()
596 #limit weight to a multiple of capweight
597 if max_cw_mult is not None and max_cw_mult > 0:
598 ew = np.minimum(ew, cw*max_cw_mult)
599 ew = ew/ew.sum() #reweight
600 return ew
601
602 def weight_cw(r, cap_weights, **kwargs):
603 """
604 Returns the weights of the CW portfolio based on the time series of capweights
605 """
606 w = cap_weights.loc[r.index[1]]
607 return w/w.sum()
608
609 def backtest_ws(r, estimation_window=60, weighting=weight_ew, verbose=False, **kwargs):
610 """
611 Backtests a given weighting scheme, given some parameters:
612 r : asset returns to use to build the portfolio
613 estimation_window: the window to use to estimate parameters
614 weighting: the weighting scheme to use, must be a function that takes "r", and a variable number of keyword-value arguments
615 """
616 n_periods = r.shape[0]
617 # return windows
618 windows = [(start, start+estimation_window) for start in range(n_periods-estimation_window)]
619 weights = [weighting(r.iloc[win[0]:win[1]], **kwargs) for win in windows]
620 # convert List of weights to DataFrame
621 weights = pd.DataFrame(weights, index=r.iloc[estimation_window:].index, columns=r.columns)
622 returns = (weights * r).sum(axis="columns", min_count=1) #mincount is to generate NAs if all inputs are NAs
623 return returns
624
625 def sample_cov(r, **kwargs):
626 """
627 Returns the sample covariance of the supplied returns
628 """
629 return r.cov()
630
631 def weight_gmv(r, cov_estimator=sample_cov, **kwargs):
632 """
633 Produces the weights of the GMV portfolio given a covariance matrix of the returns
634 """
635 est_cov = cov_estimator(r, **kwargs)
636 return gmv(est_cov)
637
638 def cc_cov(r, **kwargs):
639 """
640 Estimates a covariance matrix by using the Elton/Gruber Constant Correlation model
641 """
642 rhos = r.corr()
643 n = rhos.shape[0]
644 # this is a symmetric matrix with diagonals all 1 - so the mean correlation is ...
645 rho_bar = (rhos.values.sum()-n)/(n*(n-1))
646 ccor = np.full_like(rhos, rho_bar)
647 np.fill_diagonal(ccor, 1.)
648 sd = r.std()
649 return pd.DataFrame(ccor * np.outer(sd, sd), index=r.columns, columns=r.columns)
650
651 def shrinkage_cov(r, delta=0.5, **kwargs):
652 """
653 Covariance estimator that shrinks between the Sample Covariance and the Constant Correlation Estimators
654 """
655 prior = cc_cov(r, **kwargs)
656 sample = sample_cov(r, **kwargs)
657 return delta*prior + (1-delta)*sample
658