ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_118.py
(12094B)
1 llimport pandas as pd
2 import numpy as np
3
4 def get_ffme_returns():
5 """
6 Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
7 """
8 me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
9 header=0, index_col=0, na_values=-99.99)
10 rets = me_m[['Lo 10', 'Hi 10']]
11 rets.columns = ['SmallCap', 'LargeCap']
12 rets = rets/100
13 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
14 return rets
15
16
17 def get_hfi_returns():
18 """
19 Load and format the EDHEC Hedge Fund Index Returns
20 """
21 hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
22 header=0, index_col=0, parse_dates=True)
23 hfi = hfi/100
24 hfi.index = hfi.index.to_period('M')
25 return hfi
26
27 def get_ind_file(filetype):
28 """
29 Load and format the Ken French 30 Industry Portfolios files
30 """
31 known_types = ["returns", "nfirms", "size"]
32 if filetype not in known_types:
33 raise ValueError(f"filetype must be one of:{','.join(known_types)}")
34 if filetype is "returns":
35 name = "vw_rets"
36 divisor = 100
37 elif filetype is "nfirms":
38 name = "nfirms"
39 divisor = 1
40 elif filetype is "size":
41 name = "size"
42 divisor = 1
43
44 ind = pd.read_csv(f"data/ind30_m_{name}.csv", header=0, index_col=0)/divisor
45 ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
46 ind.columns = ind.columns.str.strip()
47 return ind
48
49 def get_ind_returns():
50 """
51 Load and format the Ken French 30 Industry Portfolios Value Weighted Monthly Returns
52 """
53 return get_ind_file("returns")
54
55 def get_ind_nfirms():
56 """
57 Load and format the Ken French 30 Industry Portfolios Average number of Firms
58 """
59 return get_ind_file("nfirms")
60
61 def get_ind_size():
62 """
63 Load and format the Ken French 30 Industry Portfolios Average size (market cap)
64 """
65 return get_ind_file("size")
66
67
68 def get_total_market_index_returns():
69 """
70 Load the 30 industry portfolio data and derive the returns of a capweighted total market index
71 """
72 ind_nfirms = get_ind_nfirms()
73 ind_size = get_ind_size()
74 ind_return = get_ind_returns()
75 ind_mktcap = ind_nfirms * ind_size
76 total_mktcap = ind_mktcap.sum(axis=1)
77 ind_capweight = ind_mktcap.divide(total_mktcap, axis="rows")
78 total_market_return = (ind_capweight * ind_return).sum(axis="columns")
79 return total_market_return
80
81 def skewness(r):
82 """
83 Alternative to scipy.stats.skew()
84 Computes the skewness of the supplied Series or DataFrame
85 Returns a float or a Series
86 """
87 demeaned_r = r - r.mean()
88 # use the population standard deviation, so set dof=0
89 sigma_r = r.std(ddof=0)
90 exp = (demeaned_r**3).mean()
91 return exp/sigma_r**3
92
93
94 def kurtosis(r):
95 """
96 Alternative to scipy.stats.kurtosis()
97 Computes the kurtosis of the supplied Series or DataFrame
98 Returns a float or a Series
99 """
100 demeaned_r = r - r.mean()
101 # use the population standard deviation, so set dof=0
102 sigma_r = r.std(ddof=0)
103 exp = (demeaned_r**4).mean()
104 return exp/sigma_r**4
105
106
107 def annualize_rets(r, periods_per_year):
108 """
109 Annualizes a set of returns
110 We should infer the periods per year
111 but that is currently left as an exercise
112 to the reader :-)
113 """
114 compounded_growth = (1+r).prod()
115 n_periods = r.shape[0]
116 return compounded_growth**(periods_per_year/n_periods)-1
117
118
119 def annualize_vol(r, periods_per_year):
120 """
121 Annualizes the vol of a set of returns
122 We should infer the periods per year
123 but that is currently left as an exercise
124 to the reader :-)
125 """
126 return r.std()*(periods_per_year**0.5)
127
128
129 def sharpe_ratio(r, riskfree_rate, periods_per_year):
130 """
131 Computes the annualized sharpe ratio of a set of returns
132 """
133 # convert the annual riskfree rate to per period
134 rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
135 excess_ret = r - rf_per_period
136 ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
137 ann_vol = annualize_vol(r, periods_per_year)
138 return ann_ex_ret/ann_vol
139
140
141 import scipy.stats
142 def is_normal(r, level=0.01):
143 """
144 Applies the Jarque-Bera test to determine if a Series is normal or not
145 Test is applied at the 1% level by default
146 Returns True if the hypothesis of normality is accepted, False otherwise
147 """
148 if isinstance(r, pd.DataFrame):
149 return r.aggregate(is_normal)
150 else:
151 statistic, p_value = scipy.stats.jarque_bera(r)
152 return p_value > level
153
154
155 def drawdown(return_series: pd.Series):
156 """Takes a time series of asset returns.
157 returns a DataFrame with columns for
158 the wealth index,
159 the previous peaks, and
160 the percentage drawdown
161 """
162 wealth_index = 1000*(1+return_series).cumprod()
163 previous_peaks = wealth_index.cummax()
164 drawdowns = (wealth_index - previous_peaks)/previous_peaks
165 return pd.DataFrame({"Wealth": wealth_index,
166 "Previous Peak": previous_peaks,
167 "Drawdown": drawdowns})
168
169
170 def semideviation(r):
171 """
172 Returns the semideviation aka negative semideviation of r
173 r must be a Series or a DataFrame, else raises a TypeError
174 """
175 if isinstance(r, pd.Series):
176 is_negative = r < 0
177 return r[is_negative].std(ddof=0)
178 elif isinstance(r, pd.DataFrame):
179 return r.aggregate(semideviation)
180 else:
181 raise TypeError("Expected r to be a Series or DataFrame")
182
183
184 def var_historic(r, level=5):
185 """
186 Returns the historic Value at Risk at a specified level
187 i.e. returns the number such that "level" percent of the returns
188 fall below that number, and the (100-level) percent are above
189 """
190 if isinstance(r, pd.DataFrame):
191 return r.aggregate(var_historic, level=level)
192 elif isinstance(r, pd.Series):
193 return -np.percentile(r, level)
194 else:
195 raise TypeError("Expected r to be a Series or DataFrame")
196
197
198 def cvar_historic(r, level=5):
199 """
200 Computes the Conditional VaR of Series or DataFrame
201 """
202 if isinstance(r, pd.Series):
203 is_beyond = r <= -var_historic(r, level=level)
204 return -r[is_beyond].mean()
205 elif isinstance(r, pd.DataFrame):
206 return r.aggregate(cvar_historic, level=level)
207 else:
208 raise TypeError("Expected r to be a Series or DataFrame")
209
210
211 from scipy.stats import norm
212 def var_gaussian(r, level=5, modified=False):
213 """
214 Returns the Parametric Gauusian VaR of a Series or DataFrame
215 If "modified" is True, then the modified VaR is returned,
216 using the Cornish-Fisher modification
217 """
218 # compute the Z score assuming it was Gaussian
219 z = norm.ppf(level/100)
220 if modified:
221 # modify the Z score based on observed skewness and kurtosis
222 s = skewness(r)
223 k = kurtosis(r)
224 z = (z +
225 (z**2 - 1)*s/6 +
226 (z**3 -3*z)*(k-3)/24 -
227 (2*z**3 - 5*z)*(s**2)/36
228 )
229 return -(r.mean() + z*r.std(ddof=0))
230
231
232 def portfolio_return(weights, returns):
233 """
234 Computes the return on a portfolio from constituent returns and weights
235 weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
236 """
237 return weights.T @ returns
238
239
240 def portfolio_vol(weights, covmat):
241 """
242 Computes the vol of a portfolio from a covariance matrix and constituent weights
243 weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
244 """
245 return (weights.T @ covmat @ weights)**0.5
246
247
248 def plot_ef2(n_points, er, cov):
249 """
250 Plots the 2-asset efficient frontier
251 """
252 if er.shape[0] != 2 or er.shape[0] != 2:
253 raise ValueError("plot_ef2 can only plot 2-asset frontiers")
254 weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
255 rets = [portfolio_return(w, er) for w in weights]
256 vols = [portfolio_vol(w, cov) for w in weights]
257 ef = pd.DataFrame({
258 "Returns": rets,
259 "Volatility": vols
260 })
261 return ef.plot.line(x="Volatility", y="Returns", style=".-")
262
263
264 from scipy.optimize import minimize
265
266 def minimize_vol(target_return, er, cov):
267 """
268 Returns the optimal weights that achieve the target return
269 given a set of expected returns and a covariance matrix
270 """
271 n = er.shape[0]
272 init_guess = np.repeat(1/n, n)
273 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
274 # construct the constraints
275 weights_sum_to_1 = {'type': 'eq',
276 'fun': lambda weights: np.sum(weights) - 1
277 }
278 return_is_target = {'type': 'eq',
279 'args': (er,),
280 'fun': lambda weights, er: target_return - portfolio_return(weights,er)
281 }
282 weights = minimize(portfolio_vol, init_guess,
283 args=(cov,), method='SLSQP',
284 options={'disp': False},
285 constraints=(weights_sum_to_1,return_is_target),
286 bounds=bounds)
287 return weights.x
288
289
290 def msr(riskfree_rate, er, cov):
291 """
292 Returns the weights of the portfolio that gives you the maximum sharpe ratio
293 given the riskfree rate and expected returns and a covariance matrix
294 """
295 n = er.shape[0]
296 init_guess = np.repeat(1/n, n)
297 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
298 # construct the constraints
299 weights_sum_to_1 = {'type': 'eq',
300 'fun': lambda weights: np.sum(weights) - 1
301 }
302 def neg_sharpe(weights, riskfree_rate, er, cov):
303 """
304 Returns the negative of the sharpe ratio
305 of the given portfolio
306 """
307 r = portfolio_return(weights, er)
308 vol = portfolio_vol(weights, cov)
309 return -(r - riskfree_rate)/vol
310
311 weights = minimize(neg_sharpe, init_guess,
312 args=(riskfree_rate, er, cov), method='SLSQP',
313 options={'disp': False},
314 constraints=(weights_sum_to_1,),
315 bounds=bounds)
316 return weights.x
317
318
319 def gmv(cov):
320 """
321 Returns the weights of the Global Minimum Volatility portfolio
322 given a covariance matrix
323 """
324 n = cov.shape[0]
325 return msr(0, np.repeat(1, n), cov)
326
327
328 def optimal_weights(n_points, er, cov):
329 """
330 Returns a list of weights that represent a grid of n_points on the efficient frontier
331 """
332 target_rs = np.linspace(er.min(), er.max(), n_points)
333 weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
334 return weights
335
336
337 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0, show_ew=False, show_gmv=False):
338 """
339 Plots the multi-asset efficient frontier
340 """
341 weights = optimal_weights(n_points, er, cov)
342 rets = [portfolio_return(w, er) for w in weights]
343 vols = [portfolio_vol(w, cov) for w in weights]
344 ef = pd.DataFrame({
345 "Returns": rets,
346 "Volatility": vols
347 })
348 ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
349 if show_cml:
350 ax.set_xlim(left = 0)
351 # get MSR
352 w_msr = msr(riskfree_rate, er, cov)
353 r_msr = portfolio_return(w_msr, er)
354 vol_msr = portfolio_vol(w_msr, cov)
355 # add CML
356 cml_x = [0, vol_msr]
357 cml_y = [riskfree_rate, r_msr]
358 ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=10)
359 if show_ew:
360 n = er.shape[0]
361 w_ew = np.repeat(1/n, n)
362 r_ew = portfolio_return(w_ew, er)
363 vol_ew = portfolio_vol(w_ew, cov)
364 # add EW
365 ax.plot([vol_ew], [r_ew], color='goldenrod', marker='o', markersize=10)
366 if show_gmv:
367 w_gmv = gmv(cov)
368 r_gmv = portfolio_return(w_gmv, er)
369 vol_gmv = portfolio_vol(w_gmv, cov)
370 # add EW
371 ax.plot([vol_gmv], [r_gmv], color='midnightblue', marker='o', markersize=10)
372
373 return ax