ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_110.py
(9949B)
1 import pandas as pd
2 import numpy as np
3
4 def get_ffme_returns():
5 """
6 Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
7 """
8 me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
9 header=0, index_col=0, na_values=-99.99)
10 rets = me_m[['Lo 10', 'Hi 10']]
11 rets.columns = ['SmallCap', 'LargeCap']
12 rets = rets/100
13 rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
14 return rets
15
16
17 def get_hfi_returns():
18 """
19 Load and format the EDHEC Hedge Fund Index Returns
20 """
21 hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
22 header=0, index_col=0, parse_dates=True)
23 hfi = hfi/100
24 hfi.index = hfi.index.to_period('M')
25 return hfi
26
27 def get_ind_returns():
28 """
29 Load and format the Ken French 30 Industry Portfolios Value Weighted Monthly Returns
30 """
31 ind = pd.read_csv("data/ind30_m_vw_rets.csv", header=0, index_col=0)/100
32 ind.index = pd.to_datetime(ind.index, format="%Y%m").to_period('M')
33 ind.columns = ind.columns.str.strip()
34 return ind
35
36
37 def skewness(r):
38 """
39 Alternative to scipy.stats.skew()
40 Computes the skewness of the supplied Series or DataFrame
41 Returns a float or a Series
42 """
43 demeaned_r = r - r.mean()
44 # use the population standard deviation, so set dof=0
45 sigma_r = r.std(ddof=0)
46 exp = (demeaned_r**3).mean()
47 return exp/sigma_r**3
48
49
50 def kurtosis(r):
51 """
52 Alternative to scipy.stats.kurtosis()
53 Computes the kurtosis of the supplied Series or DataFrame
54 Returns a float or a Series
55 """
56 demeaned_r = r - r.mean()
57 # use the population standard deviation, so set dof=0
58 sigma_r = r.std(ddof=0)
59 exp = (demeaned_r**4).mean()
60 return exp/sigma_r**4
61
62
63 def annualize_rets(r, periods_per_year):
64 """
65 Annualizes a set of returns
66 We should infer the periods per year
67 but that is currently left as an exercise
68 to the reader :-)
69 """
70 compounded_growth = (1+r).prod()
71 n_periods = r.shape[0]
72 return compounded_growth**(periods_per_year/n_periods)-1
73
74
75 def annualize_vol(r, periods_per_year):
76 """
77 Annualizes the vol of a set of returns
78 We should infer the periods per year
79 but that is currently left as an exercise
80 to the reader :-)
81 """
82 return r.std()*(periods_per_year**0.5)
83
84
85 def sharpe_ratio(r, riskfree_rate, periods_per_year):
86 """
87 Computes the annualized sharpe ratio of a set of returns
88 """
89 # convert the annual riskfree rate to per period
90 rf_per_period = (1+riskfree_rate)**(1/periods_per_year)-1
91 excess_ret = r - rf_per_period
92 ann_ex_ret = annualize_rets(excess_ret, periods_per_year)
93 ann_vol = annualize_vol(r, periods_per_year)
94 return ann_ex_ret/ann_vol
95
96
97 import scipy.stats
98 def is_normal(r, level=0.01):
99 """
100 Applies the Jarque-Bera test to determine if a Series is normal or not
101 Test is applied at the 1% level by default
102 Returns True if the hypothesis of normality is accepted, False otherwise
103 """
104 if isinstance(r, pd.DataFrame):
105 return r.aggregate(is_normal)
106 else:
107 statistic, p_value = scipy.stats.jarque_bera(r)
108 return p_value > level
109
110
111 def drawdown(return_series: pd.Series):
112 """Takes a time series of asset returns.
113 returns a DataFrame with columns for
114 the wealth index,
115 the previous peaks, and
116 the percentage drawdown
117 """
118 wealth_index = 1000*(1+return_series).cumprod()
119 previous_peaks = wealth_index.cummax()
120 drawdowns = (wealth_index - previous_peaks)/previous_peaks
121 return pd.DataFrame({"Wealth": wealth_index,
122 "Previous Peak": previous_peaks,
123 "Drawdown": drawdowns})
124
125
126 def semideviation(r):
127 """
128 Returns the semideviation aka negative semideviation of r
129 r must be a Series or a DataFrame, else raises a TypeError
130 """
131 if isinstance(r, pd.Series):
132 is_negative = r < 0
133 return r[is_negative].std(ddof=0)
134 elif isinstance(r, pd.DataFrame):
135 return r.aggregate(semideviation)
136 else:
137 raise TypeError("Expected r to be a Series or DataFrame")
138
139
140 def var_historic(r, level=5):
141 """
142 Returns the historic Value at Risk at a specified level
143 i.e. returns the number such that "level" percent of the returns
144 fall below that number, and the (100-level) percent are above
145 """
146 if isinstance(r, pd.DataFrame):
147 return r.aggregate(var_historic, level=level)
148 elif isinstance(r, pd.Series):
149 return -np.percentile(r, level)
150 else:
151 raise TypeError("Expected r to be a Series or DataFrame")
152
153
154 def cvar_historic(r, level=5):
155 """
156 Computes the Conditional VaR of Series or DataFrame
157 """
158 if isinstance(r, pd.Series):
159 is_beyond = r <= var_historic(r, level=level)
160 return -r[is_beyond].mean()
161 elif isinstance(r, pd.DataFrame):
162 return r.aggregate(cvar_historic, level=level)
163 else:
164 raise TypeError("Expected r to be a Series or DataFrame")
165
166
167 from scipy.stats import norm
168 def var_gaussian(r, level=5, modified=False):
169 """
170 Returns the Parametric Gauusian VaR of a Series or DataFrame
171 If "modified" is True, then the modified VaR is returned,
172 using the Cornish-Fisher modification
173 """
174 # compute the Z score assuming it was Gaussian
175 z = norm.ppf(level/100)
176 if modified:
177 # modify the Z score based on observed skewness and kurtosis
178 s = skewness(r)
179 k = kurtosis(r)
180 z = (z +
181 (z**2 - 1)*s/6 +
182 (z**3 -3*z)*(k-3)/24 -
183 (2*z**3 - 5*z)*(s**2)/36
184 )
185 return -(r.mean() + z*r.std(ddof=0))
186
187
188 def portfolio_return(weights, returns):
189 """
190 Computes the return on a portfolio from constituent returns and weights
191 weights are a numpy array or Nx1 matrix and returns are a numpy array or Nx1 matrix
192 """
193 return weights.T @ returns
194
195
196 def portfolio_vol(weights, covmat):
197 """
198 Computes the vol of a portfolio from a covariance matrix and constituent weights
199 weights are a numpy array or N x 1 maxtrix and covmat is an N x N matrix
200 """
201 return (weights.T @ covmat @ weights)**0.5
202
203
204 def plot_ef2(n_points, er, cov):
205 """
206 Plots the 2-asset efficient frontier
207 """
208 if er.shape[0] != 2 or er.shape[0] != 2:
209 raise ValueError("plot_ef2 can only plot 2-asset frontiers")
210 weights = [np.array([w, 1-w]) for w in np.linspace(0, 1, n_points)]
211 rets = [portfolio_return(w, er) for w in weights]
212 vols = [portfolio_vol(w, cov) for w in weights]
213 ef = pd.DataFrame({
214 "Returns": rets,
215 "Volatility": vols
216 })
217 return ef.plot.line(x="Volatility", y="Returns", style=".-")
218
219
220 from scipy.optimize import minimize
221
222 def minimize_vol(target_return, er, cov):
223 """
224 Returns the optimal weights that achieve the target return
225 given a set of expected returns and a covariance matrix
226 """
227 n = er.shape[0]
228 init_guess = np.repeat(1/n, n)
229 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
230 # construct the constraints
231 weights_sum_to_1 = {'type': 'eq',
232 'fun': lambda weights: np.sum(weights) - 1
233 }
234 return_is_target = {'type': 'eq',
235 'args': (er,),
236 'fun': lambda weights, er: target_return - portfolio_return(weights,er)
237 }
238 weights = minimize(portfolio_vol, init_guess,
239 args=(cov,), method='SLSQP',
240 options={'disp': False},
241 constraints=(weights_sum_to_1,return_is_target),
242 bounds=bounds)
243 return weights.x
244
245
246 def msr(riskfree_rate, er, cov):
247 """
248 Returns the weights of the portfolio that gives you the maximum sharpe ratio
249 given the riskfree rate and expected returns and a covariance matrix
250 """
251 n = er.shape[0]
252 init_guess = np.repeat(1/n, n)
253 bounds = ((0.0, 1.0),) * n # an N-tuple of 2-tuples!
254 # construct the constraints
255 weights_sum_to_1 = {'type': 'eq',
256 'fun': lambda weights: np.sum(weights) - 1
257 }
258 def neg_sharpe(weights, riskfree_rate, er, cov):
259 """
260 Returns the negative of the sharpe ratio
261 of the given portfolio
262 """
263 r = portfolio_return(weights, er)
264 vol = portfolio_vol(weights, cov)
265 return -(r - riskfree_rate)/vol
266
267 weights = minimize(neg_sharpe, init_guess,
268 args=(riskfree_rate, er, cov), method='SLSQP',
269 options={'disp': False},
270 constraints=(weights_sum_to_1,),
271 bounds=bounds)
272 return weights.x
273
274
275 def optimal_weights(n_points, er, cov):
276 """
277 Returns a list of weights that represent a grid of n_points on the efficient frontier
278 """
279 target_rs = np.linspace(er.min(), er.max(), n_points)
280 weights = [minimize_vol(target_return, er, cov) for target_return in target_rs]
281 return weights
282
283
284 def plot_ef(n_points, er, cov, style='.-', legend=False, show_cml=False, riskfree_rate=0):
285 """
286 Plots the multi-asset efficient frontier
287 """
288 weights = optimal_weights(n_points, er, cov)
289 rets = [portfolio_return(w, er) for w in weights]
290 vols = [portfolio_vol(w, cov) for w in weights]
291 ef = pd.DataFrame({
292 "Returns": rets,
293 "Volatility": vols
294 })
295 ax = ef.plot.line(x="Volatility", y="Returns", style=style, legend=legend)
296 if show_cml:
297 ax.set_xlim(left = 0)
298 # get MSR
299 w_msr = msr(riskfree_rate, er, cov)
300 r_msr = portfolio_return(w_msr, er)
301 vol_msr = portfolio_vol(w_msr, cov)
302 # add CML
303 cml_x = [0, vol_msr]
304 cml_y = [riskfree_rate, r_msr]
305 ax.plot(cml_x, cml_y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=12)
306 return ax