ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_106.py
(4208B)
1 import pandas as pd
2 import numpy as np
3
def drawdown(return_series: pd.Series):
    """Build the drawdown history implied by a series of periodic returns.

    The returns are compounded into a wealth index scaled by 1000, the
    running maximum of that index is tracked, and the drawdown is the
    fractional shortfall of the wealth index from that running maximum
    (zero at a new high, negative otherwise).

    Returns a DataFrame with the columns "Wealth", "Previous Peak" and
    "Drawdown".
    """
    wealth = 1000 * (return_series + 1).cumprod()
    running_max = wealth.cummax()
    columns = {
        "Wealth": wealth,
        "Previous Peak": running_max,
        "Drawdown": (wealth - running_max) / running_max,
    }
    return pd.DataFrame(columns)
17
18
def get_ffme_returns():
    """
    Load the Fama-French market-cap portfolio file and return the two
    extreme deciles.

    Reads data/Portfolios_Formed_on_ME_monthly_EW.csv (first column as the
    index, -99.99 treated as missing), keeps the 'Lo 10' and 'Hi 10'
    columns under the names 'SmallCap' and 'LargeCap', divides the values
    by 100 and converts the YYYYMM index to monthly periods.
    """
    raw = pd.read_csv(
        "data/Portfolios_Formed_on_ME_monthly_EW.csv",
        header=0,
        index_col=0,
        na_values=-99.99,
    )
    # Scale first, then relabel; the two steps are independent.
    deciles = raw[['Lo 10', 'Hi 10']] / 100
    deciles.columns = ['SmallCap', 'LargeCap']
    month_stamps = pd.to_datetime(deciles.index, format="%Y%m")
    deciles.index = month_stamps.to_period('M')
    return deciles
30
31
def get_hfi_returns():
    """
    Load the EDHEC hedge fund index file as a DataFrame of returns.

    Reads data/edhec-hedgefundindices.csv with the first column parsed as
    dates and used as the index, divides every value by 100 and converts
    the date index to monthly periods.
    """
    raw = pd.read_csv(
        "data/edhec-hedgefundindices.csv",
        header=0,
        index_col=0,
        parse_dates=True,
    )
    scaled = raw / 100
    scaled.index = scaled.index.to_period('M')
    return scaled
41
42
def skewness(r):
    """
    Alternative to scipy.stats.skew()
    Computes the skewness of r as the mean cubed deviation from the mean
    divided by the cube of the population standard deviation.
    r is a Series or DataFrame; the result is a float or a Series
    """
    # population standard deviation, hence ddof=0
    pop_std = r.std(ddof=0)
    centered = r - r.mean()
    third_moment = (centered**3).mean()
    return third_moment/pop_std**3
54
55
def kurtosis(r):
    """
    Alternative to scipy.stats.kurtosis()
    Computes the kurtosis of r as the mean fourth-power deviation from the
    mean divided by the fourth power of the population standard deviation.
    The value is returned as is (3 is not subtracted).
    r is a Series or DataFrame; the result is a float or a Series
    """
    # population standard deviation, hence ddof=0
    pop_std = r.std(ddof=0)
    centered = r - r.mean()
    fourth_moment = (centered**4).mean()
    return fourth_moment/pop_std**4
67
68
69 import scipy.stats
def is_normal(r, level=0.01):
    """
    Applies the Jarque-Bera test to decide whether r looks normal.

    r is a Series (or other 1-D sample) or a DataFrame; a DataFrame is
    tested column by column.
    level is the significance level of the test (1% by default).

    Returns True when the test's p-value exceeds level, i.e. normality is
    not rejected, and False otherwise. For a DataFrame the result is a
    Series with one boolean per column.
    """
    if isinstance(r, pd.DataFrame):
        # Forward `level` so each column is tested at the caller's
        # significance level instead of silently at the default.
        return r.aggregate(is_normal, level=level)
    _statistic, p_value = scipy.stats.jarque_bera(r)
    return p_value > level
81
82
def semideviation(r):
    """
    Returns the semideviation (negative semideviation) of r: the
    population standard deviation (ddof=0) taken over only those
    observations that are below zero.
    r must be a Series or a DataFrame
    """
    below_zero = r[r < 0]
    return below_zero.std(ddof=0)
90
91
def var_historic(r, level=5):
    """
    Returns the historic Value at Risk at the given level.

    For a Series this is the negated level-th percentile of the
    observations, so "level" percent of the returns fall below the
    (negated) result. A DataFrame is processed column by column and
    yields a Series. Any other input raises TypeError.
    """
    if isinstance(r, pd.Series):
        return -np.percentile(r, level)
    if isinstance(r, pd.DataFrame):
        return r.aggregate(var_historic, level=level)
    raise TypeError("Expected r to be a Series or DataFrame")
105
106
def cvar_historic(r, level=5):
    """
    Computes the Conditional VaR of a Series or DataFrame.

    For a Series this is the negated mean of the observations that are at
    or below the negated historic VaR for the same level. A DataFrame is
    processed column by column and yields a Series. Any other input
    raises TypeError.
    """
    if isinstance(r, pd.DataFrame):
        return r.aggregate(cvar_historic, level=level)
    if not isinstance(r, pd.Series):
        raise TypeError("Expected r to be a Series or DataFrame")
    cutoff = -var_historic(r, level=level)
    tail = r[r <= cutoff]
    return -tail.mean()
118
119
120 from scipy.stats import norm
def var_gaussian(r, level=5, modified=False):
    """
    Returns the Parametric Gaussian VaR of a Series or DataFrame:
    the negated sum of the mean and z times the population standard
    deviation, where z is the standard normal quantile at level percent.
    If "modified" is True, z is first adjusted with the Cornish-Fisher
    expansion using the observed skewness and kurtosis of r
    """
    # z score under the Gaussian assumption
    z = norm.ppf(level/100)
    if modified:
        s = skewness(r)
        k = kurtosis(r)
        # Cornish-Fisher correction terms, summed in the original order
        skew_term = (z**2 - 1)*s/6
        kurt_term = (z**3 - 3*z)*(k-3)/24
        skew_sq_term = (2*z**3 - 5*z)*(s**2)/36
        z = z + skew_term + kurt_term - skew_sq_term
    location = r.mean()
    scale = r.std(ddof=0)
    return -(location + z*scale)