ml-finance-python

Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
edhec_risk_kit_106.py

(4208B)
      1 import pandas as pd
      2 import numpy as np
      3 
      4 def drawdown(return_series: pd.Series):
      5     """Takes a time series of asset returns.
      6        returns a DataFrame with columns for
      7        the wealth index, 
      8        the previous peaks, and 
      9        the percentage drawdown
     10     """
     11     wealth_index = 1000*(1+return_series).cumprod()
     12     previous_peaks = wealth_index.cummax()
     13     drawdowns = (wealth_index - previous_peaks)/previous_peaks
     14     return pd.DataFrame({"Wealth": wealth_index, 
     15                          "Previous Peak": previous_peaks, 
     16                          "Drawdown": drawdowns})
     17 
     18 
     19 def get_ffme_returns():
     20     """
     21     Load the Fama-French Dataset for the returns of the Top and Bottom Deciles by MarketCap
     22     """
     23     me_m = pd.read_csv("data/Portfolios_Formed_on_ME_monthly_EW.csv",
     24                        header=0, index_col=0, na_values=-99.99)
     25     rets = me_m[['Lo 10', 'Hi 10']]
     26     rets.columns = ['SmallCap', 'LargeCap']
     27     rets = rets/100
     28     rets.index = pd.to_datetime(rets.index, format="%Y%m").to_period('M')
     29     return rets
     30 
     31 
     32 def get_hfi_returns():
     33     """
     34     Load and format the EDHEC Hedge Fund Index Returns
     35     """
     36     hfi = pd.read_csv("data/edhec-hedgefundindices.csv",
     37                       header=0, index_col=0, parse_dates=True)
     38     hfi = hfi/100
     39     hfi.index = hfi.index.to_period('M')
     40     return hfi
     41 
     42 
     43 def skewness(r):
     44     """
     45     Alternative to scipy.stats.skew()
     46     Computes the skewness of the supplied Series or DataFrame
     47     Returns a float or a Series
     48     """
     49     demeaned_r = r - r.mean()
     50     # use the population standard deviation, so set dof=0
     51     sigma_r = r.std(ddof=0)
     52     exp = (demeaned_r**3).mean()
     53     return exp/sigma_r**3
     54 
     55 
     56 def kurtosis(r):
     57     """
     58     Alternative to scipy.stats.kurtosis()
     59     Computes the kurtosis of the supplied Series or DataFrame
     60     Returns a float or a Series
     61     """
     62     demeaned_r = r - r.mean()
     63     # use the population standard deviation, so set dof=0
     64     sigma_r = r.std(ddof=0)
     65     exp = (demeaned_r**4).mean()
     66     return exp/sigma_r**4
     67 
     68 
     69 import scipy.stats
     70 def is_normal(r, level=0.01):
     71     """
     72     Applies the Jarque-Bera test to determine if a Series is normal or not
     73     Test is applied at the 1% level by default
     74     Returns True if the hypothesis of normality is accepted, False otherwise
     75     """
     76     if isinstance(r, pd.DataFrame):
     77         return r.aggregate(is_normal)
     78     else:
     79         statistic, p_value = scipy.stats.jarque_bera(r)
     80         return p_value > level
     81 
     82     
     83 def semideviation(r):
     84     """
     85     Returns the semideviation aka negative semideviation of r
     86     r must be a Series or a DataFrame
     87     """
     88     is_negative = r < 0
     89     return r[is_negative].std(ddof=0)
     90 
     91 
     92 def var_historic(r, level=5):
     93     """
     94     Returns the historic Value at Risk at a specified level
     95     i.e. returns the number such that "level" percent of the returns
     96     fall below that number, and the (100-level) percent are above
     97     """
     98     if isinstance(r, pd.DataFrame):
     99         return r.aggregate(var_historic, level=level)
    100     
    101     elif isinstance(r, pd.Series):
    102         return -np.percentile(r, level)
    103     else:
    104         raise TypeError("Expected r to be a Series or DataFrame")
    105 
    106 
    107 def cvar_historic(r, level=5):
    108     """
    109     Computes the Conditional VaR of Series or DataFrame
    110     """
    111     if isinstance(r, pd.Series):
    112         is_beyond = r <= -var_historic(r, level=level)
    113         return -r[is_beyond].mean()
    114     elif isinstance(r, pd.DataFrame):
    115         return r.aggregate(cvar_historic, level=level)
    116     else:
    117         raise TypeError("Expected r to be a Series or DataFrame")
    118 
    119 
    120 from scipy.stats import norm
    121 def var_gaussian(r, level=5, modified=False):
    122     """
    123     Returns the Parametric Gauusian VaR of a Series or DataFrame
    124     If "modified" is True, then the modified VaR is returned,
    125     using the Cornish-Fisher modification
    126     """
    127     # compute the Z score assuming it was Gaussian
    128     z = norm.ppf(level/100)
    129     if modified:
    130         # modify the Z score based on observed skewness and kurtosis
    131         s = skewness(r)
    132         k = kurtosis(r)
    133         z = (z +
    134                 (z**2 - 1)*s/6 +
    135                 (z**3 -3*z)*(k-3)/24 -
    136                 (2*z**3 - 5*z)*(s**2)/36
    137             )
    138     return -(r.mean() + z*r.std(ddof=0))