ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
HRP.py
(4269B)
1 # On 20151227 by MLdP <lopezdeprado@lbl.gov>
2 # Hierarchical Risk Parity
3 import matplotlib.pyplot as mpl
4 import scipy.cluster.hierarchy as sch,random,numpy as np,pandas as pd
5 #------------------------------------------------------------------------------
6 def getIVP(cov,**kargs):
7 # Compute the inverse-variance portfolio
8 ivp=1./np.diag(cov)
9 ivp/=ivp.sum()
10 return ivp
11 #------------------------------------------------------------------------------
12 def getClusterVar(cov,cItems):
13 # Compute variance per cluster
14 cov_=cov.loc[cItems,cItems] # matrix slice
15 w_=getIVP(cov_).reshape(-1,1)
16 cVar=np.dot(np.dot(w_.T,cov_),w_)[0,0]
17 return cVar
18 #------------------------------------------------------------------------------
19 def getQuasiDiag(link):
20 # Sort clustered items by distance
21 link=link.astype(int)
22 sortIx=pd.Series([link[-1,0],link[-1,1]])
23 numItems=link[-1,3] # number of original items
24 while sortIx.max()>=numItems:
25 sortIx.index=range(0,sortIx.shape[0]*2,2) # make space
26 df0=sortIx[sortIx>=numItems] # find clusters
27 i=df0.index;j=df0.values-numItems
28 sortIx[i]=link[j,0] # item 1
29 df0=pd.Series(link[j,1],index=i+1)
30 sortIx=sortIx.append(df0) # item 2
31 sortIx=sortIx.sort_index() # re-sort
32 sortIx.index=range(sortIx.shape[0]) # re-index
33 return sortIx.tolist()
34 #------------------------------------------------------------------------------
35 def getRecBipart(cov,sortIx):
36 # Compute HRP alloc
37 w=pd.Series(1,index=sortIx)
38 cItems=[sortIx] # initialize all items in one cluster
39 while len(cItems)>0:
40 cItems=[i[j:k] for i in cItems for j,k in ((0,len(i)/2), \
41 (len(i)/2,len(i))) if len(i)>1] # bi-section
42 for i in xrange(0,len(cItems),2): # parse in pairs
43 cItems0=cItems[i] # cluster 1
44 cItems1=cItems[i+1] # cluster 2
45 cVar0=getClusterVar(cov,cItems0)
46 cVar1=getClusterVar(cov,cItems1)
47 alpha=1-cVar0/(cVar0+cVar1)
48 w[cItems0]*=alpha # weight 1
49 w[cItems1]*=1-alpha # weight 2
50 return w
51 #------------------------------------------------------------------------------
52 def correlDist(corr):
53 # A distance matrix based on correlation, where 0<=d[i,j]<=1
54 # This is a proper distance metric
55 dist=((1-corr)/2.)**.5 # distance matrix
56 return dist
57 #------------------------------------------------------------------------------
58 def plotCorrMatrix(path,corr,labels=None):
59 # Heatmap of the correlation matrix
60 if labels is None:labels=[]
61 mpl.pcolor(corr)
62 mpl.colorbar()
63 mpl.yticks(np.arange(.5,corr.shape[0]+.5),labels)
64 mpl.xticks(np.arange(.5,corr.shape[0]+.5),labels)
65 mpl.savefig(path)
66 mpl.clf();mpl.close() # reset pylab
67 return
68 #------------------------------------------------------------------------------
69 def generateData(nObs,size0,size1,sigma1):
70 # Time series of correlated variables
71 #1) generating some uncorrelated data
72 np.random.seed(seed=12345);random.seed(12345)
73 x=np.random.normal(0,1,size=(nObs,size0)) # each row is a variable
74 #2) creating correlation between the variables
75 cols=[random.randint(0,size0-1) for i in xrange(size1)]
76 y=x[:,cols]+np.random.normal(0,sigma1,size=(nObs,len(cols)))
77 x=np.append(x,y,axis=1)
78 x=pd.DataFrame(x,columns=range(1,x.shape[1]+1))
79 return x,cols
80 #------------------------------------------------------------------------------
81 def main():
82 #1) Generate correlated data
83 nObs,size0,size1,sigma1=10000,5,5,.25
84 x,cols=generateData(nObs,size0,size1,sigma1)
85 print [(j+1,size0+i) for i,j in enumerate(cols,1)]
86 #2) compute and plot correl matrix
87 cov,corr=x.cov(),x.corr()
88 plotCorrMatrix('HRP3_corr0.png',corr,labels=corr.columns)
89 #3) cluster
90 dist=correlDist(corr)
91 link=sch.linkage(dist,'single')
92 sortIx=getQuasiDiag(link)
93 sortIx=corr.index[sortIx].tolist() # recover labels
94 df0=corr.loc[sortIx,sortIx] # reorder
95 plotCorrMatrix('HRP3_corr1.png',df0,labels=df0.columns)
96 #4) Capital allocation
97 hrp=getRecBipart(cov,sortIx)
98 print hrp
99 return
100 #------------------------------------------------------------------------------
101 if __name__=='__main__':main()