ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

HRP.py

(4269B)


      1 # On 20151227 by MLdP <lopezdeprado@lbl.gov>
      2 # Hierarchical Risk Parity
      3 import matplotlib.pyplot as mpl
      4 import scipy.cluster.hierarchy as sch,random,numpy as np,pandas as pd
      5 #------------------------------------------------------------------------------
      6 def getIVP(cov,**kargs):
      7     # Compute the inverse-variance portfolio
      8     ivp=1./np.diag(cov)
      9     ivp/=ivp.sum()
     10     return ivp
     11 #------------------------------------------------------------------------------
     12 def getClusterVar(cov,cItems):
     13     # Compute variance per cluster
     14     cov_=cov.loc[cItems,cItems] # matrix slice
     15     w_=getIVP(cov_).reshape(-1,1)
     16     cVar=np.dot(np.dot(w_.T,cov_),w_)[0,0]
     17     return cVar
     18 #------------------------------------------------------------------------------
     19 def getQuasiDiag(link):
     20     # Sort clustered items by distance
     21     link=link.astype(int)
     22     sortIx=pd.Series([link[-1,0],link[-1,1]])
     23     numItems=link[-1,3] # number of original items
     24     while sortIx.max()>=numItems:
     25         sortIx.index=range(0,sortIx.shape[0]*2,2) # make space
     26         df0=sortIx[sortIx>=numItems] # find clusters
     27         i=df0.index;j=df0.values-numItems
     28         sortIx[i]=link[j,0] # item 1
     29         df0=pd.Series(link[j,1],index=i+1)
     30         sortIx=sortIx.append(df0) # item 2
     31         sortIx=sortIx.sort_index() # re-sort
     32         sortIx.index=range(sortIx.shape[0]) # re-index
     33     return sortIx.tolist()
     34 #------------------------------------------------------------------------------
     35 def getRecBipart(cov,sortIx):
     36     # Compute HRP alloc
     37     w=pd.Series(1,index=sortIx)
     38     cItems=[sortIx] # initialize all items in one cluster
     39     while len(cItems)>0:
     40         cItems=[i[j:k] for i in cItems for j,k in ((0,len(i)/2), \
     41             (len(i)/2,len(i))) if len(i)>1] # bi-section
     42         for i in xrange(0,len(cItems),2): # parse in pairs
     43             cItems0=cItems[i] # cluster 1
     44             cItems1=cItems[i+1] # cluster 2
     45             cVar0=getClusterVar(cov,cItems0)
     46             cVar1=getClusterVar(cov,cItems1)
     47             alpha=1-cVar0/(cVar0+cVar1)
     48             w[cItems0]*=alpha # weight 1
     49             w[cItems1]*=1-alpha # weight 2
     50     return w
     51 #------------------------------------------------------------------------------
     52 def correlDist(corr):
     53     # A distance matrix based on correlation, where 0<=d[i,j]<=1
     54     # This is a proper distance metric
     55     dist=((1-corr)/2.)**.5 # distance matrix
     56     return dist
     57 #------------------------------------------------------------------------------
     58 def plotCorrMatrix(path,corr,labels=None):
     59     # Heatmap of the correlation matrix
     60     if labels is None:labels=[]
     61     mpl.pcolor(corr)
     62     mpl.colorbar()
     63     mpl.yticks(np.arange(.5,corr.shape[0]+.5),labels)
     64     mpl.xticks(np.arange(.5,corr.shape[0]+.5),labels)
     65     mpl.savefig(path)
     66     mpl.clf();mpl.close() # reset pylab
     67     return
     68 #------------------------------------------------------------------------------
     69 def generateData(nObs,size0,size1,sigma1):
     70     # Time series of correlated variables
     71     #1) generating some uncorrelated data
     72     np.random.seed(seed=12345);random.seed(12345)
     73     x=np.random.normal(0,1,size=(nObs,size0)) # each row is a variable
     74     #2) creating correlation between the variables
     75     cols=[random.randint(0,size0-1) for i in xrange(size1)]
     76     y=x[:,cols]+np.random.normal(0,sigma1,size=(nObs,len(cols)))
     77     x=np.append(x,y,axis=1)
     78     x=pd.DataFrame(x,columns=range(1,x.shape[1]+1))
     79     return x,cols
     80 #------------------------------------------------------------------------------
     81 def main():
     82     #1) Generate correlated data
     83     nObs,size0,size1,sigma1=10000,5,5,.25
     84     x,cols=generateData(nObs,size0,size1,sigma1)
     85     print [(j+1,size0+i) for i,j in enumerate(cols,1)]
     86     #2) compute and plot correl matrix
     87     cov,corr=x.cov(),x.corr()
     88     plotCorrMatrix('HRP3_corr0.png',corr,labels=corr.columns)
     89     #3) cluster
     90     dist=correlDist(corr)
     91     link=sch.linkage(dist,'single')
     92     sortIx=getQuasiDiag(link)
     93     sortIx=corr.index[sortIx].tolist() # recover labels
     94     df0=corr.loc[sortIx,sortIx] # reorder
     95     plotCorrMatrix('HRP3_corr1.png',df0,labels=df0.columns)
     96     #4) Capital allocation
     97     hrp=getRecBipart(cov,sortIx)
     98     print hrp
     99     return
    100 #------------------------------------------------------------------------------
    101 if __name__=='__main__':main()