ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
HRP_MC.py
(3367B)
1 # On 20151231 by MLdP <lopezdeprado@lbl.gov>
2 import scipy.cluster.hierarchy as sch,random,numpy as np,pandas as pd,CLA
3 from HRP import correlDist,getIVP,getQuasiDiag,getRecBipart
4 #------------------------------------------------------------------------------
def generateData(nObs, sLength, size0, size1, mu0, sigma0, sigma1F):
    """Simulate a matrix of correlated series with two injected shocks.

    Parameters
    ----------
    nObs : int
        Number of observations (rows) to generate.
    sLength : int
        First row index at which a shock may be placed; rows before it
        are left untouched. Must satisfy ``sLength < nObs - 1``.
    size0 : int
        Number of independent (uncorrelated) series.
    size1 : int
        Number of additional series, each a noisy copy of a randomly
        chosen independent series. Must be at least 1 because the shock
        steps index ``cols[0]`` and ``cols[-1]``.
    mu0, sigma0 : float
        Mean and standard deviation of the independent series.
    sigma1F : float
        Noise on the copied series has standard deviation
        ``sigma0 * sigma1F``.

    Returns
    -------
    x : numpy.ndarray, shape (nObs, size0 + size1)
        Simulated data; each column is one variable.
    cols : list of int
        For each of the ``size1`` appended columns, the index of the
        independent column it was copied from.
    """
    # 1) generate random uncorrelated data
    #    (one row per observation, one column per variable)
    x = np.random.normal(mu0, sigma0, size=(nObs, size0))
    # 2) create correlation between the variables: append noisy copies of
    #    randomly selected source columns (sources may repeat)
    cols = [random.randint(0, size0 - 1) for _ in range(size1)]
    y = x[:, cols] + np.random.normal(0, sigma0 * sigma1F,
                                      size=(nObs, len(cols)))
    x = np.append(x, y, axis=1)
    # 3) add common random shock: hits source column cols[0] and its copy
    #    (column size0) on the same two rows, with values -0.5 and 2
    point = np.random.randint(sLength, nObs - 1, size=2)
    x[np.ix_(point, [cols[0], size0])] = np.array([[-.5, -.5], [2, 2]])
    # 4) add specific random shock: hits only column cols[-1]
    point = np.random.randint(sLength, nObs - 1, size=2)
    x[point, cols[-1]] = np.array([-.5, 2])
    return x, cols
20 #------------------------------------------------------------------------------
def getHRP(cov, corr):
    """Construct a hierarchical portfolio from covariance and correlation.

    Both inputs are wrapped in DataFrames, the correlation matrix is
    turned into a distance matrix by ``correlDist``, assets are clustered
    with single linkage, and the resulting ordering is handed to
    ``getRecBipart`` together with the covariance matrix. The result of
    ``getRecBipart`` is returned sorted by its index.
    """
    corr_df = pd.DataFrame(corr)
    cov_df = pd.DataFrame(cov)
    # NOTE(review): the distance matrix is passed to linkage as a 2-D
    # array, which scipy treats as observation vectors rather than as a
    # condensed distance matrix -- presumably intentional; confirm.
    linkage_matrix = sch.linkage(correlDist(corr_df), 'single')
    leaf_positions = getQuasiDiag(linkage_matrix)
    # recover labels
    ordered_labels = corr_df.index[leaf_positions].tolist()
    weights = getRecBipart(cov_df, ordered_labels)
    return weights.sort_index()
30 #------------------------------------------------------------------------------
def getCLA(cov, **kargs):
    """Compute CLA's minimum variance portfolio.

    Runs ``CLA.CLA`` on ``cov`` with per-asset bounds of 0 (lower) and
    1 (upper), then returns the last entry of the solver's ``w`` list as
    a flat array. Extra keyword arguments are accepted and ignored so the
    function can be called with the same signature as the other
    allocation methods.
    """
    n_assets = cov.shape[0]
    column_shape = (n_assets, 1)
    # Placeholder expected returns; per the original author's note they
    # are not used by the portfolio extracted below.
    expected = np.arange(n_assets).reshape(-1, 1)
    lower_bounds = np.zeros(column_shape)
    upper_bounds = np.ones(column_shape)
    solver = CLA.CLA(expected, cov, lower_bounds, upper_bounds)
    solver.solve()
    return solver.w[-1].flatten()
39 #------------------------------------------------------------------------------
def hrpMC(numIters=1e4, nObs=520, size0=5, size1=5, mu0=0, sigma0=1e-2,
          sigma1F=.25, sLength=260, rebal=22):
    """Monte Carlo experiment comparing getIVP, getHRP and getCLA.

    For each iteration a fresh data set is simulated with
    ``generateData``. At every rebalance point each method is fitted on
    the trailing ``sLength`` rows and its weights are applied to the next
    ``rebal`` rows. The compounded terminal return of each method is
    stored per iteration.

    Parameters
    ----------
    numIters : number
        Iterations run while the counter is below this value.
    nObs, size0, size1, mu0, sigma0, sigma1F
        Forwarded unchanged to ``generateData``.
    sLength : int
        In-sample window length; also the first rebalance row and the
        ``sLength`` argument of ``generateData``.
    rebal : int
        Spacing between rebalance rows and length of each out-of-sample
        window.

    Side effects
    ------------
    Prints the iteration counter on every pass, writes the per-iteration
    terminal returns to ``stats.csv`` in the working directory, and
    prints a table with each method's standard deviation, variance and
    variance relative to getHRP minus one.

    Returns
    -------
    None
    """
    methods = [getIVP, getHRP, getCLA]
    names = [func.__name__ for func in methods]
    # Plain lists instead of growing pandas Series: Series.append no
    # longer exists in pandas >= 2.0 and was quadratic when used in a loop.
    stats = {name: [] for name in names}
    pointers = range(sLength, nObs, rebal)
    numIter = 0
    while numIter < numIters:
        print(numIter)
        # 1) Prepare data for one experiment
        x, cols = generateData(nObs, sLength, size0, size1, mu0, sigma0,
                               sigma1F)
        r = {name: [] for name in names}
        # 2) Compute portfolios in-sample
        for pointer in pointers:
            x_ = x[pointer - sLength:pointer]
            cov_, corr_ = np.cov(x_, rowvar=0), np.corrcoef(x_, rowvar=0)
            # 3) Compute performance out-of-sample
            x_ = x[pointer:pointer + rebal]
            for func in methods:
                w_ = func(cov=cov_, corr=corr_)  # callback
                r[func.__name__].append(np.asarray(np.dot(x_, w_)))
        # 4) Evaluate and store results
        for name in names:
            r_ = np.concatenate(r[name])
            p_ = np.cumprod(1 + r_)
            stats[name].append(p_[-1] - 1)  # terminal return
        numIter += 1
    # 5) Report results
    stats = pd.DataFrame(stats, columns=names)
    stats.to_csv('stats.csv')
    df0, df1 = stats.std(), stats.var()
    print(pd.concat([df0, df1, df1 / df1['getHRP'] - 1], axis=1))
    return
73 #------------------------------------------------------------------------------
# Run the Monte Carlo experiment with default settings when executed as a script.
if __name__ == '__main__':
    hrpMC()