ml-finance-python
Python scripts for financial machine learning
git clone https://9o.is/git/ml-finance-python.git
hrp_demo.py
(3924B)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 __author__ = 'Stefan Jansen'
4
5 # Hierarchical Risk Parity
6 import matplotlib.pyplot as mpl
7 import scipy.cluster.hierarchy as sch
8 import random
9 import numpy as np
10 import pandas as pd
11
12
def getIVP(cov, **kargs):
    """Return inverse-variance portfolio weights for a covariance matrix.

    Each weight is proportional to the reciprocal of the matching diagonal
    entry of ``cov``, and the weights are scaled to sum to one.  Any extra
    keyword arguments are accepted and ignored.
    """
    inv_var = 1. / np.diag(cov)
    return inv_var / inv_var.sum()
18
19
def getClusterVar(cov, cItems):
    """Return the variance of one cluster under inverse-variance weights.

    ``cov`` is sliced by label (``.loc``) to the rows and columns listed in
    ``cItems``; the sub-matrix is weighted with ``getIVP`` and the quadratic
    form ``w' * cov * w`` is returned as a scalar.
    """
    sub_cov = cov.loc[cItems, cItems]
    weights = getIVP(sub_cov).reshape(-1, 1)  # column vector
    # (w' * cov) first, then * w -- the product is 1x1, so unwrap the scalar.
    row = np.dot(weights.T, sub_cov)
    return np.dot(row, weights)[0, 0]
26
27
def getQuasiDiag(link):
    """Return the leaf ordering implied by a linkage matrix.

    Parameters
    ----------
    link : numpy.ndarray
        Linkage matrix with four columns, as produced by
        ``scipy.cluster.hierarchy.linkage``.  Columns 0 and 1 hold the ids of
        the two merged nodes and column 3 of the last row holds the number of
        original items.  An id ``>= numItems`` refers to the cluster created
        in row ``id - numItems``.

    Returns
    -------
    list of int
        Original item ids (all ``< numItems``), ordered so that members of
        the same cluster sit next to each other.
    """
    # Only the integer id/count columns are used; the cast truncates the
    # distance column, which is never read here.
    link = link.astype(int)
    sortIx = pd.Series([link[-1, 0], link[-1, 1]])
    numItems = link[-1, 3]  # number of original items
    while sortIx.max() >= numItems:
        # Spread entries onto even labels so each odd label is free for the
        # second child of a cluster that gets expanded.
        sortIx.index = list(range(0, sortIx.shape[0] * 2, 2))
        clusters = sortIx[sortIx >= numItems]  # entries that are still clusters
        pos = clusters.index
        rows = clusters.values - numItems  # linkage rows that built them
        sortIx.loc[pos] = link[rows, 0]  # replace cluster by its first child
        second = pd.Series(link[rows, 1], index=pos + 1)  # second child goes next to it
        # Series.append was removed in pandas 2.0; pd.concat is the replacement.
        sortIx = pd.concat([sortIx, second]).sort_index()
        sortIx.index = list(range(sortIx.shape[0]))  # back to 0..n-1
    return sortIx.tolist()
44
45
def getRecBipart(cov, sortIx):
    """Compute HRP weights by recursive bisection of the sorted items.

    Parameters
    ----------
    cov : pandas.DataFrame
        Covariance matrix, indexed on both axes by the labels in ``sortIx``.
    sortIx : list
        Item labels in quasi-diagonal order.

    Returns
    -------
    pandas.Series
        Float weights indexed by ``sortIx``.  Every item starts at 1.0 and is
        rescaled once per bisection level that contains it.
    """
    # Start from float weights: the in-place scaling below writes fractional
    # values, which an integer-typed Series cannot hold without an implicit
    # upcast that recent pandas versions deprecate/reject.
    w = pd.Series(1.0, index=sortIx)
    cItems = [sortIx]  # initialize all items in one cluster
    while len(cItems) > 0:
        # Split every cluster with more than one item into two halves;
        # single-item clusters are dropped, which ends the loop eventually.
        cItems = [
            cluster[start:stop]
            for cluster in cItems
            if len(cluster) > 1
            for start, stop in (
                (0, len(cluster) // 2),
                (len(cluster) // 2, len(cluster)),
            )
        ]
        for i in range(0, len(cItems), 2):  # halves come in consecutive pairs
            cItems0 = cItems[i]  # cluster 1
            cItems1 = cItems[i + 1]  # cluster 2
            cVar0 = getClusterVar(cov, cItems0)
            cVar1 = getClusterVar(cov, cItems1)
            # The half with the lower variance receives the larger share.
            alpha = 1 - cVar0 / (cVar0 + cVar1)
            w.loc[cItems0] *= alpha  # weight 1
            w.loc[cItems1] *= 1 - alpha  # weight 2
    return w
64
65
def correlDist(corr):
    """Turn correlations into distances ``d = sqrt((1 - corr) / 2)``.

    A correlation of 1 maps to distance 0 and a correlation of -1 maps to
    distance 1, so 0 <= d[i, j] <= 1 for correlations in [-1, 1].
    The operation is element-wise and keeps the type of ``corr``.
    """
    half_gap = (1 - corr) / 2.
    return half_gap ** .5
71
72
def plotCorrMatrix(path, corr, labels=None):
    """Draw ``corr`` as a heat map with a colour bar and save it to ``path``.

    ``labels`` are used as tick labels on both axes; when omitted, an empty
    label list is passed.  The current figure is cleared and closed after
    saving.
    """
    tick_labels = [] if labels is None else labels
    # Tick positions 0.5, 1.5, ... -- one per row of ``corr``.
    ticks = np.arange(.5, corr.shape[0] + .5)

    mpl.pcolor(corr)
    mpl.colorbar()
    mpl.yticks(ticks, tick_labels)
    mpl.xticks(ticks, tick_labels)
    mpl.savefig(path)

    # Reset pyplot state so the next plot starts from a clean figure.
    mpl.clf()
    mpl.close()
84
85
def generateData(nObs, size0, size1, sigma1):
    """Build a reproducible sample of correlated series.

    ``size0`` independent standard-normal columns are drawn with ``nObs``
    rows each.  Then ``size1`` further columns are appended, each a copy of a
    randomly chosen base column plus normal noise with standard deviation
    ``sigma1``.  Both random generators are re-seeded with 12345 on every
    call.

    Returns a tuple ``(x, cols)``: ``x`` is a DataFrame with columns named
    ``1 .. size0 + size1`` and ``cols`` lists the 0-based base-column
    positions that the appended columns were derived from.
    """
    # Fixed seeds for both generators used below.
    np.random.seed(seed=12345)
    random.seed(12345)

    # 1) uncorrelated base data, one column per variable
    base = np.random.normal(0, 1, size=(nObs, size0))

    # 2) noisy copies of randomly picked base columns
    cols = []
    for _ in range(size1):
        cols.append(random.randint(0, size0 - 1))
    noise = np.random.normal(0, sigma1, size=(nObs, len(cols)))
    derived = base[:, cols] + noise

    combined = np.concatenate((base, derived), axis=1)
    names = list(range(1, combined.shape[1] + 1))
    return pd.DataFrame(combined, columns=names), cols
100
101
def main():
    """Run the HRP demo: simulate data, cluster it, plot, and allocate.

    Writes ``HRP3_corr0.png`` (original order) and ``HRP3_corr1.png``
    (clustered order) and prints the derived-column mapping and the final
    allocation.
    """
    # 1) Generate correlated data
    nObs, size0, size1, sigma1 = 10000, 5, 5, .25
    x, cols = generateData(nObs, size0, size1, sigma1)
    # Pair each source column with the appended column built from it
    # (both as 1-based column labels).
    pairs = []
    for i, j in enumerate(cols, 1):
        pairs.append((j + 1, size0 + i))
    print(pairs)

    # 2) Covariance / correlation, and a plot in the original order
    cov = x.cov()
    corr = x.corr()
    plotCorrMatrix('HRP3_corr0.png', corr, labels=corr.columns)

    # 3) Cluster and reorder
    dist = correlDist(corr)
    # NOTE(review): ``dist`` is passed as a square matrix; per SciPy's docs a
    # 2-D input to linkage is treated as observation vectors -- confirm this
    # is the intended distance-of-distances clustering.
    link = sch.linkage(dist, 'single')
    order = getQuasiDiag(link)
    sorted_labels = corr.index[order].tolist()  # positions -> labels
    reordered = corr.loc[sorted_labels, sorted_labels]
    plotCorrMatrix('HRP3_corr1.png', reordered, labels=reordered.columns)

    # 4) Capital allocation
    hrp = getRecBipart(cov, sorted_labels)
    print('Allocation', hrp, sep='\n')
123
124
# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()