ml-finance-python
Python scripts for financial machine learning
git clone https://9o.is/git/ml-finance-python.git
HRP.py
(3702B)
1 import random
2 import pandas as pd
3 import numpy as np
4 import matplotlib.pyplot as plt
5 import seaborn as sns
6 from scipy.cluster.hierarchy import linkage
7 from scipy.spatial.distance import pdist, squareform
8
# Pin NumPy's global random state so any random draws are repeatable
# from one run to the next.
np.random.seed(seed=42)
10
11
def get_distance_matrix(corr):
    """Convert correlations into correlation distances.

    Every entry is mapped through ``d = sqrt((1 - rho) / 2)``, so
    ``rho = 1`` gives 0, ``rho = 0`` gives ``sqrt(0.5)`` and ``rho = -1``
    gives 1.

    Args:
        corr: Correlations as a pandas DataFrame/Series, a NumPy array or
            a scalar.

    Returns:
        Distances with the same shape (and labels, for pandas input) as
        ``corr``; every non-NaN entry satisfies ``0 <= d <= 1``.
    """
    squared = (1 - corr) / 2
    # Floating-point round-off can leave a correlation a few ulps outside
    # [-1, 1] (e.g. a diagonal of 1.0000000000000002). Without clamping,
    # sqrt would return NaN for such entries. NaN inputs still come out NaN.
    squared = np.minimum(np.maximum(squared, 0.0), 1.0)
    return np.sqrt(squared)
16
17
def quasi_diagonalize(link):
    """Return the original items in the order given by the cluster tree.

    The linkage rows are expanded from the final merge downwards: every
    cluster id is replaced by its two children (first child on the left)
    until only original items remain, so items merged together end up
    next to each other.

    Args:
        link: Linkage matrix as a NumPy array. Columns 0 and 1 of each row
            hold the two merged ids and column 3 of the last row holds the
            total number of original items (the layout produced by
            ``scipy.cluster.hierarchy.linkage``). An id ``n + k`` refers to
            the cluster created by row ``k``, where ``n`` is the number of
            original items.

    Returns:
        list[int]: Original item indices in tree order.
    """
    link = link.astype(int)
    num_items = link[-1, 3]  # ids below this value are original items

    # Explicit stack instead of the former Series.append-based expansion
    # (Series.append no longer exists in pandas >= 2.0). The right child is
    # pushed first so the left child is expanded first, which preserves the
    # left-to-right order of the original level-by-level expansion.
    ordered = []
    pending = [link[-1, 1], link[-1, 0]]
    while pending:
        node = pending.pop()
        if node < num_items:
            ordered.append(int(node))
            continue
        first, second = link[node - num_items, :2]
        pending.append(second)
        pending.append(first)
    return ordered
34
35
def get_inverse_var_pf(cov):
    """Return inverse-variance portfolio weights.

    Each asset is weighted by the reciprocal of its variance (the diagonal
    of ``cov``); the weights are then rescaled so they sum to one.

    Args:
        cov: Square covariance matrix (anything ``np.diag`` accepts).

    Returns:
        numpy.ndarray: One weight per diagonal entry of ``cov``.
    """
    variances = np.diag(cov)
    reciprocal = np.divide(1, variances)
    total = reciprocal.sum()
    return reciprocal / total
40
41
def get_cluster_var(cov, cluster_items):
    """Return the variance of one cluster under inverse-variance weights.

    Args:
        cov: Covariance DataFrame labelled by asset on both axes.
        cluster_items: Labels of the assets that make up the cluster.

    Returns:
        The scalar ``w' * cov_sub * w``, where ``cov_sub`` is the
        covariance restricted to the cluster and ``w`` is its
        inverse-variance portfolio.
    """
    # Restrict the covariance to the cluster's rows and columns.
    sub_cov = cov.loc[cluster_items, cluster_items]
    ivp_weights = get_inverse_var_pf(sub_cov)
    quadratic_form = ivp_weights @ sub_cov @ ivp_weights
    return quadratic_form.item()
47
48
def get_hrp_allocation(cov, tickers):
    """Compute top-down HRP weights by recursive bisection.

    Starting from a single cluster holding every ticker, each cluster with
    more than one member is cut in half by position. The weight carried by
    the parent is split between the two halves in inverse proportion to
    their cluster variances, so the lower-variance half receives the
    larger share. This repeats until every cluster is a single ticker.

    Args:
        cov: Covariance DataFrame labelled by ticker on both axes.
        tickers: Ticker labels in the order they should be bisected
            (neighbouring tickers are kept together longest).

    Returns:
        pandas.Series: One float weight per ticker, indexed by ``tickers``.
    """
    # Start from a float Series: the scaling below writes float values
    # back, and assigning floats into an integer Series is deprecated in
    # recent pandas releases.
    weights = pd.Series(1.0, index=tickers)
    clusters = [tickers]  # initialize one cluster with all assets

    while clusters:
        # Bisect every cluster that can still be split; singletons drop out.
        halves = []
        for cluster in clusters:
            if len(cluster) > 1:
                mid = len(cluster) // 2
                halves.append(cluster[:mid])
                halves.append(cluster[mid:])
        clusters = halves

        # Consecutive entries are the two halves of the same parent.
        for left, right in zip(clusters[::2], clusters[1::2]):
            left_var = get_cluster_var(cov, left)
            right_var = get_cluster_var(cov, right)

            # Share assigned to the left half; the right half gets the rest.
            left_share = 1 - left_var / (left_var + right_var)
            weights.loc[left] *= left_share
            weights.loc[right] *= 1 - left_share
    return weights
72
73
# Read the S&P 500 constituent labels and the adjusted close prices, pivoted
# to one column per ticker and restricted to those constituents.
with pd.HDFStore('../../00_data/assets.h5') as store:
    sp500_stocks = store['sp500/stocks'].index
    prices = (
        store['quandl/wiki/prices']
        .adj_close
        .unstack('ticker')
        .filter(sp500_stocks)
    )

# Sample window (years, both ends inclusive in the label slice below).
start = 1988
end = 2017

# Month-end prices -> monthly percentage changes. Rows that are entirely
# empty are dropped first, then every ticker with any remaining gap.
monthly_returns = (
    prices
    .loc[f'{start}':f'{end}']
    .resample('M')
    .last()
    .pct_change()
    .dropna(how='all')
    .dropna(axis=1)
)
monthly_returns.columns.names = ['Ticker']
84
# Pairwise covariance and correlation of the monthly returns.
cov = monthly_returns.cov()
corr = monthly_returns.corr()
corr.columns.names = ['Ticker']

# Draw the correlation matrix as a heatmap with a diverging palette centred
# on zero, and write it to disk.
cmap = sns.diverging_palette(h_neg=10, h_pos=250, as_cmap=True)
fig, ax = plt.subplots(figsize=(11, 10))
sns.heatmap(data=corr, cmap=cmap, center=0, ax=ax)
fig.tight_layout()
fig.savefig('correl_map.png', dpi=600)
94
# Map correlations to distances and build a single-linkage tree from the
# condensed form of the distance matrix.
distance_matrix = get_distance_matrix(corr)
linkage_matrix = linkage(squareform(distance_matrix), method='single')

# NOTE: quasi_diagonalize(linkage_matrix) is an alternative, currently unused,
# way to obtain sorted_idx; the ordering below is read from seaborn instead.

# Reuse the precomputed linkage for both rows and columns of the clustermap.
clustermap_options = {
    'method': 'single',
    'row_linkage': linkage_matrix,
    'col_linkage': linkage_matrix,
    'cmap': cmap,
    'center': 0,
}
clustergrid = sns.clustermap(distance_matrix, **clustermap_options)

clustergrid.savefig('clustermap.png', dpi=600)

# Row order of the dendrogram -> ticker labels in that order -> HRP weights.
sorted_idx = clustergrid.dendrogram_row.reordered_ind
sorted_tickers = corr.index[sorted_idx].tolist()
hrp_allocation = get_hrp_allocation(cov, sorted_tickers)