ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

HRP.py

(3702B)


      1 import random
      2 import pandas as pd
      3 import numpy as np
      4 import matplotlib.pyplot as plt
      5 import seaborn as sns
      6 from scipy.cluster.hierarchy import linkage
      7 from scipy.spatial.distance import pdist, squareform
      8 
      9 np.random.seed(42)
     10 
     11 
     12 def get_distance_matrix(corr):
     13     """Compute distance matrix from correlation;
     14         0 <= d[i,j] <= 1"""
     15     return np.sqrt((1 - corr) / 2)
     16 
     17 
     18 def quasi_diagonalize(link):
     19     """sort clustered assets by distance"""
     20     link = link.astype(int)
     21     sort_idx = pd.Series([link[-1, 0], link[-1, 1]])
     22     num_items = link[-1, 3]  # idx of original items
     23     while sort_idx.max() >= num_items:
     24         sort_idx.index = list(range(0, sort_idx.shape[0] * 2, 2))  # make space
     25         df0 = sort_idx[sort_idx >= num_items]  # find clusters
     26         i = df0.index
     27         j = df0.values - num_items
     28         sort_idx[i] = link[j, 0]  # item 1
     29         df0 = pd.Series(link[j, 1], index=i + 1)
     30         sort_idx = sort_idx.append(df0)  # item 2
     31         sort_idx = sort_idx.sort_index()  # re-sort
     32         sort_idx.index = list(range(sort_idx.shape[0]))  # re-index
     33     return sort_idx.tolist()
     34 
     35 
     36 def get_inverse_var_pf(cov):
     37     """Compute the inverse-variance portfolio"""
     38     ivp = 1 / np.diag(cov)
     39     return ivp / ivp.sum()
     40 
     41 
     42 def get_cluster_var(cov, cluster_items):
     43     """Compute variance per cluster"""
     44     cov_ = cov.loc[cluster_items, cluster_items]  # matrix slice
     45     w_ = get_inverse_var_pf(cov_)
     46     return (w_ @ cov_ @ w_).item()
     47 
     48 
     49 def get_hrp_allocation(cov, tickers):
     50     """Compute top-down HRP weights"""
     51 
     52     weights = pd.Series(1, index=tickers)
     53     clusters = [tickers]  # initialize one cluster with all assets
     54 
     55     while len(clusters) > 0:
     56         # run bisectional search:
     57         clusters = [c[start:stop] for c in clusters
     58                     for start, stop in ((0, int(len(c) / 2)),
     59                                         (int(len(c) / 2), len(c)))
     60                     if len(c) > 1]
     61         for i in range(0, len(clusters), 2):  # parse in pairs
     62             cluster0 = clusters[i]
     63             cluster1 = clusters[i + 1]
     64 
     65             cluster0_var = get_cluster_var(cov, cluster0)
     66             cluster1_var = get_cluster_var(cov, cluster1)
     67 
     68             weight_scaler = 1 - cluster0_var / (cluster0_var + cluster1_var)
     69             weights[cluster0] *= weight_scaler
     70             weights[cluster1] *= 1 - weight_scaler
     71     return weights
     72 
     73 
     74 with pd.HDFStore('../../00_data/assets.h5') as store:
     75     sp500_stocks = store['sp500/stocks'].index
     76     prices = store['quandl/wiki/prices'].adj_close.unstack('ticker').filter(sp500_stocks)
     77 
     78 start = 1988
     79 end = 2017
     80 
     81 monthly_returns = prices.loc[f'{start}':f'{end}'].resample('M').last().pct_change().dropna(how='all')
     82 monthly_returns = monthly_returns.dropna(axis=1)
     83 monthly_returns.columns.names = ['Ticker']
     84 
     85 cov = monthly_returns.cov()
     86 corr = monthly_returns.corr()
     87 corr.columns.names = ['Ticker']
     88 
     89 cmap = sns.diverging_palette(10, 250, as_cmap=True)
     90 fig, ax = plt.subplots(figsize=(11, 10))
     91 sns.heatmap(corr, center=0, cmap=cmap, ax=ax)
     92 fig.tight_layout()
     93 fig.savefig('correl_map.png', dpi=600)
     94 
     95 distance_matrix = get_distance_matrix(corr)
     96 linkage_matrix = linkage(squareform(distance_matrix), 'single')
     97 
     98 # sorted_idx = quasi_diagonalize(linkage_matrix)
     99 
    100 clustergrid = sns.clustermap(distance_matrix,
    101                              method='single',
    102                              row_linkage=linkage_matrix,
    103                              col_linkage=linkage_matrix,
    104                              cmap=cmap, center=0)
    105 
    106 clustergrid.savefig('clustermap.png', dpi=600)
    107 
    108 sorted_idx = clustergrid.dendrogram_row.reordered_ind
    109 sorted_tickers = corr.index[sorted_idx].tolist()
    110 hrp_allocation = get_hrp_allocation(cov, sorted_tickers)