ml-finance-python

Python scripts for financial machine learning
git clone https://9o.is/git/ml-finance-python.git
hrp_demo.py

(3924B)
      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 
      5 # Hierarchical Risk Parity
      6 import matplotlib.pyplot as mpl
      7 import scipy.cluster.hierarchy as sch
      8 import random
      9 import numpy as np
     10 import pandas as pd
     11 
     12 
     13 def getIVP(cov, **kargs):
     14     # Compute the inverse-variance portfolio
     15     ivp = 1. / np.diag(cov)
     16     ivp /= ivp.sum()
     17     return ivp
     18 
     19 
     20 def getClusterVar(cov, cItems):
     21     # Compute variance per cluster
     22     cov_ = cov.loc[cItems, cItems]  # matrix slice
     23     w_ = getIVP(cov_).reshape(-1, 1)
     24     cVar = np.dot(np.dot(w_.T, cov_), w_)[0, 0]
     25     return cVar
     26 
     27 
     28 def getQuasiDiag(link):
     29     # Sort clustered items by distance
     30     link = link.astype(int)
     31     sortIx = pd.Series([link[-1, 0], link[-1, 1]])
     32     numItems = link[-1, 3]  # number of original items
     33     while sortIx.max() >= numItems:
     34         sortIx.index = list(range(0, sortIx.shape[0] * 2, 2))  # make space
     35         df0 = sortIx[sortIx >= numItems]  # find clusters
     36         i = df0.index;
     37         j = df0.values - numItems
     38         sortIx[i] = link[j, 0]  # item 1
     39         df0 = pd.Series(link[j, 1], index=i + 1)
     40         sortIx = sortIx.append(df0)  # item 2
     41         sortIx = sortIx.sort_index()  # re-sort
     42         sortIx.index = list(range(sortIx.shape[0]))  # re-index
     43     return sortIx.tolist()
     44 
     45 
     46 def getRecBipart(cov, sortIx):
     47     # Compute HRP alloc
     48     w = pd.Series(1, index=sortIx)
     49     cItems = [sortIx]  # initialize all items in one cluster
     50     while len(cItems) > 0:
     51         print(cItems)
     52         cItems = [i[j:k] for i in cItems for j, k in ((0, int(len(i) / 2)), (int(len(i) / 2), len(i))) if
     53                   len(i) > 1]  # bi-section
     54 
     55     for i in range(0, len(cItems), 2):  # parse in pairs
     56         cItems0 = cItems[i]  # cluster 1
     57         cItems1 = cItems[i + 1]  # cluster 2
     58         cVar0 = getClusterVar(cov, cItems0)
     59         cVar1 = getClusterVar(cov, cItems1)
     60         alpha = 1 - cVar0 / (cVar0 + cVar1)
     61         w[cItems0] *= alpha  # weight 1
     62         w[cItems1] *= 1 - alpha  # weight 2
     63     return w
     64 
     65 
     66 def correlDist(corr):
     67     # A distance matrix based on correlation, where 0<=d[i,j]<=1
     68     # This is a proper distance metric
     69     dist = ((1 - corr) / 2.) ** .5  # distance matrix
     70     return dist
     71 
     72 
     73 def plotCorrMatrix(path, corr, labels=None):
     74     # Heatmap of the correlation matrix
     75     if labels is None:
     76         labels = []
     77     mpl.pcolor(corr)
     78     mpl.colorbar()
     79     mpl.yticks(np.arange(.5, corr.shape[0] + .5), labels)
     80     mpl.xticks(np.arange(.5, corr.shape[0] + .5), labels)
     81     mpl.savefig(path)
     82     mpl.clf()
     83     mpl.close()  # reset pylab
     84 
     85 
     86 def generateData(nObs, size0, size1, sigma1):
     87     # Time series of correlated variables
     88 
     89     # 1) generating some uncorrelated data
     90     np.random.seed(seed=12345)
     91     random.seed(12345)
     92     x = np.random.normal(0, 1, size=(nObs, size0))  # each row is a variable
     93 
     94     # 2) creating correlation between the variables
     95     cols = [random.randint(0, size0 - 1) for i in range(size1)]
     96     y = x[:, cols] + np.random.normal(0, sigma1, size=(nObs, len(cols)))
     97     x = np.append(x, y, axis=1)
     98     x = pd.DataFrame(x, columns=list(range(1, x.shape[1] + 1)))
     99     return x, cols
    100 
    101 
    102 def main():
    103     # 1) Generate correlated data
    104     nObs, size0, size1, sigma1 = 10000, 5, 5, .25
    105     x, cols = generateData(nObs, size0, size1, sigma1)
    106     print([(j + 1, size0 + i) for i, j in enumerate(cols, 1)])
    107 
    108     # 2) compute and plot correl matrix
    109     cov, corr = x.cov(), x.corr()
    110     plotCorrMatrix('HRP3_corr0.png', corr, labels=corr.columns)
    111 
    112     # 3) cluster
    113     dist = correlDist(corr)
    114     link = sch.linkage(dist, 'single')
    115     sortIx = getQuasiDiag(link)
    116     sortIx = corr.index[sortIx].tolist()  # recover labels
    117     df0 = corr.loc[sortIx, sortIx]  # reorder
    118     plotCorrMatrix('HRP3_corr1.png', df0, labels=df0.columns)
    119     # 4) Capital allocation
    120     hrp = getRecBipart(cov, sortIx)
    121     print('Allocation', hrp, sep='\n')
    122     return
    123 
    124 
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()