ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

bayesian_regression.py

(4595B)


      1 from __future__ import print_function, division
      2 import numpy as np
      3 from scipy.stats import chi2, multivariate_normal
      4 from mlfromscratch.utils import mean_squared_error, train_test_split, polynomial_features
      5 
      6 
      7 
      8 class BayesianRegression(object):
      9     """Bayesian regression model. If poly_degree is specified the features will
     10     be transformed to with a polynomial basis function, which allows for polynomial
     11     regression. Assumes Normal prior and likelihood for the weights and scaled inverse
     12     chi-squared prior and likelihood for the variance of the weights.
     13 
     14     Parameters:
     15     -----------
     16     n_draws: float
     17         The number of simulated draws from the posterior of the parameters.
     18     mu0: array
     19         The mean values of the prior Normal distribution of the parameters.
     20     omega0: array
     21         The precision matrix of the prior Normal distribution of the parameters.
     22     nu0: float
     23         The degrees of freedom of the prior scaled inverse chi squared distribution.
     24     sigma_sq0: float
     25         The scale parameter of the prior scaled inverse chi squared distribution.
     26     poly_degree: int
     27         The polynomial degree that the features should be transformed to. Allows
     28         for polynomial regression.
     29     cred_int: float
     30         The credible interval (ETI in this impl.). 95 => 95% credible interval of the posterior
     31         of the parameters.
     32 
     33     Reference:
     34         https://github.com/mattiasvillani/BayesLearnCourse/raw/master/Slides/BayesLearnL5.pdf
     35     """
     36     def __init__(self, n_draws, mu0, omega0, nu0, sigma_sq0, poly_degree=0, cred_int=95):
     37         self.w = None
     38         self.n_draws = n_draws
     39         self.poly_degree = poly_degree
     40         self.cred_int = cred_int
     41 
     42         # Prior parameters
     43         self.mu0 = mu0
     44         self.omega0 = omega0
     45         self.nu0 = nu0
     46         self.sigma_sq0 = sigma_sq0
     47 
     48     # Allows for simulation from the scaled inverse chi squared
     49     # distribution. Assumes the variance is distributed according to
     50     # this distribution.
     51     # Reference:
     52     #   https://en.wikipedia.org/wiki/Scaled_inverse_chi-squared_distribution
     53     def _draw_scaled_inv_chi_sq(self, n, df, scale):
     54         X = chi2.rvs(size=n, df=df)
     55         sigma_sq = df * scale / X
     56         return sigma_sq
     57 
     58     def fit(self, X, y):
     59 
     60         # If polynomial transformation
     61         if self.poly_degree:
     62             X = polynomial_features(X, degree=self.poly_degree)
     63 
     64         n_samples, n_features = np.shape(X)
     65 
     66         X_X = X.T.dot(X)
     67 
     68         # Least squares approximate of beta
     69         beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y)
     70 
     71         # The posterior parameters can be determined analytically since we assume
     72         # conjugate priors for the likelihoods.
     73 
     74         # Normal prior / likelihood => Normal posterior
     75         mu_n = np.linalg.pinv(X_X + self.omega0).dot(X_X.dot(beta_hat)+self.omega0.dot(self.mu0))
     76         omega_n = X_X + self.omega0
     77         # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior
     78         nu_n = self.nu0 + n_samples
     79         sigma_sq_n = (1.0/nu_n)*(self.nu0*self.sigma_sq0 + \
     80             (y.T.dot(y) + self.mu0.T.dot(self.omega0).dot(self.mu0) - mu_n.T.dot(omega_n.dot(mu_n))))
     81 
     82         # Simulate parameter values for n_draws
     83         beta_draws = np.empty((self.n_draws, n_features))
     84         for i in range(self.n_draws):
     85             sigma_sq = self._draw_scaled_inv_chi_sq(n=1, df=nu_n, scale=sigma_sq_n)
     86             beta = multivariate_normal.rvs(size=1, mean=mu_n[:,0], cov=sigma_sq*np.linalg.pinv(omega_n))
     87             # Save parameter draws
     88             beta_draws[i, :] = beta
     89 
     90         # Select the mean of the simulated variables as the ones used to make predictions
     91         self.w = np.mean(beta_draws, axis=0)
     92 
     93         # Lower and upper boundary of the credible interval
     94         l_eti = 50 - self.cred_int/2
     95         u_eti = 50 + self.cred_int/2
     96         self.eti = np.array([[np.percentile(beta_draws[:,i], q=l_eti), np.percentile(beta_draws[:,i], q=u_eti)] \
     97                                 for i in range(n_features)])
     98 
     99     def predict(self, X, eti=False):
    100 
    101         # If polynomial transformation
    102         if self.poly_degree:
    103             X = polynomial_features(X, degree=self.poly_degree)
    104 
    105         y_pred = X.dot(self.w)
    106         # If the lower and upper boundaries for the 95%
    107         # equal tail interval should be returned
    108         if eti:
    109             lower_w = self.eti[:, 0]
    110             upper_w = self.eti[:, 1]
    111             y_lower_pred = X.dot(lower_w)
    112             y_upper_pred = X.dot(upper_w)
    113             return y_pred, y_lower_pred, y_upper_pred
    114 
    115         return y_pred