ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
bayesian_regression.py
(4595B)
1 from __future__ import print_function, division
2 import numpy as np
3 from scipy.stats import chi2, multivariate_normal
4 from mlfromscratch.utils import mean_squared_error, train_test_split, polynomial_features
5
6
7
class BayesianRegression(object):
    """Bayesian linear regression with conjugate priors.

    If poly_degree is specified the features are transformed with a polynomial
    basis function, which allows for polynomial regression. The weights are
    given a Normal prior with mean mu0 and covariance sigma^2 * inv(omega0),
    and the variance sigma^2 is given a scaled inverse chi-squared prior. Both
    priors are conjugate, so the posterior parameters are computed
    analytically and the weights are then simulated from the posterior.

    Parameters:
    -----------
    n_draws: int
        The number of simulated draws from the posterior of the parameters.
    mu0: array
        The mean values of the prior Normal distribution of the parameters.
        Either a 1-D array of length n_features or a column vector.
    omega0: array
        The precision matrix of the prior Normal distribution of the
        parameters, of shape (n_features, n_features).
    nu0: float
        The degrees of freedom of the prior scaled inverse chi squared distribution.
    sigma_sq0: float
        The scale parameter of the prior scaled inverse chi squared distribution.
    poly_degree: int
        The polynomial degree that the features should be transformed to. Allows
        for polynomial regression. 0 (the default) disables the transformation.
    cred_int: float
        The credible interval (ETI in this impl.). 95 => 95% credible interval
        of the posterior of the parameters.

    Attributes set by fit():
    ------------------------
    w: array of shape (n_features,)
        Mean of the simulated posterior draws; used for predictions.
    eti: array of shape (n_features, 2)
        Lower and upper equal-tail interval bound for every weight.

    Reference:
        https://github.com/mattiasvillani/BayesLearnCourse/raw/master/Slides/BayesLearnL5.pdf
    """

    def __init__(self, n_draws, mu0, omega0, nu0, sigma_sq0, poly_degree=0, cred_int=95):
        self.w = None
        self.n_draws = n_draws
        self.poly_degree = poly_degree
        self.cred_int = cred_int

        # Prior parameters
        self.mu0 = mu0
        self.omega0 = omega0
        self.nu0 = nu0
        self.sigma_sq0 = sigma_sq0

    def _draw_scaled_inv_chi_sq(self, n, df, scale):
        """Draw n samples from a scaled inverse chi-squared distribution.

        If X ~ chi2(df) then df * scale / X follows the scaled inverse
        chi-squared distribution with df degrees of freedom and the given scale.

        Reference:
            https://en.wikipedia.org/wiki/Scaled_inverse_chi-squared_distribution
        """
        X = chi2.rvs(size=n, df=df)
        sigma_sq = df * scale / X
        return sigma_sq

    def fit(self, X, y):
        """Compute the posterior and simulate n_draws weight vectors from it.

        Parameters:
        -----------
        X: array of shape (n_samples, n_features)
            Training features (before the optional polynomial transformation).
        y: array of shape (n_samples,) or (n_samples, 1)
            Training targets.

        Raises:
        -------
        ValueError
            If the shapes of y, mu0 or omega0 do not match X.
        """
        # If polynomial transformation
        if self.poly_degree:
            X = polynomial_features(X, degree=self.poly_degree)

        X = np.asarray(X)
        n_samples, n_features = np.shape(X)

        # Work with flat vectors throughout. Mixing a column-vector y with a
        # 1-D mu0 would otherwise broadcast (p, 1) + (p,) into a (p, p) matrix
        # and silently corrupt the posterior mean.
        y = np.asarray(y, dtype=float).ravel()
        mu0 = np.asarray(self.mu0, dtype=float).ravel()
        omega0 = np.asarray(self.omega0, dtype=float)

        if y.shape[0] != n_samples:
            raise ValueError(
                "y has %d entries but X has %d samples" % (y.shape[0], n_samples))
        if mu0.shape[0] != n_features:
            raise ValueError(
                "mu0 has %d entries but X has %d features" % (mu0.shape[0], n_features))
        if omega0.shape != (n_features, n_features):
            raise ValueError(
                "omega0 must have shape (%d, %d)" % (n_features, n_features))

        n_draws = int(self.n_draws)

        X_X = X.T.dot(X)

        # Least squares approximate of beta
        beta_hat = np.linalg.pinv(X_X).dot(X.T).dot(y)

        # The posterior parameters can be determined analytically since we assume
        # conjugate priors for the likelihoods.

        # Normal prior / likelihood => Normal posterior
        omega_n = X_X + omega0
        # The inverse posterior precision is needed for the mean and for every
        # simulated covariance, so compute it once.
        omega_n_inv = np.linalg.pinv(omega_n)
        mu_n = omega_n_inv.dot(X_X.dot(beta_hat) + omega0.dot(mu0))

        # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior
        nu_n = self.nu0 + n_samples
        sigma_sq_n = (1.0 / nu_n) * (
            self.nu0 * self.sigma_sq0
            + y.dot(y)
            + mu0.dot(omega0).dot(mu0)
            - mu_n.dot(omega_n.dot(mu_n))
        )

        # Simulate parameter values for n_draws: first the variance, then the
        # weights conditional on that variance.
        beta_draws = np.empty((n_draws, n_features))
        for i in range(n_draws):
            sigma_sq = self._draw_scaled_inv_chi_sq(n=1, df=nu_n, scale=sigma_sq_n)
            beta_draws[i, :] = multivariate_normal.rvs(
                size=1, mean=mu_n, cov=sigma_sq * omega_n_inv)

        # Select the mean of the simulated variables as the ones used to make predictions
        self.w = np.mean(beta_draws, axis=0)

        # Lower and upper boundary of the credible interval, in percent
        l_eti = 50 - self.cred_int / 2.0
        u_eti = 50 + self.cred_int / 2.0
        # Row i holds [lower, upper] for weight i.
        self.eti = np.percentile(beta_draws, [l_eti, u_eti], axis=0).T

    def predict(self, X, eti=False):
        """Predict targets for X using the posterior mean weights.

        Parameters:
        -----------
        X: array of shape (n_samples, n_features)
            Features (before the optional polynomial transformation).
        eti: bool
            If True, also return predictions made with the lower and the upper
            equal-tail interval bound of the weights.

        Returns:
        --------
        y_pred, or the tuple (y_pred, y_lower_pred, y_upper_pred) when eti is
        True. The lower/upper predictions are X applied to the per-weight
        interval bounds stored by fit(); they are not percentiles of the
        predictions of the individual posterior draws.
        """
        # If polynomial transformation
        if self.poly_degree:
            X = polynomial_features(X, degree=self.poly_degree)

        y_pred = X.dot(self.w)
        if not eti:
            return y_pred

        # Predictions with the lower and upper boundaries of the
        # cred_int% equal tail interval of the weights
        lower_w = self.eti[:, 0]
        upper_w = self.eti[:, 1]
        y_lower_pred = X.dot(lower_w)
        y_upper_pred = X.dot(upper_w)
        return y_pred, y_lower_pred, y_upper_pred