ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
regression.py
(10499B)
1 from __future__ import print_function, division
2 import numpy as np
3 import math
4 from mlfromscratch.utils import normalize, polynomial_features
5
class l1_regularization():
    """ Regularization for Lasso Regression.

    The penalty is alpha * ||w||_1, i.e. alpha times the sum of the absolute
    values of the weights.

    Parameters:
    -----------
    alpha: float
        The factor the penalty (and its gradient) is scaled by.
    """
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, w):
        """ Return the l1 penalty alpha * sum(|w_i|) for the weights w. """
        # np.linalg.norm(w) defaults to the l2 norm; summing absolute values
        # gives the l1 norm, which is what the sign-based gradient in grad()
        # is the (sub)derivative of.
        return self.alpha * np.sum(np.abs(w))

    def grad(self, w):
        """ Return the (sub)gradient of the penalty w.r.t. w: alpha * sign(w). """
        return self.alpha * np.sign(w)
16
class l2_regularization():
    """ Regularization for Ridge Regression.

    The penalty is alpha * 0.5 * w^T w.

    Parameters:
    -----------
    alpha: float
        The factor the penalty (and its gradient) is scaled by.
    """
    def __init__(self, alpha):
        self.alpha = alpha

    def __call__(self, w):
        """ Return the penalty value for the weights w. """
        squared_norm = w.T.dot(w)
        return self.alpha * 0.5 * squared_norm

    def grad(self, w):
        """ Return the derivative of the penalty w.r.t. w: alpha * w. """
        return self.alpha * w
27
class l1_l2_regularization():
    """ Regularization for Elastic Net Regression.

    The penalty is
        alpha * (l1_ratio * ||w||_1 + (1 - l1_ratio) * 0.5 * w^T w).

    Parameters:
    -----------
    alpha: float
        The factor the combined penalty (and its gradient) is scaled by.
    l1_ratio: float
        Weighs the contribution of the l1 term; the l2 term gets 1 - l1_ratio.
    """
    def __init__(self, alpha, l1_ratio=0.5):
        self.alpha = alpha
        self.l1_ratio = l1_ratio

    def __call__(self, w):
        """ Return the combined l1/l2 penalty for the weights w. """
        # np.linalg.norm(w) defaults to the l2 norm; the l1 contribution must
        # be the sum of absolute values to agree with the gradient in grad().
        l1_contr = self.l1_ratio * np.sum(np.abs(w))
        l2_contr = (1 - self.l1_ratio) * 0.5 * w.T.dot(w)
        return self.alpha * (l1_contr + l2_contr)

    def grad(self, w):
        """ Return the (sub)gradient of the combined penalty w.r.t. w. """
        l1_contr = self.l1_ratio * np.sign(w)
        l2_contr = (1 - self.l1_ratio) * w
        return self.alpha * (l1_contr + l2_contr)
43
class Regression(object):
    """ Base regression model. Models the relationship between a scalar dependent variable y and the independent
    variables X. Weights are trained with gradient descent.

    Subclasses may set `self.regularization` to a callable that returns the penalty for a
    weight vector and exposes a `grad(w)` method. If the attribute is not set, no
    penalty is applied.
    Parameters:
    -----------
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, n_iterations, learning_rate):
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate

    def initialize_weights(self, n_features):
        """ Initialize weights randomly in [-1/sqrt(N), 1/sqrt(N)] where N = n_features """
        limit = 1 / math.sqrt(n_features)
        self.w = np.random.uniform(-limit, limit, (n_features, ))

    def fit(self, X, y):
        """ Fit the weights to (X, y) by gradient descent.

        Stores the learned weights in `self.w` (bias weight first) and the loss of
        every iteration in `self.training_errors`.
        """
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        self.training_errors = []
        self.initialize_weights(n_features=X.shape[1])

        # The base class does not define a regularization itself; fall back to
        # "no penalty" so it can be used without a subclass setting one.
        regularization = getattr(self, "regularization", None)
        if regularization is None:
            penalty = penalty_grad = lambda w: 0
        else:
            penalty, penalty_grad = regularization, regularization.grad

        # Do gradient descent for n_iterations
        for _ in range(self.n_iterations):
            y_pred = X.dot(self.w)
            # Calculate l2 loss (plus the regularization penalty)
            mse = np.mean(0.5 * (y - y_pred)**2 + penalty(self.w))
            self.training_errors.append(mse)
            # Gradient of l2 loss w.r.t w (summed over the samples, not averaged)
            grad_w = -(y - y_pred).dot(X) + penalty_grad(self.w)
            # Update the weights
            self.w -= self.learning_rate * grad_w

    def predict(self, X):
        """ Return the predictions X.dot(w) after prepending the bias column to X. """
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        y_pred = X.dot(self.w)
        return y_pred
85
class LinearRegression(Regression):
    """Linear model.
    Parameters:
    -----------
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    gradient_descent: boolean
        True or false depending if gradient descent should be used when training. If
        false then we use batch optimization by least squares.
    """
    def __init__(self, n_iterations=100, learning_rate=0.001, gradient_descent=True):
        self.gradient_descent = gradient_descent
        # No regularization
        self.regularization = lambda x: 0
        self.regularization.grad = lambda x: 0
        super(LinearRegression, self).__init__(n_iterations=n_iterations,
                                               learning_rate=learning_rate)

    def fit(self, X, y):
        """ Fit the weights, by gradient descent or by least squares.

        With gradient_descent=False the weights are the least squares solution
        computed from the Moore-Penrose pseudoinverse of the bias-augmented X.
        """
        if self.gradient_descent:
            super(LinearRegression, self).fit(X, y)
            return
        # Insert constant ones for bias weights
        X = np.insert(X, 0, 1, axis=1)
        # Least squares solution w = pinv(X) y. np.linalg.svd returns
        # (U, S, Vh) with Vh already transposed, so rebuilding the inverse of
        # X^T X as Vh.dot(pinv(S)).dot(U.T) is only correct when U happens to
        # be symmetric; np.linalg.pinv handles the decomposition correctly.
        self.w = np.linalg.pinv(X).dot(y)
117
class LassoRegression(Regression):
    """Linear regression model with an l1 regularization factor, which does both variable
    selection and regularization. The model tries to balance the fit with respect to the
    training data against the complexity of the model. A large regularization factor
    decreases the variance of the model.
    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01):
        super(LassoRegression, self).__init__(n_iterations=n_iterations,
                                              learning_rate=learning_rate)
        self.degree = degree
        self.regularization = l1_regularization(alpha=reg_factor)

    def _transform(self, X):
        """ Expand X to polynomial features of the configured degree, then normalize. """
        expanded = polynomial_features(X, degree=self.degree)
        return normalize(expanded)

    def fit(self, X, y):
        """ Train on the transformed features. """
        super(LassoRegression, self).fit(self._transform(X), y)

    def predict(self, X):
        """ Predict from the transformed features. """
        return super(LassoRegression, self).predict(self._transform(X))
148
class PolynomialRegression(Regression):
    """Applies a polynomial (non-linear) transformation to the data before fitting
    and before predicting, which allows for doing non-linear regression.
    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree, n_iterations=3000, learning_rate=0.001):
        super(PolynomialRegression, self).__init__(n_iterations=n_iterations,
                                                   learning_rate=learning_rate)
        self.degree = degree

        # No regularization: the penalty and its gradient are both zero
        def no_penalty(x):
            return 0
        no_penalty.grad = lambda x: 0
        self.regularization = no_penalty

    def fit(self, X, y):
        """ Train on the polynomial expansion of X. """
        expanded = polynomial_features(X, degree=self.degree)
        super(PolynomialRegression, self).fit(expanded, y)

    def predict(self, X):
        """ Predict from the polynomial expansion of X. """
        expanded = polynomial_features(X, degree=self.degree)
        return super(PolynomialRegression, self).predict(expanded)
176
class RidgeRegression(Regression):
    """Also referred to as Tikhonov regularization. Linear regression model with an l2
    regularization factor. The model tries to balance the fit with respect to the training
    data against the complexity of the model. A large regularization factor decreases the
    variance of the model.
    Parameters:
    -----------
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, reg_factor, n_iterations=1000, learning_rate=0.001):
        super(RidgeRegression, self).__init__(n_iterations=n_iterations,
                                              learning_rate=learning_rate)
        self.regularization = l2_regularization(alpha=reg_factor)
195
class PolynomialRidgeRegression(Regression):
    """Similar to regular ridge regression except that the data is transformed to allow
    for polynomial regression.
    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    gradient_descent: boolean
        Currently ignored by this class; training always goes through the
        gradient descent of the base class.
    """
    def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01, gradient_descent=True):
        super(PolynomialRidgeRegression, self).__init__(n_iterations=n_iterations,
                                                        learning_rate=learning_rate)
        self.degree = degree
        self.regularization = l2_regularization(alpha=reg_factor)

    def _transform(self, X):
        """ Expand X to polynomial features of the configured degree, then normalize. """
        expanded = polynomial_features(X, degree=self.degree)
        return normalize(expanded)

    def fit(self, X, y):
        """ Train on the transformed features. """
        super(PolynomialRidgeRegression, self).fit(self._transform(X), y)

    def predict(self, X):
        """ Predict from the transformed features. """
        return super(PolynomialRidgeRegression, self).predict(self._transform(X))
224
class ElasticNet(Regression):
    """ Regression where a combination of l1 and l2 regularization is used. The
    ratio of their contributions is set with the 'l1_ratio' parameter.
    Parameters:
    -----------
    degree: int
        The degree of the polynomial that the independent variable X will be transformed to.
    reg_factor: float
        The factor that will determine the amount of regularization and feature
        shrinkage.
    l1_ratio: float
        Weighs the contribution of l1 and l2 regularization.
    n_iterations: int
        The number of training iterations the algorithm will tune the weights for.
    learning_rate: float
        The step length that will be used when updating the weights.
    """
    def __init__(self, degree=1, reg_factor=0.05, l1_ratio=0.5, n_iterations=3000,
                 learning_rate=0.01):
        super(ElasticNet, self).__init__(n_iterations=n_iterations,
                                         learning_rate=learning_rate)
        self.degree = degree
        self.regularization = l1_l2_regularization(alpha=reg_factor, l1_ratio=l1_ratio)

    def _transform(self, X):
        """ Expand X to polynomial features of the configured degree, then normalize. """
        expanded = polynomial_features(X, degree=self.degree)
        return normalize(expanded)

    def fit(self, X, y):
        """ Train on the transformed features. """
        super(ElasticNet, self).fit(self._transform(X), y)

    def predict(self, X):
        """ Predict from the transformed features. """
        return super(ElasticNet, self).predict(self._transform(X))
251 super(ElasticNet, self).fit(X, y)
252
253 def predict(self, X):
254 X = normalize(polynomial_features(X, degree=self.degree))
255 return super(ElasticNet, self).predict(X)