ml-finance-python

Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
regression.py

(10499B)
      1 from __future__ import print_function, division
      2 import numpy as np
      3 import math
      4 from mlfromscratch.utils import normalize, polynomial_features
      5 
      6 class l1_regularization():
      7     """ Regularization for Lasso Regression """
      8     def __init__(self, alpha):
      9         self.alpha = alpha
     10     
     11     def __call__(self, w):
     12         return self.alpha * np.linalg.norm(w)
     13 
     14     def grad(self, w):
     15         return self.alpha * np.sign(w)
     16 
     17 class l2_regularization():
     18     """ Regularization for Ridge Regression """
     19     def __init__(self, alpha):
     20         self.alpha = alpha
     21     
     22     def __call__(self, w):
     23         return self.alpha * 0.5 *  w.T.dot(w)
     24 
     25     def grad(self, w):
     26         return self.alpha * w
     27 
     28 class l1_l2_regularization():
     29     """ Regularization for Elastic Net Regression """
     30     def __init__(self, alpha, l1_ratio=0.5):
     31         self.alpha = alpha
     32         self.l1_ratio = l1_ratio
     33 
     34     def __call__(self, w):
     35         l1_contr = self.l1_ratio * np.linalg.norm(w)
     36         l2_contr = (1 - self.l1_ratio) * 0.5 * w.T.dot(w) 
     37         return self.alpha * (l1_contr + l2_contr)
     38 
     39     def grad(self, w):
     40         l1_contr = self.l1_ratio * np.sign(w)
     41         l2_contr = (1 - self.l1_ratio) * w
     42         return self.alpha * (l1_contr + l2_contr) 
     43 
     44 class Regression(object):
     45     """ Base regression model. Models the relationship between a scalar dependent variable y and the independent 
     46     variables X. 
     47     Parameters:
     48     -----------
     49     n_iterations: float
     50         The number of training iterations the algorithm will tune the weights for.
     51     learning_rate: float
     52         The step length that will be used when updating the weights.
     53     """
     54     def __init__(self, n_iterations, learning_rate):
     55         self.n_iterations = n_iterations
     56         self.learning_rate = learning_rate
     57 
     58     def initialize_weights(self, n_features):
     59         """ Initialize weights randomly [-1/N, 1/N] """
     60         limit = 1 / math.sqrt(n_features)
     61         self.w = np.random.uniform(-limit, limit, (n_features, ))
     62 
     63     def fit(self, X, y):
     64         # Insert constant ones for bias weights
     65         X = np.insert(X, 0, 1, axis=1)
     66         self.training_errors = []
     67         self.initialize_weights(n_features=X.shape[1])
     68 
     69         # Do gradient descent for n_iterations
     70         for i in range(self.n_iterations):
     71             y_pred = X.dot(self.w)
     72             # Calculate l2 loss
     73             mse = np.mean(0.5 * (y - y_pred)**2 + self.regularization(self.w))
     74             self.training_errors.append(mse)
     75             # Gradient of l2 loss w.r.t w
     76             grad_w = -(y - y_pred).dot(X) + self.regularization.grad(self.w)
     77             # Update the weights
     78             self.w -= self.learning_rate * grad_w
     79 
     80     def predict(self, X):
     81         # Insert constant ones for bias weights
     82         X = np.insert(X, 0, 1, axis=1)
     83         y_pred = X.dot(self.w)
     84         return y_pred
     85 
     86 class LinearRegression(Regression):
     87     """Linear model.
     88     Parameters:
     89     -----------
     90     n_iterations: float
     91         The number of training iterations the algorithm will tune the weights for.
     92     learning_rate: float
     93         The step length that will be used when updating the weights.
     94     gradient_descent: boolean
     95         True or false depending if gradient descent should be used when training. If 
     96         false then we use batch optimization by least squares.
     97     """
     98     def __init__(self, n_iterations=100, learning_rate=0.001, gradient_descent=True):
     99         self.gradient_descent = gradient_descent
    100         # No regularization
    101         self.regularization = lambda x: 0
    102         self.regularization.grad = lambda x: 0
    103         super(LinearRegression, self).__init__(n_iterations=n_iterations,
    104                                             learning_rate=learning_rate)
    105     def fit(self, X, y):
    106         # If not gradient descent => Least squares approximation of w
    107         if not self.gradient_descent:
    108             # Insert constant ones for bias weights
    109             X = np.insert(X, 0, 1, axis=1)
    110             # Calculate weights by least squares (using Moore-Penrose pseudoinverse)
    111             U, S, V = np.linalg.svd(X.T.dot(X))
    112             S = np.diag(S)
    113             X_sq_reg_inv = V.dot(np.linalg.pinv(S)).dot(U.T)
    114             self.w = X_sq_reg_inv.dot(X.T).dot(y)
    115         else:
    116             super(LinearRegression, self).fit(X, y)
    117 
    118 class LassoRegression(Regression):
    119     """Linear regression model with a regularization factor which does both variable selection 
    120     and regularization. Model that tries to balance the fit of the model with respect to the training 
    121     data and the complexity of the model. A large regularization factor with decreases the variance of 
    122     the model and do para.
    123     Parameters:
    124     -----------
    125     degree: int
    126         The degree of the polynomial that the independent variable X will be transformed to.
    127     reg_factor: float
    128         The factor that will determine the amount of regularization and feature
    129         shrinkage. 
    130     n_iterations: float
    131         The number of training iterations the algorithm will tune the weights for.
    132     learning_rate: float
    133         The step length that will be used when updating the weights.
    134     """
    135     def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01):
    136         self.degree = degree
    137         self.regularization = l1_regularization(alpha=reg_factor)
    138         super(LassoRegression, self).__init__(n_iterations, 
    139                                             learning_rate)
    140 
    141     def fit(self, X, y):
    142         X = normalize(polynomial_features(X, degree=self.degree))
    143         super(LassoRegression, self).fit(X, y)
    144 
    145     def predict(self, X):
    146         X = normalize(polynomial_features(X, degree=self.degree))
    147         return super(LassoRegression, self).predict(X)
    148 
    149 class PolynomialRegression(Regression):
    150     """Performs a non-linear transformation of the data before fitting the model
    151     and doing predictions which allows for doing non-linear regression.
    152     Parameters:
    153     -----------
    154     degree: int
    155         The degree of the polynomial that the independent variable X will be transformed to.
    156     n_iterations: float
    157         The number of training iterations the algorithm will tune the weights for.
    158     learning_rate: float
    159         The step length that will be used when updating the weights.
    160     """
    161     def __init__(self, degree, n_iterations=3000, learning_rate=0.001):
    162         self.degree = degree
    163         # No regularization
    164         self.regularization = lambda x: 0
    165         self.regularization.grad = lambda x: 0
    166         super(PolynomialRegression, self).__init__(n_iterations=n_iterations,
    167                                                 learning_rate=learning_rate)
    168 
    169     def fit(self, X, y):
    170         X = polynomial_features(X, degree=self.degree)
    171         super(PolynomialRegression, self).fit(X, y)
    172 
    173     def predict(self, X):
    174         X = polynomial_features(X, degree=self.degree)
    175         return super(PolynomialRegression, self).predict(X)
    176 
    177 class RidgeRegression(Regression):
    178     """Also referred to as Tikhonov regularization. Linear regression model with a regularization factor.
    179     Model that tries to balance the fit of the model with respect to the training data and the complexity
    180     of the model. A large regularization factor with decreases the variance of the model.
    181     Parameters:
    182     -----------
    183     reg_factor: float
    184         The factor that will determine the amount of regularization and feature
    185         shrinkage. 
    186     n_iterations: float
    187         The number of training iterations the algorithm will tune the weights for.
    188     learning_rate: float
    189         The step length that will be used when updating the weights.
    190     """
    191     def __init__(self, reg_factor, n_iterations=1000, learning_rate=0.001):
    192         self.regularization = l2_regularization(alpha=reg_factor)
    193         super(RidgeRegression, self).__init__(n_iterations, 
    194                                             learning_rate)
    195 
    196 class PolynomialRidgeRegression(Regression):
    197     """Similar to regular ridge regression except that the data is transformed to allow
    198     for polynomial regression.
    199     Parameters:
    200     -----------
    201     degree: int
    202         The degree of the polynomial that the independent variable X will be transformed to.
    203     reg_factor: float
    204         The factor that will determine the amount of regularization and feature
    205         shrinkage. 
    206     n_iterations: float
    207         The number of training iterations the algorithm will tune the weights for.
    208     learning_rate: float
    209         The step length that will be used when updating the weights.
    210     """
    211     def __init__(self, degree, reg_factor, n_iterations=3000, learning_rate=0.01, gradient_descent=True):
    212         self.degree = degree
    213         self.regularization = l2_regularization(alpha=reg_factor)
    214         super(PolynomialRidgeRegression, self).__init__(n_iterations, 
    215                                                         learning_rate)
    216 
    217     def fit(self, X, y):
    218         X = normalize(polynomial_features(X, degree=self.degree))
    219         super(PolynomialRidgeRegression, self).fit(X, y)
    220 
    221     def predict(self, X):
    222         X = normalize(polynomial_features(X, degree=self.degree))
    223         return super(PolynomialRidgeRegression, self).predict(X)
    224 
    225 class ElasticNet(Regression):
    226     """ Regression where a combination of l1 and l2 regularization are used. The
    227     ratio of their contributions are set with the 'l1_ratio' parameter.
    228     Parameters:
    229     -----------
    230     degree: int
    231         The degree of the polynomial that the independent variable X will be transformed to.
    232     reg_factor: float
    233         The factor that will determine the amount of regularization and feature
    234         shrinkage. 
    235     l1_ration: float
    236         Weighs the contribution of l1 and l2 regularization.
    237     n_iterations: float
    238         The number of training iterations the algorithm will tune the weights for.
    239     learning_rate: float
    240         The step length that will be used when updating the weights.
    241     """
    242     def __init__(self, degree=1, reg_factor=0.05, l1_ratio=0.5, n_iterations=3000, 
    243                 learning_rate=0.01):
    244         self.degree = degree
    245         self.regularization = l1_l2_regularization(alpha=reg_factor, l1_ratio=l1_ratio)
    246         super(ElasticNet, self).__init__(n_iterations, 
    247                                         learning_rate)
    248 
    249     def fit(self, X, y):
    250         X = normalize(polynomial_features(X, degree=self.degree))
    251         super(ElasticNet, self).fit(X, y)
    252 
    253     def predict(self, X):
    254         X = normalize(polynomial_features(X, degree=self.degree))
    255         return super(ElasticNet, self).predict(X)