ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

gradient_boosting.py

(4281B)


      1 from __future__ import division, print_function
      2 import numpy as np
      3 import progressbar
      4 
      5 # Import helper functions
      6 from mlfromscratch.utils import train_test_split, standardize, to_categorical
      7 from mlfromscratch.utils import mean_squared_error, accuracy_score
      8 from mlfromscratch.deep_learning.loss_functions import SquareLoss, CrossEntropy
      9 from mlfromscratch.supervised_learning.decision_tree import RegressionTree
     10 from mlfromscratch.utils.misc import bar_widgets
     11 
     12 
     13 class GradientBoosting(object):
     14     """Super class of GradientBoostingClassifier and GradientBoostinRegressor. 
     15     Uses a collection of regression trees that trains on predicting the gradient
     16     of the loss function. 
     17 
     18     Parameters:
     19     -----------
     20     n_estimators: int
     21         The number of classification trees that are used.
     22     learning_rate: float
     23         The step length that will be taken when following the negative gradient during
     24         training.
     25     min_samples_split: int
     26         The minimum number of samples needed to make a split when building a tree.
     27     min_impurity: float
     28         The minimum impurity required to split the tree further. 
     29     max_depth: int
     30         The maximum depth of a tree.
     31     regression: boolean
     32         True or false depending on if we're doing regression or classification.
     33     """
     34     def __init__(self, n_estimators, learning_rate, min_samples_split,
     35                  min_impurity, max_depth, regression):
     36         self.n_estimators = n_estimators
     37         self.learning_rate = learning_rate
     38         self.min_samples_split = min_samples_split
     39         self.min_impurity = min_impurity
     40         self.max_depth = max_depth
     41         self.regression = regression
     42         self.bar = progressbar.ProgressBar(widgets=bar_widgets)
     43         
     44         # Square loss for regression
     45         # Log loss for classification
     46         self.loss = SquareLoss()
     47         if not self.regression:
     48             self.loss = CrossEntropy()
     49 
     50         # Initialize regression trees
     51         self.trees = []
     52         for _ in range(n_estimators):
     53             tree = RegressionTree(
     54                     min_samples_split=self.min_samples_split,
     55                     min_impurity=min_impurity,
     56                     max_depth=self.max_depth)
     57             self.trees.append(tree)
     58 
     59 
     60     def fit(self, X, y):
     61         y_pred = np.full(np.shape(y), np.mean(y, axis=0))
     62         for i in self.bar(range(self.n_estimators)):
     63             gradient = self.loss.gradient(y, y_pred)
     64             self.trees[i].fit(X, gradient)
     65             update = self.trees[i].predict(X)
     66             # Update y prediction
     67             y_pred -= np.multiply(self.learning_rate, update)
     68 
     69 
     70     def predict(self, X):
     71         y_pred = np.array([])
     72         # Make predictions
     73         for tree in self.trees:
     74             update = tree.predict(X)
     75             update = np.multiply(self.learning_rate, update)
     76             y_pred = -update if not y_pred.any() else y_pred - update
     77 
     78         if not self.regression:
     79             # Turn into probability distribution
     80             y_pred = np.exp(y_pred) / np.expand_dims(np.sum(np.exp(y_pred), axis=1), axis=1)
     81             # Set label to the value that maximizes probability
     82             y_pred = np.argmax(y_pred, axis=1)
     83         return y_pred
     84 
     85 
     86 class GradientBoostingRegressor(GradientBoosting):
     87     def __init__(self, n_estimators=200, learning_rate=0.5, min_samples_split=2,
     88                  min_var_red=1e-7, max_depth=4, debug=False):
     89         super(GradientBoostingRegressor, self).__init__(n_estimators=n_estimators, 
     90             learning_rate=learning_rate, 
     91             min_samples_split=min_samples_split, 
     92             min_impurity=min_var_red,
     93             max_depth=max_depth,
     94             regression=True)
     95 
     96 class GradientBoostingClassifier(GradientBoosting):
     97     def __init__(self, n_estimators=200, learning_rate=.5, min_samples_split=2,
     98                  min_info_gain=1e-7, max_depth=2, debug=False):
     99         super(GradientBoostingClassifier, self).__init__(n_estimators=n_estimators, 
    100             learning_rate=learning_rate, 
    101             min_samples_split=min_samples_split, 
    102             min_impurity=min_info_gain,
    103             max_depth=max_depth,
    104             regression=False)
    105 
    106     def fit(self, X, y):
    107         y = to_categorical(y)
    108         super(GradientBoostingClassifier, self).fit(X, y)
    109