ml-finance-python

Python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

ridge_regression.py

(2967B)


      1 from __future__ import print_function
      2 import matplotlib.pyplot as plt
      3 import numpy as np
      4 import pandas as pd
      5 # Import helper functions
      6 from mlfromscratch.supervised_learning import PolynomialRidgeRegression
      7 from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, Plot
      8 from mlfromscratch.utils import train_test_split, polynomial_features, mean_squared_error
      9 
     10 
     11 def main():
     12 
     13     # Load temperature data
     14     data = pd.read_csv('mlfromscratch/data/TempLinkoping2016.txt', sep="\t")
     15 
     16     time = np.atleast_2d(data["time"].values).T
     17     temp = data["temp"].values
     18 
     19     X = time # fraction of the year [0, 1]
     20     y = temp
     21 
     22     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
     23 
     24     poly_degree = 15
     25 
     26     # Finding regularization constant using cross validation
     27     lowest_error = float("inf")
     28     best_reg_factor = None
     29     print ("Finding regularization constant using cross validation:")
     30     k = 10
     31     for reg_factor in np.arange(0, 0.1, 0.01):
     32         cross_validation_sets = k_fold_cross_validation_sets(
     33             X_train, y_train, k=k)
     34         mse = 0
     35         for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
     36             model = PolynomialRidgeRegression(degree=poly_degree, 
     37                                             reg_factor=reg_factor,
     38                                             learning_rate=0.001,
     39                                             n_iterations=10000)
     40             model.fit(_X_train, _y_train)
     41             y_pred = model.predict(_X_test)
     42             _mse = mean_squared_error(_y_test, y_pred)
     43             mse += _mse
     44         mse /= k
     45 
     46         # Print the mean squared error
     47         print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))
     48 
     49         # Save reg. constant that gave lowest error
     50         if mse < lowest_error:
     51             best_reg_factor = reg_factor
     52             lowest_error = mse
     53 
     54     # Make final prediction
     55     model = PolynomialRidgeRegression(degree=poly_degree, 
     56                                     reg_factor=reg_factor,
     57                                     learning_rate=0.001,
     58                                     n_iterations=10000)
     59     model.fit(X_train, y_train)
     60 
     61     y_pred = model.predict(X_test)
     62     mse = mean_squared_error(y_test, y_pred)
     63     print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, reg_factor))
     64 
     65     y_pred_line = model.predict(X)
     66 
     67     # Color map
     68     cmap = plt.get_cmap('viridis')
     69 
     70     # Plot the results
     71     m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
     72     m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
     73     plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
     74     plt.suptitle("Polynomial Ridge Regression")
     75     plt.title("MSE: %.2f" % mse, fontsize=10)
     76     plt.xlabel('Day')
     77     plt.ylabel('Temperature in Celcius')
     78     plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
     79     plt.show()
     80 
     81 if __name__ == "__main__":
     82     main()