ml-finance-python
Python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
polynomial_regression.py
(2974B)
1 from __future__ import print_function
2 import matplotlib.pyplot as plt
3 import numpy as np
4 import pandas as pd
5 # Import helper functions
6 from mlfromscratch.supervised_learning import PolynomialRidgeRegression
7 from mlfromscratch.utils import k_fold_cross_validation_sets, normalize, mean_squared_error
8 from mlfromscratch.utils import train_test_split, polynomial_features, Plot
9
10
def _cross_validated_mse(X_train, y_train, poly_degree, reg_factor, k,
                         learning_rate, n_iterations):
    """Return the mean validation MSE over k folds for one reg. factor.

    A fresh PolynomialRidgeRegression is fitted on every fold's training
    part and scored on that fold's held-out part.
    """
    # The folds are requested anew on every call (as the original loop did);
    # the helper's return type is not visible here, so it is not cached.
    cross_validation_sets = k_fold_cross_validation_sets(
        X_train, y_train, k=k)
    total_mse = 0
    for _X_train, _X_test, _y_train, _y_test in cross_validation_sets:
        model = PolynomialRidgeRegression(degree=poly_degree,
                                          reg_factor=reg_factor,
                                          learning_rate=learning_rate,
                                          n_iterations=n_iterations)
        model.fit(_X_train, _y_train)
        y_pred = model.predict(_X_test)
        total_mse += mean_squared_error(_y_test, y_pred)
    # Average over the number of folds that was requested.
    return total_mse / k


def main():
    """Fit a polynomial ridge regression to the temperature data and plot it.

    Steps:
      1. Read the tab-separated file '../data/TempLinkoping2016.txt' and use
         its "time" column as the single feature and "temp" as the target.
      2. Hold out 40% of the samples as a test set.
      3. Pick the regularization factor from np.arange(0, 0.1, 0.01) that
         gives the lowest 10-fold cross-validation MSE on the training set.
      4. Refit on the full training set with that factor, report the MSE on
         the test set and plot data points together with the fitted curve.
    """
    # Load temperature data
    data = pd.read_csv('../data/TempLinkoping2016.txt', sep="\t")

    time = np.atleast_2d(data["time"].values).T  # column vector of samples
    temp = data["temp"].values

    X = time  # fraction of the year [0, 1]
    y = temp

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    poly_degree = 15
    # Gradient-descent settings shared by the CV models and the final model.
    learning_rate = 0.001
    n_iterations = 10000

    # Finding regularization constant using cross validation
    lowest_error = float("inf")
    best_reg_factor = None
    print ("Finding regularization constant using cross validation:")
    k = 10
    for reg_factor in np.arange(0, 0.1, 0.01):
        mse = _cross_validated_mse(X_train, y_train, poly_degree, reg_factor,
                                   k, learning_rate, n_iterations)

        # Print the mean squared error
        print ("\tMean Squared Error: %s (regularization: %s)" % (mse, reg_factor))

        # Save reg. constant that gave lowest error
        if mse < lowest_error:
            best_reg_factor = reg_factor
            lowest_error = mse

    # Make final prediction
    model = PolynomialRidgeRegression(degree=poly_degree,
                                      reg_factor=best_reg_factor,
                                      learning_rate=learning_rate,
                                      n_iterations=n_iterations)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    # Report the held-out test error (the same value shown in the plot
    # title), not the cross-validation error used to choose the factor.
    print ("Mean squared error: %s (given by reg. factor: %s)" % (mse, best_reg_factor))

    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results. X is a year fraction, so it is scaled to a day index
    # for the 'Day' axis (366 presumably because 2016 was a leap year).
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2, label="Prediction")
    plt.suptitle("Polynomial Ridge Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
79
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()