ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
demo.py
(4677B)
1 from __future__ import print_function
2 from sklearn import datasets
3 import numpy as np
4 import math
5 import matplotlib.pyplot as plt
6
7 from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score
8 from mlfromscratch.deep_learning.optimizers import Adam
9 from mlfromscratch.deep_learning.loss_functions import CrossEntropy
10 from mlfromscratch.deep_learning.activation_functions import Softmax
11 from mlfromscratch.utils.kernels import *
12 from mlfromscratch.supervised_learning import *
13 from mlfromscratch.deep_learning import *
14 from mlfromscratch.unsupervised_learning import PCA
15 from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation
16
17
# Startup banner: one print call per row, top border to bottom border.
for banner_row in (
    "+-------------------------------------------+",
    "| |",
    "| Machine Learning From Scratch |",
    "| |",
    "+-------------------------------------------+",
):
    print(banner_row)
23
24
# ...........
#  LOAD DATA
# ...........
# The demo is a binary task: only samples of two digit classes are kept.
data = datasets.load_digits()
digit1 = 1
digit2 = 8
# Row indices of the kept samples: every digit1 sample first, then every
# digit2 sample.
idx = np.concatenate([np.flatnonzero(data.target == digit)
                      for digit in (digit1, digit2)])
# Change labels to {0, 1}: digit1 -> 0, digit2 -> 1.
# The labels come from one comparison rather than two successive in-place
# assignments. Assigning 0 for digit1 and then 1 for digit2 would also
# recode the freshly written zeros whenever digit2 == 0, leaving every
# label equal to 1.
y = (data.target[idx] == digit2).astype(data.target.dtype)
X = data.data[idx]
X = normalize(X)  # project helper from mlfromscratch.utils

print("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))
40
# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
# Reduce to 5 dimensions
pca = PCA()
X = pca.transform(X, n_components=5)

feature_matrix_shape = np.shape(X)
n_samples, n_features = feature_matrix_shape

# ..........................
#  TRAIN / TEST SPLIT
# ..........................
# Half of the samples are held out for evaluation.
split = train_test_split(X, y, test_size=0.5)
X_train, X_test, y_train, y_test = split

# Rescaled labels {-1, 1}: the {0, 1} labels mapped through 2*y - 1,
# used by the models that are fitted and scored on signed targets.
rescaled_y_train, rescaled_y_test = (
    2 * labels - np.ones(np.shape(labels))
    for labels in (y_train, y_test)
)
56
# .......
#  SETUP
# .......
# Classifiers built with their default constructor arguments.
naive_bayes = NaiveBayes()
logistic_regression = LogisticRegression()
perceptron = Perceptron()
decision_tree = ClassificationTree()
support_vector_machine = SupportVectorMachine()
lda = LDA()

# Classifiers with explicit hyper-parameters.
knn = KNN(k=4)
adaboost = Adaboost(n_clf=8)
random_forest = RandomForest(n_estimators=50)
gbc = GradientBoostingClassifier(n_estimators=50,
                                 learning_rate=.9,
                                 max_depth=2)
xgboost = XGBoost(n_estimators=50, learning_rate=0.5)

# Multilayer perceptron: two 64-unit dense layers with relu activations,
# followed by a 2-unit dense layer with softmax (one unit per class).
mlp = NeuralNetwork(optimizer=Adam(), loss=CrossEntropy)
mlp_layers = [
    Dense(input_shape=(n_features,), n_units=64),
    Activation('relu'),
    Dense(n_units=64),
    Activation('relu'),
    Dense(n_units=2),
    Activation('softmax'),
]
for layer in mlp_layers:
    mlp.add(layer)
79
# ........
#  TRAIN
# ........
def _announce_and_fit(message, model, targets, **fit_options):
    """Print ``message``, then fit ``model`` on ``X_train`` against ``targets``.

    Any extra keyword arguments are forwarded unchanged to ``model.fit``.
    """
    print(message)
    model.fit(X_train, targets, **fit_options)


print("Training:")
# Adaboost and the SVM are fitted on the {-1, 1} labels, the multilayer
# perceptron and the perceptron on to_categorical(y_train), and the rest
# on the plain {0, 1} labels. KNN has no fit step here: it receives the
# training data at prediction time.
_announce_and_fit("- Adaboost", adaboost, rescaled_y_train)
_announce_and_fit("- Decision Tree", decision_tree, y_train)
_announce_and_fit("- Gradient Boosting", gbc, y_train)
_announce_and_fit("- LDA", lda, y_train)
_announce_and_fit("- Logistic Regression", logistic_regression, y_train)
_announce_and_fit("- Multilayer Perceptron", mlp, to_categorical(y_train),
                  n_epochs=300, batch_size=50)
_announce_and_fit("- Naive Bayes", naive_bayes, y_train)
_announce_and_fit("- Perceptron", perceptron, to_categorical(y_train))
_announce_and_fit("- Random Forest", random_forest, y_train)
_announce_and_fit("- Support Vector Machine", support_vector_machine,
                  rescaled_y_train)
_announce_and_fit("- XGBoost", xgboost, y_train)
106
107
108
# .........
#  PREDICT
# .........
# Test-set predictions keyed by display name. The multilayer perceptron
# and the perceptron return one score per class, so argmax over axis 1
# turns them into class indices. KNN is given the training data here.
y_pred = {
    "Adaboost": adaboost.predict(X_test),
    "Gradient Boosting": gbc.predict(X_test),
    "Naive Bayes": naive_bayes.predict(X_test),
    "K Nearest Neighbors": knn.predict(X_test, X_train, y_train),
    "Logistic Regression": logistic_regression.predict(X_test),
    "LDA": lda.predict(X_test),
    "Multilayer Perceptron": np.argmax(mlp.predict(X_test), axis=1),
    "Perceptron": np.argmax(perceptron.predict(X_test), axis=1),
    "Decision Tree": decision_tree.predict(X_test),
    "Random Forest": random_forest.predict(X_test),
    "Support Vector Machine": support_vector_machine.predict(X_test),
    "XGBoost": xgboost.predict(X_test),
}
125
# ..........
#  ACCURACY
# ..........
print("Accuracy:")
for model_name, predictions in y_pred.items():
    # Adaboost and the SVM were trained on {-1, 1} labels, so they are
    # scored against the rescaled test labels; all others against {0, 1}.
    uses_signed_labels = model_name in ("Adaboost", "Support Vector Machine")
    reference = rescaled_y_test if uses_signed_labels else y_test
    score = accuracy_score(reference, predictions)
    print("\t%-23s: %.5f" % (model_name, score))
137
# .......
#  PLOT
# .......
# Scatter the test samples on their first two reduced components,
# coloured by their {0, 1} class label.
first_component = X_test[:, 0]
second_component = X_test[:, 1]
plot_title = "The Digit Dataset (digits %s and %s)" % (digit1, digit2)

plt.scatter(first_component, second_component, c=y_test)
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
plt.title(plot_title)
plt.show()
146
147