ml-finance-python

Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
demo.py

(4677B)
      1 from __future__ import print_function
      2 from sklearn import datasets
      3 import numpy as np
      4 import math
      5 import matplotlib.pyplot as plt
      6 
      7 from mlfromscratch.utils import train_test_split, normalize, to_categorical, accuracy_score
      8 from mlfromscratch.deep_learning.optimizers import Adam
      9 from mlfromscratch.deep_learning.loss_functions import CrossEntropy
     10 from mlfromscratch.deep_learning.activation_functions import Softmax
     11 from mlfromscratch.utils.kernels import *
     12 from mlfromscratch.supervised_learning import *
     13 from mlfromscratch.deep_learning import *
     14 from mlfromscratch.unsupervised_learning import PCA
     15 from mlfromscratch.deep_learning.layers import Dense, Dropout, Conv2D, Flatten, Activation
     16 
     17 
     18 print ("+-------------------------------------------+")
     19 print ("|                                           |")
     20 print ("|       Machine Learning From Scratch       |")
     21 print ("|                                           |")
     22 print ("+-------------------------------------------+")
     23 
     24 
     25 # ...........
     26 #  LOAD DATA
     27 # ...........
     28 data = datasets.load_digits()
     29 digit1 = 1
     30 digit2 = 8
     31 idx = np.append(np.where(data.target == digit1)[0], np.where(data.target == digit2)[0])
     32 y = data.target[idx]
     33 # Change labels to {0, 1}
     34 y[y == digit1] = 0
     35 y[y == digit2] = 1
     36 X = data.data[idx]
     37 X = normalize(X)
     38 
     39 print ("Dataset: The Digit Dataset (digits %s and %s)" % (digit1, digit2))
     40 
     41 # ..........................
     42 #  DIMENSIONALITY REDUCTION
     43 # ..........................
     44 pca = PCA()
     45 X = pca.transform(X, n_components=5) # Reduce to 5 dimensions
     46 
     47 n_samples, n_features = np.shape(X)
     48 
     49 # ..........................
     50 #  TRAIN / TEST SPLIT
     51 # ..........................
     52 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
     53 # Rescaled labels {-1, 1}
     54 rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
     55 rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))
     56 
     57 # .......
     58 #  SETUP
     59 # .......
     60 adaboost = Adaboost(n_clf = 8)
     61 naive_bayes = NaiveBayes()
     62 knn = KNN(k=4)
     63 logistic_regression = LogisticRegression()
     64 mlp = NeuralNetwork(optimizer=Adam(), 
     65                     loss=CrossEntropy)
     66 mlp.add(Dense(input_shape=(n_features,), n_units=64))
     67 mlp.add(Activation('relu'))
     68 mlp.add(Dense(n_units=64))
     69 mlp.add(Activation('relu'))
     70 mlp.add(Dense(n_units=2))   
     71 mlp.add(Activation('softmax'))
     72 perceptron = Perceptron()
     73 decision_tree = ClassificationTree()
     74 random_forest = RandomForest(n_estimators=50)
     75 support_vector_machine = SupportVectorMachine()
     76 lda = LDA()
     77 gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=.9, max_depth=2)
     78 xgboost = XGBoost(n_estimators=50, learning_rate=0.5)
     79 
     80 # ........
     81 #  TRAIN
     82 # ........
     83 print ("Training:")
     84 print ("- Adaboost")
     85 adaboost.fit(X_train, rescaled_y_train)
     86 print ("- Decision Tree")
     87 decision_tree.fit(X_train, y_train)
     88 print ("- Gradient Boosting")
     89 gbc.fit(X_train, y_train)
     90 print ("- LDA")
     91 lda.fit(X_train, y_train)
     92 print ("- Logistic Regression")
     93 logistic_regression.fit(X_train, y_train)
     94 print ("- Multilayer Perceptron")
     95 mlp.fit(X_train, to_categorical(y_train), n_epochs=300, batch_size=50)
     96 print ("- Naive Bayes")
     97 naive_bayes.fit(X_train, y_train)
     98 print ("- Perceptron")
     99 perceptron.fit(X_train, to_categorical(y_train))
    100 print ("- Random Forest")
    101 random_forest.fit(X_train, y_train)
    102 print ("- Support Vector Machine")
    103 support_vector_machine.fit(X_train, rescaled_y_train)
    104 print ("- XGBoost")
    105 xgboost.fit(X_train, y_train)
    106 
    107 
    108 
    109 # .........
    110 #  PREDICT
    111 # .........
    112 y_pred = {}
    113 y_pred["Adaboost"] = adaboost.predict(X_test)
    114 y_pred["Gradient Boosting"] = gbc.predict(X_test)
    115 y_pred["Naive Bayes"] = naive_bayes.predict(X_test)
    116 y_pred["K Nearest Neighbors"] = knn.predict(X_test, X_train, y_train)
    117 y_pred["Logistic Regression"] = logistic_regression.predict(X_test)
    118 y_pred["LDA"] = lda.predict(X_test)
    119 y_pred["Multilayer Perceptron"] = np.argmax(mlp.predict(X_test), axis=1)
    120 y_pred["Perceptron"] = np.argmax(perceptron.predict(X_test), axis=1)
    121 y_pred["Decision Tree"] = decision_tree.predict(X_test)
    122 y_pred["Random Forest"] = random_forest.predict(X_test)
    123 y_pred["Support Vector Machine"] = support_vector_machine.predict(X_test)
    124 y_pred["XGBoost"] = xgboost.predict(X_test)
    125 
    126 # ..........
    127 #  ACCURACY
    128 # ..........
    129 print ("Accuracy:")
    130 for clf in y_pred:
    131     # Rescaled {-1 1}
    132     if clf == "Adaboost" or clf == "Support Vector Machine":
    133         print ("\t%-23s: %.5f" %(clf, accuracy_score(rescaled_y_test, y_pred[clf])))
    134     # Categorical
    135     else:
    136         print ("\t%-23s: %.5f" %(clf, accuracy_score(y_test, y_pred[clf])))
    137 
    138 # .......
    139 #  PLOT
    140 # .......
    141 plt.scatter(X_test[:,0], X_test[:,1], c=y_test)
    142 plt.ylabel("Principal Component 2")
    143 plt.xlabel("Principal Component 1")
    144 plt.title("The Digit Dataset (digits %s and %s)" % (digit1, digit2))
    145 plt.show()
    146 
    147