ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

support_vector_machine.py

(4024B)


      1 
      2 from __future__ import division, print_function
      3 import numpy as np
      4 import cvxopt
      5 from mlfromscratch.utils import train_test_split, normalize, accuracy_score
      6 from mlfromscratch.utils.kernels import *
      7 from mlfromscratch.utils import Plot
      8 
# Hide cvxopt output: turn off the solver's progress log. This writes to
# cvxopt's module-level options dict, so it applies to every cvxopt solver
# call made after this module has been imported.
cvxopt.solvers.options['show_progress'] = False
     11 
     12 class SupportVectorMachine(object):
     13     """The Support Vector Machine classifier.
     14     Uses cvxopt to solve the quadratic optimization problem.
     15 
     16     Parameters:
     17     -----------
     18     C: float
     19         Penalty term.
     20     kernel: function
     21         Kernel function. Can be either polynomial, rbf or linear.
     22     power: int
     23         The degree of the polynomial kernel. Will be ignored by the other
     24         kernel functions.
     25     gamma: float
     26         Used in the rbf kernel function.
     27     coef: float
     28         Bias term used in the polynomial kernel function.
     29     """
     30     def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
     31         self.C = C
     32         self.kernel = kernel
     33         self.power = power
     34         self.gamma = gamma
     35         self.coef = coef
     36         self.lagr_multipliers = None
     37         self.support_vectors = None
     38         self.support_vector_labels = None
     39         self.intercept = None
     40 
     41     def fit(self, X, y):
     42 
     43         n_samples, n_features = np.shape(X)
     44 
     45         # Set gamma to 1/n_features by default
     46         if not self.gamma:
     47             self.gamma = 1 / n_features
     48 
     49         # Initialize kernel method with parameters
     50         self.kernel = self.kernel(
     51             power=self.power,
     52             gamma=self.gamma,
     53             coef=self.coef)
     54 
     55         # Calculate kernel matrix
     56         kernel_matrix = np.zeros((n_samples, n_samples))
     57         for i in range(n_samples):
     58             for j in range(n_samples):
     59                 kernel_matrix[i, j] = self.kernel(X[i], X[j])
     60 
     61         # Define the quadratic optimization problem
     62         P = cvxopt.matrix(np.outer(y, y) * kernel_matrix, tc='d')
     63         q = cvxopt.matrix(np.ones(n_samples) * -1)
     64         A = cvxopt.matrix(y, (1, n_samples), tc='d')
     65         b = cvxopt.matrix(0, tc='d')
     66 
     67         if not self.C:
     68             G = cvxopt.matrix(np.identity(n_samples) * -1)
     69             h = cvxopt.matrix(np.zeros(n_samples))
     70         else:
     71             G_max = np.identity(n_samples) * -1
     72             G_min = np.identity(n_samples)
     73             G = cvxopt.matrix(np.vstack((G_max, G_min)))
     74             h_max = cvxopt.matrix(np.zeros(n_samples))
     75             h_min = cvxopt.matrix(np.ones(n_samples) * self.C)
     76             h = cvxopt.matrix(np.vstack((h_max, h_min)))
     77 
     78         # Solve the quadratic optimization problem using cvxopt
     79         minimization = cvxopt.solvers.qp(P, q, G, h, A, b)
     80 
     81         # Lagrange multipliers
     82         lagr_mult = np.ravel(minimization['x'])
     83 
     84         # Extract support vectors
     85         # Get indexes of non-zero lagr. multipiers
     86         idx = lagr_mult > 1e-7
     87         # Get the corresponding lagr. multipliers
     88         self.lagr_multipliers = lagr_mult[idx]
     89         # Get the samples that will act as support vectors
     90         self.support_vectors = X[idx]
     91         # Get the corresponding labels
     92         self.support_vector_labels = y[idx]
     93 
     94         # Calculate intercept with first support vector
     95         self.intercept = self.support_vector_labels[0]
     96         for i in range(len(self.lagr_multipliers)):
     97             self.intercept -= self.lagr_multipliers[i] * self.support_vector_labels[
     98                 i] * self.kernel(self.support_vectors[i], self.support_vectors[0])
     99 
    100     def predict(self, X):
    101         y_pred = []
    102         # Iterate through list of samples and make predictions
    103         for sample in X:
    104             prediction = 0
    105             # Determine the label of the sample by the support vectors
    106             for i in range(len(self.lagr_multipliers)):
    107                 prediction += self.lagr_multipliers[i] * self.support_vector_labels[
    108                     i] * self.kernel(self.support_vectors[i], sample)
    109             prediction += self.intercept
    110             y_pred.append(np.sign(prediction))
    111         return np.array(y_pred)