ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
support_vector_machine.py
(4024B)
1
2 from __future__ import division, print_function
3 import numpy as np
4 import cvxopt
5 from mlfromscratch.utils import train_test_split, normalize, accuracy_score
6 from mlfromscratch.utils.kernels import *
7 from mlfromscratch.utils import Plot
8
9 # Hide cvxopt output: set the module-wide 'show_progress' solver option to False so cvxopt solver calls do not print their iteration log
10 cvxopt.solvers.options['show_progress'] = False
11
class SupportVectorMachine(object):
    """The Support Vector Machine classifier.
    Uses cvxopt to solve the quadratic optimization problem.

    Parameters:
    -----------
    C: float
        Penalty term. A falsy value (None or 0) removes the upper bound on
        the Lagrange multipliers (hard-margin formulation).
    kernel: function
        Kernel factory. Called in fit() as
        kernel(power=..., gamma=..., coef=...) and expected to return a
        function k(x1, x2). Can be either polynomial, rbf or linear.
    power: int
        The degree of the polynomial kernel. Will be ignored by the other
        kernel functions.
    gamma: float
        Used in the rbf kernel function. When falsy, 1 / n_features is used
        at fit time.
    coef: float
        Bias term used in the polynomial kernel function.
    """
    def __init__(self, C=1, kernel=rbf_kernel, power=4, gamma=None, coef=4):
        self.C = C
        self.kernel = kernel
        self.power = power
        self.gamma = gamma
        self.coef = coef
        self.lagr_multipliers = None
        self.support_vectors = None
        self.support_vector_labels = None
        self.intercept = None
        # Kernel function k(x1, x2) built by fit(). Kept apart from
        # self.kernel so the hyperparameters survive fitting and fit() can be
        # called more than once.
        self._kernel_fn = None

    def fit(self, X, y):
        """Fit the classifier by solving the dual quadratic program.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            Training samples.
        y: array-like of length n_samples
            Training labels. The dual formulation and predict() (which
            returns np.sign values) assume labels of -1 and +1.

        Raises:
        -------
        ValueError
            If the solution contains no support vectors (no multiplier
            above 1e-7), e.g. when every label is identical.
        """
        X = np.asarray(X)
        # Flatten so that column-vector labels behave like a 1-D label array
        y = np.ravel(np.asarray(y))
        n_samples, n_features = np.shape(X)

        # Default gamma is 1/n_features. Resolved locally so a later fit on
        # data with a different number of features is not stuck with a
        # stale value.
        gamma = self.gamma if self.gamma else 1.0 / n_features

        # Initialize kernel method with parameters
        self._kernel_fn = self.kernel(
            power=self.power,
            gamma=gamma,
            coef=self.coef)

        # Calculate kernel matrix
        kernel_matrix = np.zeros((n_samples, n_samples))
        for i in range(n_samples):
            for j in range(n_samples):
                kernel_matrix[i, j] = self._kernel_fn(X[i], X[j])

        # Define the quadratic optimization problem:
        #   minimize    1/2 a^T P a + q^T a
        #   subject to  G a <= h,  A a = b
        # cvxopt wants double-precision data, so use a float copy of y here
        # while keeping the caller's dtype for the stored labels.
        y_float = y.astype(float)
        P = cvxopt.matrix(np.outer(y_float, y_float) * kernel_matrix, tc='d')
        q = cvxopt.matrix(-np.ones(n_samples))
        A = cvxopt.matrix(y_float, (1, n_samples), tc='d')
        b = cvxopt.matrix(0, tc='d')

        if not self.C:
            # Hard margin: only a >= 0
            G = cvxopt.matrix(-np.identity(n_samples))
            h = cvxopt.matrix(np.zeros(n_samples))
        else:
            # Soft margin: 0 <= a <= C, stacked as -a <= 0 and a <= C
            G = cvxopt.matrix(np.vstack((-np.identity(n_samples),
                                         np.identity(n_samples))))
            h = cvxopt.matrix(np.hstack((np.zeros(n_samples),
                                         np.ones(n_samples) * self.C)))

        # Solve the quadratic optimization problem using cvxopt
        minimization = cvxopt.solvers.qp(P, q, G, h, A, b)

        # Lagrange multipliers
        lagr_mult = np.ravel(minimization['x'])

        # Extract support vectors
        # Get indexes of non-zero lagr. multipliers
        idx = lagr_mult > 1e-7
        if not np.any(idx):
            raise ValueError(
                "No support vectors found: every Lagrange multiplier is "
                "<= 1e-7. Check that y contains both -1 and +1 labels.")
        # Get the corresponding lagr. multipliers
        self.lagr_multipliers = lagr_mult[idx]
        # Get the samples that will act as support vectors
        self.support_vectors = X[idx]
        # Get the corresponding labels
        self.support_vector_labels = y[idx]

        # Calculate intercept.
        # b = y_m - sum_i a_i y_i k(x_i, x_m) only holds for support vectors
        # lying exactly on the margin (0 < a_m < C); one sitting at the bound
        # a_m = C may have non-zero slack. Average over the margin vectors
        # rather than trusting whichever support vector happens to be first.
        weights = self.lagr_multipliers * self.support_vector_labels
        sv_kernel = kernel_matrix[idx][:, idx]
        if self.C:
            on_margin = self.lagr_multipliers < self.C * (1 - 1e-6)
            if not np.any(on_margin):
                # Every support vector is at the bound; use them all
                on_margin = np.ones(len(weights), dtype=bool)
        else:
            on_margin = np.ones(len(weights), dtype=bool)
        self.intercept = np.mean(
            self.support_vector_labels[on_margin]
            - np.dot(weights, sv_kernel[:, on_margin]))

    def predict(self, X):
        """Predict a label for each sample in X.

        fit() must have been called first.

        Parameters:
        -----------
        X: iterable of samples
            Samples to classify, each of the same form as a training sample.

        Returns:
        --------
        numpy array with np.sign of the decision value for each sample
        (so an exactly-zero decision value yields 0).
        """
        # a_i * y_i for every support vector, shared by all samples
        weights = self.lagr_multipliers * self.support_vector_labels
        y_pred = []
        # Iterate through list of samples and make predictions
        for sample in X:
            # Determine the label of the sample by the support vectors
            kernel_values = [self._kernel_fn(support_vector, sample)
                             for support_vector in self.support_vectors]
            prediction = np.dot(weights, kernel_values) + self.intercept
            y_pred.append(np.sign(prediction))
        return np.array(y_pred)