ml-finance-python
Python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
multi_class_lda.py
(2627B)
1 from __future__ import print_function, division
2 import matplotlib.pyplot as plt
3 import numpy as np
4 from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize
5
6
class MultiClassLDA():
    """Multi-class Linear Discriminant Analysis for dimensionality reduction.

    Transforms the feature space into a space where the between-class
    scatter is maximized and the within-class scatter is minimized.

    Parameters:
    -----------
    solver: str
        If 'svd' we use the pseudo-inverse to calculate the inverse of
        matrices when doing the transformation. Any other value uses the
        exact inverse, which raises numpy.linalg.LinAlgError when the
        within-class scatter matrix is singular.
    """
    def __init__(self, solver="svd"):
        self.solver = solver

    def _calculate_scatter_matrices(self, X, y):
        """Compute the within-class and between-class scatter matrices.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            The samples.
        y: array-like of shape (n_samples,)
            The class label of each sample.

        Returns:
        --------
        (SW, SB): tuple of ndarrays, each of shape (n_features, n_features)
            SW = sum over classes of sum{ (x - class_mean)(x - class_mean)^T }
            SB = sum over classes of n_class * (class_mean - total_mean)
                                             (class_mean - total_mean)^T
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_features = np.shape(X)[1]
        total_mean = np.mean(X, axis=0)

        # The accumulators must start at zero: both matrices are built up
        # with += over the classes.
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))

        for label in np.unique(y):
            _X = X[y == label]
            _mean = np.mean(_X, axis=0)

            # Within class scatter, computed directly. This equals
            # (n_samples_for_class - 1) * covar(X_for_class) but avoids the
            # division by (n_samples_for_class - 1), so a class with a
            # single sample simply contributes zero.
            centered = _X - _mean
            SW += centered.T.dot(centered)

            # Between class scatter. The means are 1-D vectors, so an
            # explicit outer product is required; a plain dot() would give
            # a scalar.
            mean_diff = _mean - total_mean
            SB += len(_X) * np.outer(mean_diff, mean_diff)

        return SW, SB

    def transform(self, X, y, n_components):
        """Project X onto its n_components most discriminative directions.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            The samples to transform.
        y: array-like of shape (n_samples,)
            The class label of each sample.
        n_components: int
            The number of discriminant directions to keep.

        Returns:
        --------
        X_transformed: ndarray of shape (n_samples, n_components)
            The samples projected onto the leading eigenvectors of
            SW^-1 * SB. The sign of each column is arbitrary.
        """
        X = np.asarray(X, dtype=float)
        SW, SB = self._calculate_scatter_matrices(X, y)

        # Determine SW^-1 * SB. With the 'svd' solver the pseudo-inverse is
        # used so that a singular SW does not abort the transformation.
        if self.solver == "svd":
            SW_inv = np.linalg.pinv(SW)
        else:
            SW_inv = np.linalg.inv(SW)
        A = SW_inv.dot(SB)

        # SW^-1 * SB is in general NOT symmetric, so the general eigen
        # solver is required (eigh would only read one triangle of A).
        # The product of two symmetric positive semi-definite matrices has
        # real eigenvalues, so any imaginary part is numerical noise.
        eigenvalues, eigenvectors = np.linalg.eig(A)
        eigenvalues = np.real(eigenvalues)
        eigenvectors = np.real(eigenvectors)

        # Sort from largest to smallest eigenvalue and keep the
        # eigenvectors belonging to the first n_components
        idx = eigenvalues.argsort()[::-1][:n_components]
        eigenvectors = eigenvectors[:, idx]

        # Project the data onto eigenvectors
        X_transformed = X.dot(eigenvectors)

        return X_transformed

    def plot_in_2d(self, X, y, title=None):
        """ Plot the dataset X and the corresponding labels y in 2D using the LDA
        transformation.

        Parameters:
        -----------
        X: array-like of shape (n_samples, n_features)
            The samples to plot.
        y: array-like of shape (n_samples,)
            The class labels; also used to color the points.
        title: str or None
            Optional title of the figure.
        """
        X_transformed = self.transform(X, y, n_components=2)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        plt.scatter(x1, x2, c=y)
        if title:
            plt.title(title)
        plt.show()