ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

multi_class_lda.py

(2627B)


      1 from __future__ import print_function, division
      2 import matplotlib.pyplot as plt
      3 import numpy as np
      4 from mlfromscratch.utils import calculate_covariance_matrix, normalize, standardize
      5 
      6 
      7 class MultiClassLDA():
      8     """Enables dimensionality reduction for multiple
      9     class distributions. It transforms the features space into a space where
     10     the between class scatter is maximized and the within class scatter is
     11     minimized.
     12 
     13     Parameters:
     14     -----------
     15     solver: str
     16         If 'svd' we use the pseudo-inverse to calculate the inverse of matrices
     17         when doing the transformation.
     18     """
     19     def __init__(self, solver="svd"):
     20         self.solver = solver
     21 
     22     def _calculate_scatter_matrices(self, X, y):
     23         n_features = np.shape(X)[1]
     24         labels = np.unique(y)
     25 
     26         # Within class scatter matrix:
     27         # SW = sum{ (X_for_class - mean_of_X_for_class)^2 }
     28         #   <=> (n_samples_X_for_class - 1) * covar(X_for_class)
     29         SW = np.empty((n_features, n_features))
     30         for label in labels:
     31             _X = X[y == label]
     32             SW += (len(_X) - 1) * calculate_covariance_matrix(_X)
     33 
     34         # Between class scatter:
     35         # SB = sum{ n_samples_for_class * (mean_for_class - total_mean)^2 }
     36         total_mean = np.mean(X, axis=0)
     37         SB = np.empty((n_features, n_features))
     38         for label in labels:
     39             _X = X[y == label]
     40             _mean = np.mean(_X, axis=0)
     41             SB += len(_X) * (_mean - total_mean).dot((_mean - total_mean).T)
     42 
     43         return SW, SB
     44 
     45     def transform(self, X, y, n_components):
     46         SW, SB = self._calculate_scatter_matrices(X, y)
     47 
     48         # Determine SW^-1 * SB by calculating inverse of SW
     49         A = np.linalg.inv(SW).dot(SB)
     50 
     51         # Get eigenvalues and eigenvectors of SW^-1 * SB
     52         eigenvalues, eigenvectors = np.linalg.eigh(A)
     53 
     54         # Sort the eigenvalues and corresponding eigenvectors from largest
     55         # to smallest eigenvalue and select the first n_components
     56         idx = eigenvalues.argsort()[::-1]
     57         eigenvalues = eigenvalues[idx][:n_components]
     58         eigenvectors = eigenvectors[:, idx][:, :n_components]
     59 
     60         # Project the data onto eigenvectors
     61         X_transformed = X.dot(eigenvectors)
     62 
     63         return X_transformed
     64 
     65 
     66     def plot_in_2d(self, X, y, title=None):
     67         """ Plot the dataset X and the corresponding labels y in 2D using the LDA
     68         transformation."""
     69         X_transformed = self.transform(X, y, n_components=2)
     70         x1 = X_transformed[:, 0]
     71         x2 = X_transformed[:, 1]
     72         plt.scatter(x1, x2, c=y)
     73         if title: plt.title(title)
     74         plt.show()