ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

data_operation.py

(2240B)


      1 from __future__ import division
      2 import numpy as np
      3 import math
      4 import sys
      5 
      6 
      7 def calculate_entropy(y):
      8     """ Calculate the entropy of label array y """
      9     log2 = lambda x: math.log(x) / math.log(2)
     10     unique_labels = np.unique(y)
     11     entropy = 0
     12     for label in unique_labels:
     13         count = len(y[y == label])
     14         p = count / len(y)
     15         entropy += -p * log2(p)
     16     return entropy
     17 
     18 
     19 def mean_squared_error(y_true, y_pred):
     20     """ Returns the mean squared error between y_true and y_pred """
     21     mse = np.mean(np.power(y_true - y_pred, 2))
     22     return mse
     23 
     24 
     25 def calculate_variance(X):
     26     """ Return the variance of the features in dataset X """
     27     mean = np.ones(np.shape(X)) * X.mean(0)
     28     n_samples = np.shape(X)[0]
     29     variance = (1 / n_samples) * np.diag((X - mean).T.dot(X - mean))
     30     
     31     return variance
     32 
     33 
     34 def calculate_std_dev(X):
     35     """ Calculate the standard deviations of the features in dataset X """
     36     std_dev = np.sqrt(calculate_variance(X))
     37     return std_dev
     38 
     39 
     40 def euclidean_distance(x1, x2):
     41     """ Calculates the l2 distance between two vectors """
     42     distance = 0
     43     # Squared distance between each coordinate
     44     for i in range(len(x1)):
     45         distance += pow((x1[i] - x2[i]), 2)
     46     return math.sqrt(distance)
     47 
     48 
     49 def accuracy_score(y_true, y_pred):
     50     """ Compare y_true to y_pred and return the accuracy """
     51     accuracy = np.sum(y_true == y_pred, axis=0) / len(y_true)
     52     return accuracy
     53 
     54 
     55 def calculate_covariance_matrix(X, Y=None):
     56     """ Calculate the covariance matrix for the dataset X """
     57     if Y is None:
     58         Y = X
     59     n_samples = np.shape(X)[0]
     60     covariance_matrix = (1 / (n_samples-1)) * (X - X.mean(axis=0)).T.dot(Y - Y.mean(axis=0))
     61 
     62     return np.array(covariance_matrix, dtype=float)
     63  
     64 
     65 def calculate_correlation_matrix(X, Y=None):
     66     """ Calculate the correlation matrix for the dataset X """
     67     if Y is None:
     68         Y = X
     69     n_samples = np.shape(X)[0]
     70     covariance = (1 / n_samples) * (X - X.mean(0)).T.dot(Y - Y.mean(0))
     71     std_dev_X = np.expand_dims(calculate_std_dev(X), 1)
     72     std_dev_y = np.expand_dims(calculate_std_dev(Y), 1)
     73     correlation_matrix = np.divide(covariance, std_dev_X.dot(std_dev_y.T))
     74 
     75     return np.array(correlation_matrix, dtype=float)