ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
optimizers.py
(4774B)
1 import numpy as np
2 from mlfromscratch.utils import make_diagonal, normalize
3
4 # Optimizers for models that use gradient-based methods to find the
5 # weights that minimize the loss.
6 # A great resource for understanding these methods:
7 # http://sebastianruder.com/optimizing-gradient-descent/index.html
8
class StochasticGradientDescent:
    """Gradient descent with an exponentially averaged update direction.

    The update direction is a blend of the previous direction and the new
    gradient: ``v = momentum * v + (1 - momentum) * grad``. With the default
    ``momentum=0`` this is plain gradient descent.
    """

    def __init__(self, learning_rate=0.01, momentum=0):
        self.momentum = momentum
        self.learning_rate = learning_rate
        # Running update direction; allocated lazily on the first update().
        self.w_updt = None

    def update(self, w, grad_wrt_w):
        """Return the weights after one step against the averaged gradient.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w - learning_rate * v`` where ``v`` is the blended direction.
        """
        # Start from a zero direction the first time we see the weights.
        previous = np.zeros(np.shape(w)) if self.w_updt is None else self.w_updt
        blend = self.momentum
        self.w_updt = blend * previous + (1 - blend) * grad_wrt_w
        # Step downhill, i.e. opposite to the (averaged) gradient.
        step = self.learning_rate * self.w_updt
        return w - step
23
class NesterovAcceleratedGradient():
    """Nesterov accelerated gradient: momentum with a look-ahead gradient.

    Instead of evaluating the gradient at ``w``, it is evaluated at
    ``w - momentum * w_updt`` (the point the momentum term alone would move
    the weights to). The gradient is clipped element-wise to [-1, 1] before
    it is used.
    """

    def __init__(self, learning_rate=0.001, momentum=0.4):
        self.learning_rate = learning_rate
        self.momentum = momentum
        # Previous update (velocity). Allocated in update(), once the shape
        # of the weights is known.
        self.w_updt = None

    def update(self, w, grad_func):
        """Return the weights after one Nesterov step.

        Args:
            w: Current weights (array or scalar).
            grad_func: Callable that takes weights and returns the gradient
                of the loss at those weights.

        Returns:
            ``w`` minus the new velocity
            ``momentum * w_updt + learning_rate * clipped_gradient``.
        """
        # Allocate the velocity before computing the look-ahead point so it
        # always has the same shape as w and the subtraction below is valid.
        if self.w_updt is None or np.shape(self.w_updt) != np.shape(w):
            self.w_updt = np.zeros(np.shape(w))

        # Gradient of the loss a bit further down the slope from w.
        look_ahead = w - self.momentum * self.w_updt
        approx_future_grad = np.clip(grad_func(look_ahead), -1, 1)

        self.w_updt = self.momentum * self.w_updt + self.learning_rate * approx_future_grad
        # Move against the gradient to minimize loss.
        return w - self.w_updt
40
class Adagrad:
    """Adagrad: per-weight step sizes scaled by accumulated squared gradients.

    Each weight's step is divided by the square root of the sum of all its
    past squared gradients, so weights that have seen small gradients keep
    a comparatively large effective learning rate.
    """

    def __init__(self, learning_rate=0.01):
        self.eps = 1e-8  # Keeps the square root away from zero
        self.G = None  # Sum of squares of the gradients, allocated lazily
        self.learning_rate = learning_rate

    def update(self, w, grad_wrt_w):
        """Accumulate the squared gradient and return the updated weights.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w - learning_rate * grad / sqrt(G + eps)``.
        """
        # First call: start the accumulator at zero, shaped like the weights.
        if self.G is None:
            self.G = np.zeros(np.shape(w))

        # Accumulate in place.
        self.G += np.power(grad_wrt_w, 2)

        scale = np.sqrt(self.G + self.eps)
        return w - self.learning_rate * grad_wrt_w / scale
55
class Adadelta:
    """Adadelta: adaptive steps from running averages, no learning rate.

    The step size for each weight is the ratio of the RMS of recent
    parameter updates to the RMS of recent gradients.
    """

    def __init__(self, rho=0.95, eps=1e-6):
        self.rho = rho  # Decay rate of both running averages
        self.eps = eps  # Added under the square roots for stability
        self.w_updt = None  # Parameter update
        self.E_grad = None  # Running average of the squared gradient of w
        self.E_w_updt = None  # Running average of squared parameter updates

    def update(self, w, grad_wrt_w):
        """Return the weights after one Adadelta step.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w`` minus the adaptive update computed for this step.
        """
        # Lazily allocate all state on the first call.
        if self.w_updt is None:
            self.w_updt = np.zeros(np.shape(w))
            self.E_w_updt = np.zeros(np.shape(w))
            self.E_grad = np.zeros(np.shape(grad_wrt_w))

        decay = self.rho
        keep = 1 - decay

        # Fold the new squared gradient into its running average.
        self.E_grad = decay * self.E_grad + keep * np.power(grad_wrt_w, 2)

        # The update RMS uses the average from *before* this step's update.
        rms_update = np.sqrt(self.E_w_updt + self.eps)
        rms_gradient = np.sqrt(self.E_grad + self.eps)

        # Per-weight adaptive learning rate times the gradient.
        self.w_updt = rms_update / rms_gradient * grad_wrt_w

        # Now fold this step's squared update into its running average.
        self.E_w_updt = decay * self.E_w_updt + keep * np.power(self.w_updt, 2)

        return w - self.w_updt
87
class RMSprop:
    """RMSprop: scale each step by a running RMS of recent gradients."""

    def __init__(self, learning_rate=0.01, rho=0.9):
        self.rho = rho  # Decay rate of the running average
        self.eps = 1e-8  # Keeps the square root away from zero
        self.Eg = None  # Running average of the square gradients at w
        self.learning_rate = learning_rate

    def update(self, w, grad_wrt_w):
        """Return the weights after one RMSprop step.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w - learning_rate * grad / sqrt(Eg + eps)``.
        """
        # Start the running average at zero on the first call.
        running = np.zeros(np.shape(grad_wrt_w)) if self.Eg is None else self.Eg

        squared = np.power(grad_wrt_w, 2)
        self.Eg = self.rho * running + (1 - self.rho) * squared

        # Each weight's step shrinks as the recent gradient magnitude for
        # that weight grows.
        denom = np.sqrt(self.Eg + self.eps)
        return w - self.learning_rate * grad_wrt_w / denom
105
class Adam():
    """Adam: adaptive moment estimation with bias-corrected averages.

    Keeps exponential running averages of the gradient (``m``) and of the
    squared gradient (``v``). Both start at zero and are therefore biased
    toward zero early on; the bias is removed by dividing by
    ``1 - b1**t`` and ``1 - b2**t`` where ``t`` counts update() calls.
    """

    def __init__(self, learning_rate=0.001, b1=0.9, b2=0.999):
        self.learning_rate = learning_rate
        self.eps = 1e-8
        self.m = None  # Running average of the gradient (first moment)
        self.v = None  # Running average of the squared gradient (second moment)
        # Decay rates
        self.b1 = b1
        self.b2 = b2
        self.t = 0  # Number of updates performed so far
        self.w_updt = None  # Most recent parameter update

    def update(self, w, grad_wrt_w):
        """Return the weights after one Adam step.

        Args:
            w: Current weights.
            grad_wrt_w: Gradient of the loss with respect to ``w``.

        Returns:
            ``w - learning_rate * m_hat / (sqrt(v_hat) + eps)``.
        """
        # If not initialized
        if self.m is None:
            self.m = np.zeros(np.shape(grad_wrt_w))
            self.v = np.zeros(np.shape(grad_wrt_w))

        self.t += 1

        self.m = self.b1 * self.m + (1 - self.b1) * grad_wrt_w
        self.v = self.b2 * self.v + (1 - self.b2) * np.power(grad_wrt_w, 2)

        # The correction factor depends on the step count: after t steps the
        # zero-initialized average carries a total weight of (1 - b**t).
        m_hat = self.m / (1 - self.b1 ** self.t)
        v_hat = self.v / (1 - self.b2 ** self.t)

        self.w_updt = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.eps)

        return w - self.w_updt
131
132
133