ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
layers.py
(27518B)
1
2 from __future__ import print_function, division
3 import math
4 import numpy as np
5 import copy
6 from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU
7 from mlfromscratch.deep_learning.activation_functions import TanH, ELU, SELU, Softmax
8
9
class Layer(object):
    """ Common interface shared by all layers.

    The forward pass, the backward pass and the output shape have no default
    implementation here and raise NotImplementedError. """

    def set_input_shape(self, shape):
        """ Stores the shape that the layer expects of its input in the
        forward pass method """
        self.input_shape = shape

    def layer_name(self):
        """ The name of the layer, taken from its class name. Used in model summary. """
        return type(self).__name__

    def parameters(self):
        """ The number of trainable parameters used by the layer (zero by default) """
        return 0

    def forward_pass(self, X, training):
        """ Propagates the signal forward in the network """
        raise NotImplementedError()

    def backward_pass(self, accum_grad):
        """ Propagates the accumulated gradient backwards in the network.
        If the layer has trainable weights then these weights are also tuned in this method.
        As input (accum_grad) it receives the gradient with respect to the output of the layer
        and returns the gradient with respect to the output of the previous layer. """
        raise NotImplementedError()

    def output_shape(self):
        """ The shape of the output produced by forward_pass """
        raise NotImplementedError()
39
40
class Dense(Layer):
    """A fully-connected NN layer.

    Parameters:
    -----------
    n_units: int
        The number of neurons in the layer.
    input_shape: tuple
        The expected input shape of the layer. For dense layers a single digit specifying
        the number of features of the input. Must be specified if it is the first layer in
        the network.
    """
    def __init__(self, n_units, input_shape=None):
        self.n_units = n_units
        self.input_shape = input_shape
        self.trainable = True
        # Input of the latest forward pass, kept for the backward pass
        self.layer_input = None
        # Weight matrix and bias row, created in initialize()
        self.W = None
        self.w0 = None

    def initialize(self, optimizer):
        """ Draws the initial weights and gives each parameter its own optimizer copy """
        n_inputs = self.input_shape[0]
        bound = 1 / math.sqrt(n_inputs)
        self.W = np.random.uniform(-bound, bound, (n_inputs, self.n_units))
        self.w0 = np.zeros((1, self.n_units))
        # One shallow optimizer copy per parameter
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        """ Number of entries in the weight matrix plus the bias row """
        n_weights = np.prod(self.W.shape)
        n_biases = np.prod(self.w0.shape)
        return n_weights + n_biases

    def forward_pass(self, X, training=True):
        """ Returns X.W + w0 and remembers X for the backward pass """
        self.layer_input = X
        weighted = X.dot(self.W)
        return weighted + self.w0

    def backward_pass(self, accum_grad):
        """ Updates the weights (if trainable) and returns the gradient w.r.t. the layer input """
        # Keep a reference to the weights that were used in the forward pass
        forward_W = self.W

        if self.trainable:
            # Gradients w.r.t. the weight matrix and the bias row
            grad_w = self.layer_input.T.dot(accum_grad)
            grad_w0 = np.sum(accum_grad, axis=0, keepdims=True)

            self.W = self.W_opt.update(self.W, grad_w)
            self.w0 = self.w0_opt.update(self.w0, grad_w0)

        # Gradient handed to the previous layer, based on the forward pass weights
        return accum_grad.dot(forward_W.T)

    def output_shape(self):
        """ One value per unit """
        return (self.n_units, )
96
97
class RNN(Layer):
    """A Vanilla Fully-Connected Recurrent Neural Network layer.

    Parameters:
    -----------
    n_units: int
        The number of hidden states in the layer.
    activation: string
        The name of the activation function which will be applied to the output of each state.
    bptt_trunc: int
        Decides how many time steps the gradient should be propagated backwards through states
        given the loss gradient for time step t.
    input_shape: tuple
        The expected input shape of the layer, unpacked as (timesteps, input_dim).
        Must be specified if it is the first layer in the network.

    Reference:
    http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano/
    """
    def __init__(self, n_units, activation='tanh', bptt_trunc=5, input_shape=None):
        self.n_units = n_units
        self.input_shape = input_shape
        self.bptt_trunc = bptt_trunc
        self.trainable = True
        self.activation = activation_functions[activation]()
        self.U = None # Weight of the input
        self.V = None # Weight of the output
        self.W = None # Weight of the previous state

    def initialize(self, optimizer):
        """ Draws the initial U, V and W and gives each its own optimizer copy """
        timesteps, input_dim = self.input_shape
        # U is scaled by the input dimension, V and W by the number of units.
        # The draw order (U, V, W) is kept so seeded runs stay reproducible.
        input_limit = 1 / math.sqrt(input_dim)
        self.U = np.random.uniform(-input_limit, input_limit, (self.n_units, input_dim))
        state_limit = 1 / math.sqrt(self.n_units)
        self.V = np.random.uniform(-state_limit, state_limit, (input_dim, self.n_units))
        self.W = np.random.uniform(-state_limit, state_limit, (self.n_units, self.n_units))
        # Weight optimizers
        self.U_opt = copy.copy(optimizer)
        self.V_opt = copy.copy(optimizer)
        self.W_opt = copy.copy(optimizer)

    def parameters(self):
        """ Total number of entries in W, U and V """
        return np.prod(self.W.shape) + np.prod(self.U.shape) + np.prod(self.V.shape)

    def forward_pass(self, X, training=True):
        """ Runs the recurrence over all time steps and returns the per-step outputs """
        self.layer_input = X
        batch_size, timesteps, input_dim = X.shape

        # Buffers that the backward pass reads from
        self.state_input = np.zeros((batch_size, timesteps, self.n_units))
        self.states = np.zeros((batch_size, timesteps+1, self.n_units))
        self.outputs = np.zeros((batch_size, timesteps, input_dim))

        # The extra last slot is the all-zero state that precedes time step
        # zero (it is reached through index t-1 == -1)
        self.states[:, -1] = np.zeros((batch_size, self.n_units))
        for t in range(timesteps):
            from_input = X[:, t].dot(self.U.T)
            from_state = self.states[:, t-1].dot(self.W.T)
            self.state_input[:, t] = from_input + from_state
            self.states[:, t] = self.activation(self.state_input[:, t])
            self.outputs[:, t] = self.states[:, t].dot(self.V.T)

        return self.outputs

    def backward_pass(self, accum_grad):
        """ Truncated back propagation through time; updates U, V and W and
        returns the gradient w.r.t. the layer input """
        _, timesteps, _ = accum_grad.shape

        # Accumulators for the gradient w.r.t. each parameter
        grad_U = np.zeros_like(self.U)
        grad_V = np.zeros_like(self.V)
        grad_W = np.zeros_like(self.W)
        # Gradient w.r.t. the layer input, passed on to the previous layer
        accum_grad_next = np.zeros_like(accum_grad)

        # Back Propagation Through Time
        for t in reversed(range(timesteps)):
            step_grad = accum_grad[:, t]
            # Contribution of time step t to the gradient w.r.t. V
            grad_V += step_grad.T.dot(self.states[:, t])
            # Gradient w.r.t. the state input at time step t
            grad_wrt_state = step_grad.dot(self.V) * self.activation.gradient(self.state_input[:, t])
            # Gradient w.r.t. the layer input
            accum_grad_next[:, t] = grad_wrt_state.dot(self.U)
            # Walk back from t over at most self.bptt_trunc earlier steps,
            # accumulating the gradients w.r.t. U and W
            earliest = max(0, t - self.bptt_trunc)
            for t_ in range(t, earliest - 1, -1):
                grad_U += grad_wrt_state.T.dot(self.layer_input[:, t_])
                grad_W += grad_wrt_state.T.dot(self.states[:, t_-1])
                # Gradient w.r.t. the previous state
                grad_wrt_state = grad_wrt_state.dot(self.W) * self.activation.gradient(self.state_input[:, t_-1])

        # Update weights
        self.U = self.U_opt.update(self.U, grad_U)
        self.V = self.V_opt.update(self.V, grad_V)
        self.W = self.W_opt.update(self.W, grad_W)

        return accum_grad_next

    def output_shape(self):
        """ The outputs have the same per-sample shape as the input """
        return self.input_shape
199
class Conv2D(Layer):
    """A 2D Convolution Layer.

    Parameters:
    -----------
    n_filters: int
        The number of filters that will convolve over the input matrix. The number of channels
        of the output shape.
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    input_shape: tuple
        The shape of the expected input of the layer, unpacked as (channels, height, width).
        Only needs to be specified for first layer in the network.
    padding: string
        Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width
        matches the input height and width. For 'valid' no padding is added.
    stride: int
        The stride length of the filters during the convolution over the input.
    """
    def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
        self.n_filters = n_filters
        self.filter_shape = filter_shape
        self.input_shape = input_shape
        self.padding = padding
        self.stride = stride
        self.trainable = True

    def initialize(self, optimizer):
        """ Draws the initial filters, zeroes the biases and copies the optimizer per parameter """
        filter_height, filter_width = self.filter_shape
        channels = self.input_shape[0]
        bound = 1 / math.sqrt(np.prod(self.filter_shape))
        weight_shape = (self.n_filters, channels, filter_height, filter_width)
        self.W = np.random.uniform(-bound, bound, size=weight_shape)
        self.w0 = np.zeros((self.n_filters, 1))
        # Weight optimizers
        self.W_opt = copy.copy(optimizer)
        self.w0_opt = copy.copy(optimizer)

    def parameters(self):
        """ Number of filter weights plus number of biases """
        return np.prod(self.W.shape) + np.prod(self.w0.shape)

    def forward_pass(self, X, training=True):
        """ Convolves the filters over X via a column-shaped matrix product """
        batch_size, _, _, _ = X.shape
        self.layer_input = X
        # Column-shaped input and weights turn the convolution into one dot product
        self.X_col = image_to_column(X, self.filter_shape, stride=self.stride, output_shape=self.padding)
        self.W_col = self.W.reshape((self.n_filters, -1))
        flat_output = self.W_col.dot(self.X_col) + self.w0
        # (n_filters, out_height, out_width, batch_size) ...
        shaped = flat_output.reshape(self.output_shape() + (batch_size, ))
        # ... rearranged so that batch size comes first
        return shaped.transpose(3, 0, 1, 2)

    def backward_pass(self, accum_grad):
        """ Updates filters and biases (if trainable) and returns the gradient w.r.t. the input """
        # Column shape: one row per filter
        accum_grad = accum_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)

        if self.trainable:
            # Gradient w.r.t. the filters: column-shaped gradient times column-shaped input
            grad_w = accum_grad.dot(self.X_col.T).reshape(self.W.shape)
            # Gradient w.r.t. the biases: sum per filter, similar to the Dense layer
            grad_w0 = np.sum(accum_grad, axis=1, keepdims=True)

            self.W = self.W_opt.update(self.W, grad_w)
            self.w0 = self.w0_opt.update(self.w0, grad_w0)

        # Gradient to propagate back, using the column-shaped weights saved in the forward pass
        grad_col = self.W_col.T.dot(accum_grad)
        # Back from column shape to image shape
        return column_to_image(grad_col,
                               self.layer_input.shape,
                               self.filter_shape,
                               stride=self.stride,
                               output_shape=self.padding)

    def output_shape(self):
        """ (n_filters, output_height, output_width) for the configured padding and stride """
        channels, height, width = self.input_shape
        pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
        padded_height = height + np.sum(pad_h)
        padded_width = width + np.sum(pad_w)
        output_height = (padded_height - self.filter_shape[0]) / self.stride + 1
        output_width = (padded_width - self.filter_shape[1]) / self.stride + 1
        return self.n_filters, int(output_height), int(output_width)
288
289
class BatchNormalization(Layer):
    """Batch normalization.

    Parameters:
    -----------
    momentum: float
        Weight given to the previous running mean / variance when they are updated
        with the statistics of the current batch.
    """
    def __init__(self, momentum=0.99):
        self.momentum = momentum
        self.trainable = True
        # Added to the variance before the square root is taken
        self.eps = 0.01
        # Running statistics; initialized on the first forward pass
        self.running_mean = None
        self.running_var = None

    def initialize(self, optimizer):
        """ Creates the scale (gamma) and shift (beta) parameters and their optimizers """
        self.gamma = np.ones(self.input_shape)
        self.beta = np.zeros(self.input_shape)
        # parameter optimizers
        self.gamma_opt = copy.copy(optimizer)
        self.beta_opt = copy.copy(optimizer)

    def parameters(self):
        """ Number of entries in gamma plus number of entries in beta """
        return np.prod(self.gamma.shape) + np.prod(self.beta.shape)

    def forward_pass(self, X, training=True):
        """ Normalizes X along axis 0, then scales by gamma and shifts by beta """
        # First run: seed the running statistics with those of this batch
        if self.running_mean is None:
            self.running_mean = np.mean(X, axis=0)
            self.running_var = np.var(X, axis=0)

        if training and self.trainable:
            # Use the batch statistics and fold them into the running ones
            mean = np.mean(X, axis=0)
            var = np.var(X, axis=0)
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
        else:
            # Use the running statistics as they are
            mean = self.running_mean
            var = self.running_var

        # Saved for the backward pass
        self.X_centered = X - mean
        self.stddev_inv = 1 / np.sqrt(var + self.eps)

        normalized = self.X_centered * self.stddev_inv
        return self.gamma * normalized + self.beta

    def backward_pass(self, accum_grad):
        """ Updates gamma and beta (if trainable) and returns the gradient w.r.t. the input """
        # Scale that was used during the forward pass
        gamma = self.gamma

        if self.trainable:
            X_norm = self.X_centered * self.stddev_inv
            grad_gamma = np.sum(accum_grad * X_norm, axis=0)
            grad_beta = np.sum(accum_grad, axis=0)

            self.gamma = self.gamma_opt.update(self.gamma, grad_gamma)
            self.beta = self.beta_opt.update(self.beta, grad_beta)

        batch_size = accum_grad.shape[0]

        # Gradient of the loss w.r.t. the layer inputs, using the scale and the
        # statistics saved by the forward pass
        accum_grad = (1 / batch_size) * gamma * self.stddev_inv * (
            batch_size * accum_grad
            - np.sum(accum_grad, axis=0)
            - self.X_centered * self.stddev_inv**2 * np.sum(accum_grad * self.X_centered, axis=0)
        )

        return accum_grad

    def output_shape(self):
        """ Normalization leaves the shape unchanged """
        return self.input_shape
363
364
class PoolingLayer(Layer):
    """A parent class of MaxPooling2D and AveragePooling2D

    Parameters:
    -----------
    pool_shape: tuple
        A tuple (pool_height, pool_width).
    stride: int
        The step between two neighbouring pooling windows.
    padding: int or string
        0 (the default), None or 'valid' for no padding, or 'same'. The value is
        translated to a padding mode string and used for both the forward and the
        backward pass.
        NOTE: output_shape() does not take padding into account, so 'same' only
        works when the padded and unpadded window counts coincide.
    """
    def __init__(self, pool_shape=(2, 2), stride=1, padding=0):
        self.pool_shape = pool_shape
        self.stride = stride
        self.padding = padding
        self.trainable = True

    def _padding_mode(self):
        """ Translates self.padding into the mode string expected by
        image_to_column / column_to_image. The numeric default 0 (and None)
        mean no padding, i.e. 'valid'. """
        if self.padding is None or self.padding == 0:
            return 'valid'
        return self.padding

    def forward_pass(self, X, training=True):
        """ Pools every channel of every sample using the subclass' _pool_forward """
        self.layer_input = X

        batch_size, channels, height, width = X.shape

        _, out_height, out_width = self.output_shape()

        # Treat every channel as its own single-channel image
        X = X.reshape(batch_size*channels, 1, height, width)
        X_col = image_to_column(X, self.pool_shape, self.stride, self._padding_mode())

        # MaxPool or AveragePool specific method
        output = self._pool_forward(X_col)

        output = output.reshape(out_height, out_width, batch_size, channels)
        output = output.transpose(2, 3, 0, 1)

        return output

    def backward_pass(self, accum_grad):
        """ Routes the gradient back to the input positions using the subclass' _pool_backward """
        batch_size, _, _, _ = accum_grad.shape
        channels, height, width = self.input_shape
        accum_grad = accum_grad.transpose(2, 3, 0, 1).ravel()

        # MaxPool or AveragePool specific method
        accum_grad_col = self._pool_backward(accum_grad)

        # Use the same padding mode as the forward pass so that the column
        # layout matches (a bare 0 is not a mode the helper functions know)
        accum_grad = column_to_image(accum_grad_col,
                                     (batch_size * channels, 1, height, width),
                                     self.pool_shape,
                                     self.stride,
                                     self._padding_mode())
        accum_grad = accum_grad.reshape((batch_size,) + self.input_shape)

        return accum_grad

    def output_shape(self):
        """ (channels, out_height, out_width) for unpadded pooling """
        channels, height, width = self.input_shape
        out_height = (height - self.pool_shape[0]) / self.stride + 1
        out_width = (width - self.pool_shape[1]) / self.stride + 1
        # The pooling windows have to tile the input exactly
        assert out_height % 1 == 0
        assert out_width % 1 == 0
        return channels, int(out_height), int(out_width)
412
413
class MaxPooling2D(PoolingLayer):
    """ Pooling layer that keeps the largest value of every window """

    def _pool_forward(self, X_col):
        """ Picks the maximum of each column and remembers where it was found """
        winners = np.argmax(X_col, axis=0).flatten()
        # Row index of the maximum per column, needed by _pool_backward
        self.cache = winners
        return X_col[winners, range(winners.size)]

    def _pool_backward(self, accum_grad):
        """ Sends each gradient value to the row that held the maximum; all other rows stay zero """
        window_size = np.prod(self.pool_shape)
        accum_grad_col = np.zeros((window_size, accum_grad.size))
        winners = self.cache
        accum_grad_col[winners, range(accum_grad.size)] = accum_grad
        return accum_grad_col
426
class AveragePooling2D(PoolingLayer):
    """ Pooling layer that keeps the mean value of every window """

    def _pool_forward(self, X_col):
        """ Averages each column """
        return np.mean(X_col, axis=0)

    def _pool_backward(self, accum_grad):
        """ Spreads each gradient value evenly over the rows of its column """
        window_size = np.prod(self.pool_shape)
        accum_grad_col = np.zeros((window_size, accum_grad.size))
        share = 1. / accum_grad_col.shape[0]
        accum_grad_col[:, range(accum_grad.size)] = share * accum_grad
        return accum_grad_col
436
437
class ConstantPadding2D(Layer):
    """Adds rows and columns of constant values to the input.
    Expects the input to be of shape (batch_size, channels, height, width)

    Parameters:
    -----------
    padding: tuple
        The amount of padding along the height and width dimension of the input.
        If (pad_h, pad_w) the same symmetric padding is applied along height and width dimension.
        If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified padding is added to beginning and end of
        the height and width dimension.
    padding_value: int or tuple
        The value that is added as padding.
    """
    def __init__(self, padding, padding_value=0):
        self.trainable = True
        self.padding_value = padding_value

        along_h, along_w = padding[0], padding[1]
        h_is_pair = isinstance(along_h, tuple)
        w_is_pair = isinstance(along_w, tuple)
        if h_is_pair and w_is_pair:
            # Already in ((before, after), (before, after)) form
            self.padding = padding
        else:
            # Expand every entry that is not a tuple into a symmetric pair
            self.padding = (
                along_h if h_is_pair else (along_h, along_h),
                along_w if w_is_pair else (along_w, along_w),
            )

    def forward_pass(self, X, training=True):
        """ Pads the two trailing axes of X with the constant padding value """
        pad_width = ((0, 0), (0, 0), self.padding[0], self.padding[1])
        return np.pad(X,
                      pad_width=pad_width,
                      mode="constant",
                      constant_values=self.padding_value)

    def backward_pass(self, accum_grad):
        """ Cuts the padded border off the gradient """
        top = self.padding[0][0]
        left = self.padding[1][0]
        height = self.input_shape[1]
        width = self.input_shape[2]
        return accum_grad[:, :, top:top+height, left:left+width]

    def output_shape(self):
        """ Input shape with the total padding added to height and width """
        new_height = self.input_shape[1] + np.sum(self.padding[0])
        new_width = self.input_shape[2] + np.sum(self.padding[1])
        return (self.input_shape[0], new_height, new_width)
478
479
class ZeroPadding2D(ConstantPadding2D):
    """Adds rows and columns of zero values to the input.
    Expects the input to be of shape (batch_size, channels, height, width)

    Parameters:
    -----------
    padding: tuple
        The amount of padding along the height and width dimension of the input.
        If (pad_h, pad_w) the same symmetric padding is applied along height and width dimension.
        If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified padding is added to beginning and end of
        the height and width dimension.
    """
    def __init__(self, padding):
        # The parent constructor is not called, so every attribute the parent
        # would set (including `trainable`) has to be set here.
        self.trainable = True
        self.padding_value = 0
        self.padding = padding
        # Accept NumPy integers as well as builtin ints for the scalar form
        scalar_types = (int, np.integer)
        if isinstance(padding[0], scalar_types):
            self.padding = ((padding[0], padding[0]), padding[1])
        if isinstance(padding[1], scalar_types):
            self.padding = (self.padding[0], (padding[1], padding[1]))
499
500
class Flatten(Layer):
    """ Turns a multidimensional matrix into two-dimensional """
    def __init__(self, input_shape=None):
        self.input_shape = input_shape
        self.trainable = True
        # Shape of the latest forward pass input, restored in the backward pass
        self.prev_shape = None

    def forward_pass(self, X, training=True):
        """ Keeps the first axis and merges all remaining axes into one """
        self.prev_shape = X.shape
        n_samples = X.shape[0]
        return X.reshape((n_samples, -1))

    def backward_pass(self, accum_grad):
        """ Gives the gradient the shape the input had in the forward pass """
        return accum_grad.reshape(self.prev_shape)

    def output_shape(self):
        """ A single axis holding the product of the input dimensions """
        flat_size = np.prod(self.input_shape)
        return (flat_size,)
517
518
class UpSampling2D(Layer):
    """ Nearest neighbor up sampling of the input. Repeats the rows and
    columns of the data by size[0] and size[1] respectively.

    Parameters:
    -----------
    size: tuple
        (size_y, size_x) - The number of times each axis will be repeated.
    """
    def __init__(self, size=(2,2), input_shape=None):
        self.prev_shape = None
        self.trainable = True
        self.size = size
        self.input_shape = input_shape

    def forward_pass(self, X, training=True):
        """ Repeats every row size[0] times and every column size[1] times """
        self.prev_shape = X.shape
        # Repeat each axis as specified by size
        X_new = X.repeat(self.size[0], axis=2).repeat(self.size[1], axis=3)
        return X_new

    def backward_pass(self, accum_grad):
        """ Returns the gradient w.r.t. the input of the forward pass.

        Every input value was copied into a size[0] x size[1] block of the
        output, so its gradient is the sum of the gradients of that block
        (taking only one element per block would drop the rest). """
        batch_size, channels, height, width = self.prev_shape
        size_y, size_x = self.size[0], self.size[1]
        # Split the repeated axes into (original index, repeat index) ...
        blocks = accum_grad.reshape(batch_size, channels, height, size_y, width, size_x)
        # ... and sum over the repeat indices
        return blocks.sum(axis=(3, 5))

    def output_shape(self):
        """ Input shape with height and width multiplied by size[0] and size[1] """
        channels, height, width = self.input_shape
        return channels, self.size[0] * height, self.size[1] * width
548
549
class Reshape(Layer):
    """ Reshapes the input tensor into specified shape

    Parameters:
    -----------
    shape: tuple
        The shape which the input shall be reshaped to (the leading batch axis is
        kept and not part of this tuple).
    """
    def __init__(self, shape, input_shape=None):
        self.shape = shape
        self.input_shape = input_shape
        self.trainable = True
        # Shape of the latest forward pass input, restored in the backward pass
        self.prev_shape = None

    def forward_pass(self, X, training=True):
        """ Keeps the first axis and reshapes the rest to self.shape """
        self.prev_shape = X.shape
        target_shape = (X.shape[0], ) + self.shape
        return X.reshape(target_shape)

    def backward_pass(self, accum_grad):
        """ Gives the gradient the shape the input had in the forward pass """
        return accum_grad.reshape(self.prev_shape)

    def output_shape(self):
        """ The configured target shape """
        return self.shape
573
574
class Dropout(Layer):
    """A layer that randomly sets a fraction p of the output units of the previous layer
    to zero.

    Parameters:
    -----------
    p: float
        The probability that unit x is set to zero.
    """
    def __init__(self, p=0.2):
        self.p = p
        self.input_shape = None
        self.n_units = None
        self.pass_through = True
        self.trainable = True
        # Boolean keep-mask drawn in the latest training forward pass
        self._mask = None

    def forward_pass(self, X, training=True):
        """ While training a freshly drawn mask is applied; otherwise X is scaled by (1 - p) """
        if not training:
            return X * (1 - self.p)
        # True where the unit is kept
        self._mask = np.random.uniform(size=X.shape) > self.p
        return X * self._mask

    def backward_pass(self, accum_grad):
        """ Lets the gradient through only where the unit was kept """
        return accum_grad * self._mask

    def output_shape(self):
        """ Dropout leaves the shape unchanged """
        return self.input_shape
604
# Lookup table from activation name to activation class. The classes are
# instantiated by the layers that look them up (RNN, Activation).
activation_functions = {
    'relu': ReLU,
    'sigmoid': Sigmoid,
    'selu': SELU,
    'elu': ELU,
    'softmax': Softmax,
    'leaky_relu': LeakyReLU,
    'tanh': TanH,
    'softplus': SoftPlus,
}
615
class Activation(Layer):
    """A layer that applies an activation operation to the input.

    Parameters:
    -----------
    name: string
        The name of the activation function that will be used. Has to be a key
        of activation_functions.
    """

    def __init__(self, name):
        self.activation_name = name
        activation_class = activation_functions[name]
        self.activation_func = activation_class()
        self.trainable = True

    def layer_name(self):
        """ 'Activation' followed by the class name of the activation function """
        func_name = self.activation_func.__class__.__name__
        return "Activation (%s)" % (func_name)

    def forward_pass(self, X, training=True):
        """ Applies the activation function and remembers X for the backward pass """
        self.layer_input = X
        return self.activation_func(X)

    def backward_pass(self, accum_grad):
        """ Multiplies the incoming gradient with the activation gradient at the saved input """
        local_grad = self.activation_func.gradient(self.layer_input)
        return accum_grad * local_grad

    def output_shape(self):
        """ Activations leave the shape unchanged """
        return self.input_shape
642
643
# Method which calculates the padding based on the specified output shape and the
# shape of the filters
def determine_padding(filter_shape, output_shape="same"):
    """ Returns the padding as ((pad_top, pad_bottom), (pad_left, pad_right)).

    Parameters:
    -----------
    filter_shape: tuple
        A tuple (filter_height, filter_width).
    output_shape: string
        'valid' (also 0 or None) for no padding, or 'same' for padding that keeps
        the output height and width equal to the input ones (given that stride=1).

    Raises ValueError for any other mode instead of silently returning None.
    """
    # No padding. 0 and None are treated as "no padding" so that a numeric
    # padding argument does not fall through all branches and return None.
    if output_shape is None or output_shape == "valid" or output_shape == 0:
        return (0, 0), (0, 0)

    # Pad so that the output shape is the same as input shape (given that stride=1)
    if output_shape == "same":
        filter_height, filter_width = filter_shape

        # Derived from:
        # output_height = (height + pad_h - filter_height) / stride + 1
        # In this case output_height = height and stride = 1, so the total
        # padding is filter_height - 1. It is split into a floor half (before)
        # and a ceil half (after) using integer arithmetic only.
        pad_h1 = int((filter_height - 1) // 2)
        pad_h2 = int(filter_height - 1 - pad_h1)
        pad_w1 = int((filter_width - 1) // 2)
        pad_w2 = int(filter_width - 1 - pad_w1)

        return (pad_h1, pad_h2), (pad_w1, pad_w2)

    raise ValueError("Unknown padding mode %r, expected 'same' or 'valid'" % (output_shape,))
665
666
# Reference: CS231n Stanford
def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    """ Returns index arrays (k, i, j) - channel, row and column - that gather every
    filter window of a padded image batch into columns.

    k has one row per filter element; i and j have one row per filter element and
    one column per window position. """
    # Output size along each spatial axis
    _, channels, height, width = images_shape
    filter_height, filter_width = filter_shape
    pad_h, pad_w = padding
    out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)

    # Offsets of every filter element inside its window, repeated per channel
    row_in_window = np.tile(np.repeat(np.arange(filter_height), filter_width), channels)
    col_in_window = np.tile(np.arange(filter_width), filter_height * channels)
    # Top-left corner of every window
    window_row = stride * np.repeat(np.arange(out_height), out_width)
    window_col = stride * np.tile(np.arange(out_width), out_height)

    # Absolute position = window corner + offset inside the window
    i = row_in_window.reshape(-1, 1) + window_row.reshape(1, -1)
    j = col_in_window.reshape(-1, 1) + window_col.reshape(1, -1)

    # Channel that every filter element belongs to
    k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)

    return (k, i, j)
687
688
# Method which turns the image shaped input to column shape.
# Used during the forward pass.
# Reference: CS231n Stanford
def image_to_column(images, filter_shape, stride, output_shape='same'):
    """ Pads the images and lays every filter window out as one column.

    The result has filter_height * filter_width * channels rows. """
    filter_height, filter_width = filter_shape

    pad_h, pad_w = determine_padding(filter_shape, output_shape)

    # Pad only the two spatial axes
    padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')

    # Indices of the values that every filter element meets at every window position
    k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)

    # Gather those values for all images at once
    windows = padded[:, k, i, j]
    channels = images.shape[1]
    n_rows = filter_height * filter_width * channels
    # Move the batch axis last, then flatten into column shape
    return windows.transpose(1, 2, 0).reshape(n_rows, -1)
710
711
712
# Method which turns the column shaped input to image shape.
# Used during the backward pass.
# Reference: CS231n Stanford
def column_to_image(cols, images_shape, filter_shape, stride, output_shape='same'):
    """ Adds the column shaped values back onto a zero image batch of shape
    images_shape; values of overlapping windows are summed. """
    batch_size, channels, height, width = images_shape
    pad_h, pad_w = determine_padding(filter_shape, output_shape)
    padded_shape = (batch_size, channels, height + np.sum(pad_h), width + np.sum(pad_w))
    images_padded = np.zeros(padded_shape)

    # Same indices as were used to build the columns
    k, i, j = get_im2col_indices(images_shape, filter_shape, (pad_h, pad_w), stride)

    # (filter elements, window positions, batch) -> batch first
    n_rows = channels * np.prod(filter_shape)
    per_image = cols.reshape(n_rows, -1, batch_size).transpose(2, 0, 1)
    # np.add.at accumulates when the same position is addressed more than once
    np.add.at(images_padded, (slice(None), k, i, j), per_image)

    # Strip the padding again
    top, left = pad_h[0], pad_w[0]
    return images_padded[:, :, top:height+top, left:width+left]