ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

layers.py

(27518B)


      1 
      2 from __future__ import print_function, division
      3 import math
      4 import numpy as np
      5 import copy
      6 from mlfromscratch.deep_learning.activation_functions import Sigmoid, ReLU, SoftPlus, LeakyReLU
      7 from mlfromscratch.deep_learning.activation_functions import TanH, ELU, SELU, Softmax
      8 
      9 
     10 class Layer(object):
     11 
     12     def set_input_shape(self, shape):
     13         """ Sets the shape that the layer expects of the input in the forward
     14         pass method """
     15         self.input_shape = shape
     16 
     17     def layer_name(self):
     18         """ The name of the layer. Used in model summary. """
     19         return self.__class__.__name__
     20 
     21     def parameters(self):
     22         """ The number of trainable parameters used by the layer """
     23         return 0
     24 
     25     def forward_pass(self, X, training):
     26         """ Propogates the signal forward in the network """
     27         raise NotImplementedError()
     28 
     29     def backward_pass(self, accum_grad):
     30         """ Propogates the accumulated gradient backwards in the network.
     31         If the has trainable weights then these weights are also tuned in this method.
     32         As input (accum_grad) it receives the gradient with respect to the output of the layer and
     33         returns the gradient with respect to the output of the previous layer. """
     34         raise NotImplementedError()
     35 
     36     def output_shape(self):
     37         """ The shape of the output produced by forward_pass """
     38         raise NotImplementedError()
     39 
     40 
     41 class Dense(Layer):
     42     """A fully-connected NN layer.
     43     Parameters:
     44     -----------
     45     n_units: int
     46         The number of neurons in the layer.
     47     input_shape: tuple
     48         The expected input shape of the layer. For dense layers a single digit specifying
     49         the number of features of the input. Must be specified if it is the first layer in
     50         the network.
     51     """
     52     def __init__(self, n_units, input_shape=None):
     53         self.layer_input = None
     54         self.input_shape = input_shape
     55         self.n_units = n_units
     56         self.trainable = True
     57         self.W = None
     58         self.w0 = None
     59 
     60     def initialize(self, optimizer):
     61         # Initialize the weights
     62         limit = 1 / math.sqrt(self.input_shape[0])
     63         self.W  = np.random.uniform(-limit, limit, (self.input_shape[0], self.n_units))
     64         self.w0 = np.zeros((1, self.n_units))
     65         # Weight optimizers
     66         self.W_opt  = copy.copy(optimizer)
     67         self.w0_opt = copy.copy(optimizer)
     68 
     69     def parameters(self):
     70         return np.prod(self.W.shape) + np.prod(self.w0.shape)
     71 
     72     def forward_pass(self, X, training=True):
     73         self.layer_input = X
     74         return X.dot(self.W) + self.w0
     75 
     76     def backward_pass(self, accum_grad):
     77         # Save weights used during forwards pass
     78         W = self.W
     79 
     80         if self.trainable:
     81             # Calculate gradient w.r.t layer weights
     82             grad_w = self.layer_input.T.dot(accum_grad)
     83             grad_w0 = np.sum(accum_grad, axis=0, keepdims=True)
     84 
     85             # Update the layer weights
     86             self.W = self.W_opt.update(self.W, grad_w)
     87             self.w0 = self.w0_opt.update(self.w0, grad_w0)
     88 
     89         # Return accumulated gradient for next layer
     90         # Calculated based on the weights used during the forward pass
     91         accum_grad = accum_grad.dot(W.T)
     92         return accum_grad
     93 
     94     def output_shape(self):
     95         return (self.n_units, )
     96 
     97 
     98 class RNN(Layer):
     99     """A Vanilla Fully-Connected Recurrent Neural Network layer.
    100 
    101     Parameters:
    102     -----------
    103     n_units: int
    104         The number of hidden states in the layer.
    105     activation: string
    106         The name of the activation function which will be applied to the output of each state.
    107     bptt_trunc: int
    108         Decides how many time steps the gradient should be propagated backwards through states
    109         given the loss gradient for time step t.
    110     input_shape: tuple
    111         The expected input shape of the layer. For dense layers a single digit specifying
    112         the number of features of the input. Must be specified if it is the first layer in
    113         the network.
    114 
    115     Reference:
    116     http://www.wildml.com/2015/09/recurrent-neural-networks-tutorial-part-2-implementing-a-language-model-rnn-with-python-numpy-and-theano/
    117     """
    118     def __init__(self, n_units, activation='tanh', bptt_trunc=5, input_shape=None):
    119         self.input_shape = input_shape
    120         self.n_units = n_units
    121         self.activation = activation_functions[activation]()
    122         self.trainable = True
    123         self.bptt_trunc = bptt_trunc
    124         self.W = None # Weight of the previous state
    125         self.V = None # Weight of the output
    126         self.U = None # Weight of the input
    127 
    128     def initialize(self, optimizer):
    129         timesteps, input_dim = self.input_shape
    130         # Initialize the weights
    131         limit = 1 / math.sqrt(input_dim)
    132         self.U  = np.random.uniform(-limit, limit, (self.n_units, input_dim))
    133         limit = 1 / math.sqrt(self.n_units)
    134         self.V = np.random.uniform(-limit, limit, (input_dim, self.n_units))
    135         self.W  = np.random.uniform(-limit, limit, (self.n_units, self.n_units))
    136         # Weight optimizers
    137         self.U_opt  = copy.copy(optimizer)
    138         self.V_opt = copy.copy(optimizer)
    139         self.W_opt = copy.copy(optimizer)
    140 
    141     def parameters(self):
    142         return np.prod(self.W.shape) + np.prod(self.U.shape) + np.prod(self.V.shape)
    143 
    144     def forward_pass(self, X, training=True):
    145         self.layer_input = X
    146         batch_size, timesteps, input_dim = X.shape
    147 
    148         # Save these values for use in backprop.
    149         self.state_input = np.zeros((batch_size, timesteps, self.n_units))
    150         self.states = np.zeros((batch_size, timesteps+1, self.n_units))
    151         self.outputs = np.zeros((batch_size, timesteps, input_dim))
    152 
    153         # Set last time step to zero for calculation of the state_input at time step zero
    154         self.states[:, -1] = np.zeros((batch_size, self.n_units))
    155         for t in range(timesteps):
    156             # Input to state_t is the current input and output of previous states
    157             self.state_input[:, t] = X[:, t].dot(self.U.T) + self.states[:, t-1].dot(self.W.T)
    158             self.states[:, t] = self.activation(self.state_input[:, t])
    159             self.outputs[:, t] = self.states[:, t].dot(self.V.T)
    160 
    161         return self.outputs
    162 
    163     def backward_pass(self, accum_grad):
    164         _, timesteps, _ = accum_grad.shape
    165 
    166         # Variables where we save the accumulated gradient w.r.t each parameter
    167         grad_U = np.zeros_like(self.U)
    168         grad_V = np.zeros_like(self.V)
    169         grad_W = np.zeros_like(self.W)
    170         # The gradient w.r.t the layer input.
    171         # Will be passed on to the previous layer in the network
    172         accum_grad_next = np.zeros_like(accum_grad)
    173 
    174         # Back Propagation Through Time
    175         for t in reversed(range(timesteps)):
    176             # Update gradient w.r.t V at time step t
    177             grad_V += accum_grad[:, t].T.dot(self.states[:, t])
    178             # Calculate the gradient w.r.t the state input
    179             grad_wrt_state = accum_grad[:, t].dot(self.V) * self.activation.gradient(self.state_input[:, t])
    180             # Gradient w.r.t the layer input
    181             accum_grad_next[:, t] = grad_wrt_state.dot(self.U)
    182             # Update gradient w.r.t W and U by backprop. from time step t for at most
    183             # self.bptt_trunc number of time steps
    184             for t_ in reversed(np.arange(max(0, t - self.bptt_trunc), t+1)):
    185                 grad_U += grad_wrt_state.T.dot(self.layer_input[:, t_])
    186                 grad_W += grad_wrt_state.T.dot(self.states[:, t_-1])
    187                 # Calculate gradient w.r.t previous state
    188                 grad_wrt_state = grad_wrt_state.dot(self.W) * self.activation.gradient(self.state_input[:, t_-1])
    189 
    190         # Update weights
    191         self.U = self.U_opt.update(self.U, grad_U)
    192         self.V = self.V_opt.update(self.V, grad_V)
    193         self.W = self.W_opt.update(self.W, grad_W)
    194 
    195         return accum_grad_next
    196 
    197     def output_shape(self):
    198         return self.input_shape
    199 
    200 class Conv2D(Layer):
    201     """A 2D Convolution Layer.
    202 
    203     Parameters:
    204     -----------
    205     n_filters: int
    206         The number of filters that will convolve over the input matrix. The number of channels
    207         of the output shape.
    208     filter_shape: tuple
    209         A tuple (filter_height, filter_width).
    210     input_shape: tuple
    211         The shape of the expected input of the layer. (batch_size, channels, height, width)
    212         Only needs to be specified for first layer in the network.
    213     padding: string
    214         Either 'same' or 'valid'. 'same' results in padding being added so that the output height and width
    215         matches the input height and width. For 'valid' no padding is added.
    216     stride: int
    217         The stride length of the filters during the convolution over the input.
    218     """
    219     def __init__(self, n_filters, filter_shape, input_shape=None, padding='same', stride=1):
    220         self.n_filters = n_filters
    221         self.filter_shape = filter_shape
    222         self.padding = padding
    223         self.stride = stride
    224         self.input_shape = input_shape
    225         self.trainable = True
    226 
    227     def initialize(self, optimizer):
    228         # Initialize the weights
    229         filter_height, filter_width = self.filter_shape
    230         channels = self.input_shape[0]
    231         limit = 1 / math.sqrt(np.prod(self.filter_shape))
    232         self.W  = np.random.uniform(-limit, limit, size=(self.n_filters, channels, filter_height, filter_width))
    233         self.w0 = np.zeros((self.n_filters, 1))
    234         # Weight optimizers
    235         self.W_opt  = copy.copy(optimizer)
    236         self.w0_opt = copy.copy(optimizer)
    237 
    238     def parameters(self):
    239         return np.prod(self.W.shape) + np.prod(self.w0.shape)
    240 
    241     def forward_pass(self, X, training=True):
    242         batch_size, channels, height, width = X.shape
    243         self.layer_input = X
    244         # Turn image shape into column shape
    245         # (enables dot product between input and weights)
    246         self.X_col = image_to_column(X, self.filter_shape, stride=self.stride, output_shape=self.padding)
    247         # Turn weights into column shape
    248         self.W_col = self.W.reshape((self.n_filters, -1))
    249         # Calculate output
    250         output = self.W_col.dot(self.X_col) + self.w0
    251         # Reshape into (n_filters, out_height, out_width, batch_size)
    252         output = output.reshape(self.output_shape() + (batch_size, ))
    253         # Redistribute axises so that batch size comes first
    254         return output.transpose(3,0,1,2)
    255 
    256     def backward_pass(self, accum_grad):
    257         # Reshape accumulated gradient into column shape
    258         accum_grad = accum_grad.transpose(1, 2, 3, 0).reshape(self.n_filters, -1)
    259 
    260         if self.trainable:
    261             # Take dot product between column shaped accum. gradient and column shape
    262             # layer input to determine the gradient at the layer with respect to layer weights
    263             grad_w = accum_grad.dot(self.X_col.T).reshape(self.W.shape)
    264             # The gradient with respect to bias terms is the sum similarly to in Dense layer
    265             grad_w0 = np.sum(accum_grad, axis=1, keepdims=True)
    266 
    267             # Update the layers weights
    268             self.W = self.W_opt.update(self.W, grad_w)
    269             self.w0 = self.w0_opt.update(self.w0, grad_w0)
    270 
    271         # Recalculate the gradient which will be propogated back to prev. layer
    272         accum_grad = self.W_col.T.dot(accum_grad)
    273         # Reshape from column shape to image shape
    274         accum_grad = column_to_image(accum_grad,
    275                                 self.layer_input.shape,
    276                                 self.filter_shape,
    277                                 stride=self.stride,
    278                                 output_shape=self.padding)
    279 
    280         return accum_grad
    281 
    282     def output_shape(self):
    283         channels, height, width = self.input_shape
    284         pad_h, pad_w = determine_padding(self.filter_shape, output_shape=self.padding)
    285         output_height = (height + np.sum(pad_h) - self.filter_shape[0]) / self.stride + 1
    286         output_width = (width + np.sum(pad_w) - self.filter_shape[1]) / self.stride + 1
    287         return self.n_filters, int(output_height), int(output_width)
    288 
    289 
    290 class BatchNormalization(Layer):
    291     """Batch normalization.
    292     """
    293     def __init__(self, momentum=0.99):
    294         self.momentum = momentum
    295         self.trainable = True
    296         self.eps = 0.01
    297         self.running_mean = None
    298         self.running_var = None
    299 
    300     def initialize(self, optimizer):
    301         # Initialize the parameters
    302         self.gamma  = np.ones(self.input_shape)
    303         self.beta = np.zeros(self.input_shape)
    304         # parameter optimizers
    305         self.gamma_opt  = copy.copy(optimizer)
    306         self.beta_opt = copy.copy(optimizer)
    307 
    308     def parameters(self):
    309         return np.prod(self.gamma.shape) + np.prod(self.beta.shape)
    310 
    311     def forward_pass(self, X, training=True):
    312 
    313         # Initialize running mean and variance if first run
    314         if self.running_mean is None:
    315             self.running_mean = np.mean(X, axis=0)
    316             self.running_var = np.var(X, axis=0)
    317 
    318         if training and self.trainable:
    319             mean = np.mean(X, axis=0)
    320             var = np.var(X, axis=0)
    321             self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
    322             self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var
    323         else:
    324             mean = self.running_mean
    325             var = self.running_var
    326 
    327         # Statistics saved for backward pass
    328         self.X_centered = X - mean
    329         self.stddev_inv = 1 / np.sqrt(var + self.eps)
    330 
    331         X_norm = self.X_centered * self.stddev_inv
    332         output = self.gamma * X_norm + self.beta
    333 
    334         return output
    335 
    336     def backward_pass(self, accum_grad):
    337 
    338         # Save parameters used during the forward pass
    339         gamma = self.gamma
    340 
    341         # If the layer is trainable the parameters are updated
    342         if self.trainable:
    343             X_norm = self.X_centered * self.stddev_inv
    344             grad_gamma = np.sum(accum_grad * X_norm, axis=0)
    345             grad_beta = np.sum(accum_grad, axis=0)
    346 
    347             self.gamma = self.gamma_opt.update(self.gamma, grad_gamma)
    348             self.beta = self.beta_opt.update(self.beta, grad_beta)
    349 
    350         batch_size = accum_grad.shape[0]
    351 
    352         # The gradient of the loss with respect to the layer inputs (use weights and statistics from forward pass)
    353         accum_grad = (1 / batch_size) * gamma * self.stddev_inv * (
    354             batch_size * accum_grad
    355             - np.sum(accum_grad, axis=0)
    356             - self.X_centered * self.stddev_inv**2 * np.sum(accum_grad * self.X_centered, axis=0)
    357             )
    358 
    359         return accum_grad
    360 
    361     def output_shape(self):
    362         return self.input_shape
    363 
    364 
    365 class PoolingLayer(Layer):
    366     """A parent class of MaxPooling2D and AveragePooling2D
    367     """
    368     def __init__(self, pool_shape=(2, 2), stride=1, padding=0):
    369         self.pool_shape = pool_shape
    370         self.stride = stride
    371         self.padding = padding
    372         self.trainable = True
    373 
    374     def forward_pass(self, X, training=True):
    375         self.layer_input = X
    376 
    377         batch_size, channels, height, width = X.shape
    378 
    379         _, out_height, out_width = self.output_shape()
    380 
    381         X = X.reshape(batch_size*channels, 1, height, width)
    382         X_col = image_to_column(X, self.pool_shape, self.stride, self.padding)
    383 
    384         # MaxPool or AveragePool specific method
    385         output = self._pool_forward(X_col)
    386 
    387         output = output.reshape(out_height, out_width, batch_size, channels)
    388         output = output.transpose(2, 3, 0, 1)
    389 
    390         return output
    391 
    392     def backward_pass(self, accum_grad):
    393         batch_size, _, _, _ = accum_grad.shape
    394         channels, height, width = self.input_shape
    395         accum_grad = accum_grad.transpose(2, 3, 0, 1).ravel()
    396 
    397         # MaxPool or AveragePool specific method
    398         accum_grad_col = self._pool_backward(accum_grad)
    399 
    400         accum_grad = column_to_image(accum_grad_col, (batch_size * channels, 1, height, width), self.pool_shape, self.stride, 0)
    401         accum_grad = accum_grad.reshape((batch_size,) + self.input_shape)
    402 
    403         return accum_grad
    404 
    405     def output_shape(self):
    406         channels, height, width = self.input_shape
    407         out_height = (height - self.pool_shape[0]) / self.stride + 1
    408         out_width = (width - self.pool_shape[1]) / self.stride + 1
    409         assert out_height % 1 == 0
    410         assert out_width % 1 == 0
    411         return channels, int(out_height), int(out_width)
    412 
    413 
    414 class MaxPooling2D(PoolingLayer):
    415     def _pool_forward(self, X_col):
    416         arg_max = np.argmax(X_col, axis=0).flatten()
    417         output = X_col[arg_max, range(arg_max.size)]
    418         self.cache = arg_max
    419         return output
    420 
    421     def _pool_backward(self, accum_grad):
    422         accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
    423         arg_max = self.cache
    424         accum_grad_col[arg_max, range(accum_grad.size)] = accum_grad
    425         return accum_grad_col
    426 
    427 class AveragePooling2D(PoolingLayer):
    428     def _pool_forward(self, X_col):
    429         output = np.mean(X_col, axis=0)
    430         return output
    431 
    432     def _pool_backward(self, accum_grad):
    433         accum_grad_col = np.zeros((np.prod(self.pool_shape), accum_grad.size))
    434         accum_grad_col[:, range(accum_grad.size)] = 1. / accum_grad_col.shape[0] * accum_grad
    435         return accum_grad_col
    436 
    437 
    438 class ConstantPadding2D(Layer):
    439     """Adds rows and columns of constant values to the input.
    440     Expects the input to be of shape (batch_size, channels, height, width)
    441 
    442     Parameters:
    443     -----------
    444     padding: tuple
    445         The amount of padding along the height and width dimension of the input.
    446         If (pad_h, pad_w) the same symmetric padding is applied along height and width dimension.
    447         If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified padding is added to beginning and end of
    448         the height and width dimension.
    449     padding_value: int or tuple
    450         The value the is added as padding.
    451     """
    452     def __init__(self, padding, padding_value=0):
    453         self.padding = padding
    454         self.trainable = True
    455         if not isinstance(padding[0], tuple):
    456             self.padding = ((padding[0], padding[0]), padding[1])
    457         if not isinstance(padding[1], tuple):
    458             self.padding = (self.padding[0], (padding[1], padding[1]))
    459         self.padding_value = padding_value
    460 
    461     def forward_pass(self, X, training=True):
    462         output = np.pad(X,
    463             pad_width=((0,0), (0,0), self.padding[0], self.padding[1]),
    464             mode="constant",
    465             constant_values=self.padding_value)
    466         return output
    467 
    468     def backward_pass(self, accum_grad):
    469         pad_top, pad_left = self.padding[0][0], self.padding[1][0]
    470         height, width = self.input_shape[1], self.input_shape[2]
    471         accum_grad = accum_grad[:, :, pad_top:pad_top+height, pad_left:pad_left+width]
    472         return accum_grad
    473 
    474     def output_shape(self):
    475         new_height = self.input_shape[1] + np.sum(self.padding[0])
    476         new_width = self.input_shape[2] + np.sum(self.padding[1])
    477         return (self.input_shape[0], new_height, new_width)
    478 
    479 
    480 class ZeroPadding2D(ConstantPadding2D):
    481     """Adds rows and columns of zero values to the input.
    482     Expects the input to be of shape (batch_size, channels, height, width)
    483 
    484     Parameters:
    485     -----------
    486     padding: tuple
    487         The amount of padding along the height and width dimension of the input.
    488         If (pad_h, pad_w) the same symmetric padding is applied along height and width dimension.
    489         If ((pad_h0, pad_h1), (pad_w0, pad_w1)) the specified padding is added to beginning and end of
    490         the height and width dimension.
    491     """
    492     def __init__(self, padding):
    493         self.padding = padding
    494         if isinstance(padding[0], int):
    495             self.padding = ((padding[0], padding[0]), padding[1])
    496         if isinstance(padding[1], int):
    497             self.padding = (self.padding[0], (padding[1], padding[1]))
    498         self.padding_value = 0
    499 
    500 
    501 class Flatten(Layer):
    502     """ Turns a multidimensional matrix into two-dimensional """
    503     def __init__(self, input_shape=None):
    504         self.prev_shape = None
    505         self.trainable = True
    506         self.input_shape = input_shape
    507 
    508     def forward_pass(self, X, training=True):
    509         self.prev_shape = X.shape
    510         return X.reshape((X.shape[0], -1))
    511 
    512     def backward_pass(self, accum_grad):
    513         return accum_grad.reshape(self.prev_shape)
    514 
    515     def output_shape(self):
    516         return (np.prod(self.input_shape),)
    517 
    518 
    519 class UpSampling2D(Layer):
    520     """ Nearest neighbor up sampling of the input. Repeats the rows and
    521     columns of the data by size[0] and size[1] respectively.
    522 
    523     Parameters:
    524     -----------
    525     size: tuple
    526         (size_y, size_x) - The number of times each axis will be repeated.
    527     """
    528     def __init__(self, size=(2,2), input_shape=None):
    529         self.prev_shape = None
    530         self.trainable = True
    531         self.size = size
    532         self.input_shape = input_shape
    533 
    534     def forward_pass(self, X, training=True):
    535         self.prev_shape = X.shape
    536         # Repeat each axis as specified by size
    537         X_new = X.repeat(self.size[0], axis=2).repeat(self.size[1], axis=3)
    538         return X_new
    539 
    540     def backward_pass(self, accum_grad):
    541         # Down sample input to previous shape
    542         accum_grad = accum_grad[:, :, ::self.size[0], ::self.size[1]]
    543         return accum_grad
    544 
    545     def output_shape(self):
    546         channels, height, width = self.input_shape
    547         return channels, self.size[0] * height, self.size[1] * width
    548 
    549 
    550 class Reshape(Layer):
    551     """ Reshapes the input tensor into specified shape
    552 
    553     Parameters:
    554     -----------
    555     shape: tuple
    556         The shape which the input shall be reshaped to.
    557     """
    558     def __init__(self, shape, input_shape=None):
    559         self.prev_shape = None
    560         self.trainable = True
    561         self.shape = shape
    562         self.input_shape = input_shape
    563 
    564     def forward_pass(self, X, training=True):
    565         self.prev_shape = X.shape
    566         return X.reshape((X.shape[0], ) + self.shape)
    567 
    568     def backward_pass(self, accum_grad):
    569         return accum_grad.reshape(self.prev_shape)
    570 
    571     def output_shape(self):
    572         return self.shape
    573 
    574 
    575 class Dropout(Layer):
    576     """A layer that randomly sets a fraction p of the output units of the previous layer
    577     to zero.
    578 
    579     Parameters:
    580     -----------
    581     p: float
    582         The probability that unit x is set to zero.
    583     """
    584     def __init__(self, p=0.2):
    585         self.p = p
    586         self._mask = None
    587         self.input_shape = None
    588         self.n_units = None
    589         self.pass_through = True
    590         self.trainable = True
    591 
    592     def forward_pass(self, X, training=True):
    593         c = (1 - self.p)
    594         if training:
    595             self._mask = np.random.uniform(size=X.shape) > self.p
    596             c = self._mask
    597         return X * c
    598 
    599     def backward_pass(self, accum_grad):
    600         return accum_grad * self._mask
    601 
    602     def output_shape(self):
    603         return self.input_shape
    604 
    605 activation_functions = {
    606     'relu': ReLU,
    607     'sigmoid': Sigmoid,
    608     'selu': SELU,
    609     'elu': ELU,
    610     'softmax': Softmax,
    611     'leaky_relu': LeakyReLU,
    612     'tanh': TanH,
    613     'softplus': SoftPlus
    614 }
    615 
    616 class Activation(Layer):
    617     """A layer that applies an activation operation to the input.
    618 
    619     Parameters:
    620     -----------
    621     name: string
    622         The name of the activation function that will be used.
    623     """
    624 
    625     def __init__(self, name):
    626         self.activation_name = name
    627         self.activation_func = activation_functions[name]()
    628         self.trainable = True
    629 
    630     def layer_name(self):
    631         return "Activation (%s)" % (self.activation_func.__class__.__name__)
    632 
    633     def forward_pass(self, X, training=True):
    634         self.layer_input = X
    635         return self.activation_func(X)
    636 
    637     def backward_pass(self, accum_grad):
    638         return accum_grad * self.activation_func.gradient(self.layer_input)
    639 
    640     def output_shape(self):
    641         return self.input_shape
    642 
    643 
    644 # Method which calculates the padding based on the specified output shape and the
    645 # shape of the filters
    646 def determine_padding(filter_shape, output_shape="same"):
    647 
    648     # No padding
    649     if output_shape == "valid":
    650         return (0, 0), (0, 0)
    651     # Pad so that the output shape is the same as input shape (given that stride=1)
    652     elif output_shape == "same":
    653         filter_height, filter_width = filter_shape
    654 
    655         # Derived from:
    656         # output_height = (height + pad_h - filter_height) / stride + 1
    657         # In this case output_height = height and stride = 1. This gives the
    658         # expression for the padding below.
    659         pad_h1 = int(math.floor((filter_height - 1)/2))
    660         pad_h2 = int(math.ceil((filter_height - 1)/2))
    661         pad_w1 = int(math.floor((filter_width - 1)/2))
    662         pad_w2 = int(math.ceil((filter_width - 1)/2))
    663 
    664         return (pad_h1, pad_h2), (pad_w1, pad_w2)
    665 
    666 
    667 # Reference: CS231n Stanford
    668 def get_im2col_indices(images_shape, filter_shape, padding, stride=1):
    669     # First figure out what the size of the output should be
    670     batch_size, channels, height, width = images_shape
    671     filter_height, filter_width = filter_shape
    672     pad_h, pad_w = padding
    673     out_height = int((height + np.sum(pad_h) - filter_height) / stride + 1)
    674     out_width = int((width + np.sum(pad_w) - filter_width) / stride + 1)
    675 
    676     i0 = np.repeat(np.arange(filter_height), filter_width)
    677     i0 = np.tile(i0, channels)
    678     i1 = stride * np.repeat(np.arange(out_height), out_width)
    679     j0 = np.tile(np.arange(filter_width), filter_height * channels)
    680     j1 = stride * np.tile(np.arange(out_width), out_height)
    681     i = i0.reshape(-1, 1) + i1.reshape(1, -1)
    682     j = j0.reshape(-1, 1) + j1.reshape(1, -1)
    683 
    684     k = np.repeat(np.arange(channels), filter_height * filter_width).reshape(-1, 1)
    685 
    686     return (k, i, j)
    687 
    688 
    689 # Method which turns the image shaped input to column shape.
    690 # Used during the forward pass.
    691 # Reference: CS231n Stanford
    692 def image_to_column(images, filter_shape, stride, output_shape='same'):
    693     filter_height, filter_width = filter_shape
    694 
    695     pad_h, pad_w = determine_padding(filter_shape, output_shape)
    696 
    697     # Add padding to the image
    698     images_padded = np.pad(images, ((0, 0), (0, 0), pad_h, pad_w), mode='constant')
    699 
    700     # Calculate the indices where the dot products are to be applied between weights
    701     # and the image
    702     k, i, j = get_im2col_indices(images.shape, filter_shape, (pad_h, pad_w), stride)
    703 
    704     # Get content from image at those indices
    705     cols = images_padded[:, k, i, j]
    706     channels = images.shape[1]
    707     # Reshape content into column shape
    708     cols = cols.transpose(1, 2, 0).reshape(filter_height * filter_width * channels, -1)
    709     return cols
    710 
    711 
    712 
    713 # Method which turns the column shaped input to image shape.
    714 # Used during the backward pass.
    715 # Reference: CS231n Stanford
    716 def column_to_image(cols, images_shape, filter_shape, stride, output_shape='same'):
    717     batch_size, channels, height, width = images_shape
    718     pad_h, pad_w = determine_padding(filter_shape, output_shape)
    719     height_padded = height + np.sum(pad_h)
    720     width_padded = width + np.sum(pad_w)
    721     images_padded = np.zeros((batch_size, channels, height_padded, width_padded))
    722 
    723     # Calculate the indices where the dot products are applied between weights
    724     # and the image
    725     k, i, j = get_im2col_indices(images_shape, filter_shape, (pad_h, pad_w), stride)
    726 
    727     cols = cols.reshape(channels * np.prod(filter_shape), -1, batch_size)
    728     cols = cols.transpose(2, 0, 1)
    729     # Add column content to the images at the indices
    730     np.add.at(images_padded, (slice(None), k, i, j), cols)
    731 
    732     # Return image without padding
    733     return images_padded[:, :, pad_h[0]:height+pad_h[0], pad_w[0]:width+pad_w[0]]