From ae2a60bfd8ad700ee137c4f6f84fb9535c435bf6 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 27 Dec 2021 20:33:32 +0000 Subject: [PATCH 01/24] Re-organise utils --- cnn/model.py | 6 +++- cnn/utils.py | 77 ----------------------------------------- cnn/utils/__init__.py | 0 cnn/utils/array.py | 48 +++++++++++++++++++++++++ cnn/utils/processing.py | 35 +++++++++++++++++++ 5 files changed, 88 insertions(+), 78 deletions(-) delete mode 100644 cnn/utils.py create mode 100644 cnn/utils/__init__.py create mode 100644 cnn/utils/array.py create mode 100644 cnn/utils/processing.py diff --git a/cnn/model.py b/cnn/model.py index 5d37938..87fb73f 100644 --- a/cnn/model.py +++ b/cnn/model.py @@ -5,8 +5,12 @@ from . import layers from . import optimisers +def load_model(name): + assert name.split('.')[-1] == 'pkl' + with open(name, 'rb') as file: + model = pickle.load(file) + return model -# CLASS class Model(): """ This is the top level class. diff --git a/cnn/utils.py b/cnn/utils.py deleted file mode 100644 index 9575136..0000000 --- a/cnn/utils.py +++ /dev/null @@ -1,77 +0,0 @@ -import numpy as np -import pickle - -def one_hot_encode(array,num_cats,axis=None): - ''' - Perform one-hot encoding on the category labels. - - - array: is a 2D np.ndarray - - num_cats: number of categories that the model is to be trained on. - - axis: the axis of array that holds the category label value. If axis=None, then this is inferred as the axis with the smallest size. - ''' - assert type(array) in (np.ndarray,list) - array = np.array(array) - assert array.ndim == 2 - if axis is None: - axis = np.argmin(array.shape) - else: - assert axis in (0,1) - assert array.shape[axis] == 1 - - N = array.shape[1 - axis] - array = array.reshape((1,N)) - - return np.eye(num_cats)[array][0] # Returns in the shape (N,num_cats) - -def shuffle(X,y,random_seed=None): - if random_seed is not None: - np.random.seed(random_seed) - permutation = np.random.permutation( X.shape[0] ) - X_shuffled = X[permutation] - y_shuffled = y[permutation] - print(X_shuffled.shape,y_shuffled.shape) - assert X.shape == X_shuffled.shape, f'X shape: {X.shape} | X shuffled shape: {X_shuffled.shape}' - return (X_shuffled, y_shuffled) - - -def array_init(shape,method=None,seed=None): - ''' Random initialisation of weights array. - Xavier or Kaiming: (https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79) ''' - assert len(shape) >= 2 - fan_in = shape[-1] - fan_out = shape[-2] - - if seed: - np.random.seed(seed) - - if method is None: - array = np.random.randn(*shape) * 0.01 - elif method == 'kaiming_normal': - # AKA "he normal" after Kaiming He. - array = np.random.normal(size=shape) * np.sqrt(2./fan_in) - elif method == 'kaiming_uniform': - array = np.random.uniform(size=shape) * np.sqrt(6./fan_in) - elif method == 'xavier_uniform': - array = np.random.uniform(size=shape) * np.sqrt(6./(fan_in+fan_out)) - elif method == 'xavier_normal': - # https://arxiv.org/pdf/2004.09506.pdf - target_std = np.sqrt(2./np.sum(shape)) - array = np.random.normal(size=shape,scale=target_std) - elif method == 'abs_norm': - # Custom alternative - arr = np.random.normal(size=shape) - array = arr / np.abs(arr).max() - elif method == 'uniform': - array = np.random.uniform(size=shape) * (1./np.sqrt(fan_in)) - else: - raise BaseException('ERROR: Unrecognised array initialisation method: ' + method) - - # print(f'--> Array init method: {method}, max: {array.max()}, min: {array.min()}, std: {array.std()}' ) - # print('Array:',array) - return array - -def load_model(name): - assert name.split('.')[-1] == 'pkl' - with open(name, 'rb') as file: - model = pickle.load(file) - return model \ No newline at end of file diff --git a/cnn/utils/__init__.py b/cnn/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cnn/utils/array.py b/cnn/utils/array.py new file mode 100644 index 0000000..c5928e0 --- /dev/null +++ b/cnn/utils/array.py @@ -0,0 +1,48 @@ +import numpy as np + + +def array_init(shape: tuple,method=None,seed=None) -> np.ndarray: + ''' Random initialisation of weights array. + Xavier or Kaiming: (https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79) ''' + assert len(shape) >= 2 + fan_in = shape[-1] + fan_out = shape[-2] + + if seed: + np.random.seed(seed) + + if method is None: + array = np.random.randn(*shape) * 0.01 + elif method == 'kaiming_normal': + # AKA "he normal" after Kaiming He. + array = np.random.normal(size=shape) * np.sqrt(2./fan_in) + elif method == 'kaiming_uniform': + array = np.random.uniform(size=shape) * np.sqrt(6./fan_in) + elif method == 'xavier_uniform': + array = np.random.uniform(size=shape) * np.sqrt(6./(fan_in+fan_out)) + elif method == 'xavier_normal': + # https://arxiv.org/pdf/2004.09506.pdf + target_std = np.sqrt(2./np.sum(shape)) + array = np.random.normal(size=shape,scale=target_std) + elif method == 'abs_norm': + # Custom alternative + arr = np.random.normal(size=shape) + array = arr / np.abs(arr).max() + elif method == 'uniform': + array = np.random.uniform(size=shape) * (1./np.sqrt(fan_in)) + else: + raise NameError('ERROR: Unrecognised array initialisation method: ' + method) + + return array + +def dilate(array: np.ndarray,channel_width: int) -> np.ndarray: + """ Inserts 'channel_width' number of 0s between each item in 'array'. """ + _,_, rows, cols = array.shape + dilation_idx_row = np.arange(rows-1) + 1 # Intiatial indices for insertion of zeros + dilation_idx_col = np.arange(cols-1) + 1 # Intiatial indices for insertion of zeros + dilated_array = array.copy() + for n in range(1,channel_width): # the n multiplier is to increment the indices in the non-uniform manner required. + dilated_array = np.insert( + np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ), + dilation_idx_col * n, 0, axis=3) + return dilated_array diff --git a/cnn/utils/processing.py b/cnn/utils/processing.py new file mode 100644 index 0000000..ee52d09 --- /dev/null +++ b/cnn/utils/processing.py @@ -0,0 +1,35 @@ +import numpy as np +from typing import Tuple + +def one_hot_encode(array: np.ndarray,num_cats: int,axis: bool=None) -> np.ndarray: + ''' + Perform one-hot encoding on the category labels. + + - array: is a 2D np.ndarray + - num_cats: number of categories that the model is to be trained on. + - axis: the axis of array that holds the category label value. If axis=None, then this is inferred as the axis with the smallest size. + ''' + assert type(array) in (np.ndarray,list) + array = np.array(array) + assert array.ndim == 2 + if axis is None: + axis = np.argmin(array.shape) + else: + assert axis in (0,1) + assert array.shape[axis] == 1 + + N = array.shape[1 - axis] + array = array.reshape((1,N)) + + return np.eye(num_cats)[array][0] # Returns in the shape (N,num_cats) + +def shuffle(X: np.ndarray,y: np.ndarray,random_seed: bool=None) -> Tuple[np.ndarray]: + if random_seed is not None: + np.random.seed(random_seed) + permutation = np.random.permutation( X.shape[0] ) + X_shuffled = X[permutation] + y_shuffled = y[permutation] + print(X_shuffled.shape,y_shuffled.shape) + assert X.shape == X_shuffled.shape, f'X shape: {X.shape} | X shuffled shape: {X_shuffled.shape}' + return (X_shuffled, y_shuffled) + From eec892c928937a08e2baeb41313018683defcc55 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 27 Dec 2021 20:33:53 +0000 Subject: [PATCH 02/24] Update API usage for examples --- diagram_usecase.py | 12 ++++++------ mnist_dataloader.py | 6 +++--- model_analysis.py | 6 +++--- nn_iris.py | 8 ++++---- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/diagram_usecase.py b/diagram_usecase.py index 785afd5..bce5f06 100644 --- a/diagram_usecase.py +++ b/diagram_usecase.py @@ -1,4 +1,4 @@ -from src.cnn import CNN +import cnn import numpy as np np.set_printoptions(linewidth=200) @@ -7,18 +7,18 @@ y = np.array([[0],[1]]) -model = CNN(input_shape=(3,12,12)) +model = cnn.Model(input_shape=(3,12,12)) model.add_layer( - CNN.Conv_Layer(filt_shape=(3,3),num_filters=2,stride=1,padding=1) + cnn.layers.Conv2D(filt_shape=(3,3),num_filters=2,stride=1,padding=1) ) model.add_layer( - CNN.Pool_Layer(filt_shape=(3,3),stride=3,pool_type='mean') + cnn.layers.Pool(filt_shape=(3,3),stride=3,pool_type='mean') ) model.add_layer( - CNN.FC_Layer(num_nodes=9,activation='relu') + cnn.layers.FC(num_nodes=9,activation='relu') ) model.add_layer( - CNN.FC_Layer(num_nodes=2,activation='sigmoid') + cnn.layers.FC(num_nodes=2,activation='sigmoid') ) model.train([X],[y],epochs=1) diff --git a/mnist_dataloader.py b/mnist_dataloader.py index 8fdf97f..ff6ca09 100644 --- a/mnist_dataloader.py +++ b/mnist_dataloader.py @@ -1,6 +1,6 @@ import mnist import numpy as np -from src.cnn import CNN +from cnn.utils.processing import one_hot_encode np.set_printoptions(linewidth=200) @@ -23,8 +23,8 @@ def get_data(normalise=True,one_hot=True): test_labels = test_labels.reshape((1,len(test_labels))) # labels need to be 'one-hot encoded' - train_labels = CNN.one_hot_encode(train_labels,10) if one_hot else train_labels - test_labels = CNN.one_hot_encode(test_labels,10) if one_hot else test_labels + train_labels = one_hot_encode(train_labels,10) if one_hot else train_labels + test_labels = one_hot_encode(test_labels,10) if one_hot else test_labels print('Train images shape:', train_images.shape, 'Train labels shape:', train_labels.shape) print('Test images shape:', test_images.shape, 'Test labels shape:', test_labels.shape) diff --git a/model_analysis.py b/model_analysis.py index e359906..40219ee 100644 --- a/model_analysis.py +++ b/model_analysis.py @@ -1,7 +1,7 @@ -from src.cnn import CNN -from src.cnn_analyser import CNN_Analyser +from cnn.model import load_model +from model_analysis import CNN_Analyser -model = CNN.load_model('models/cnn_model_adam_tf_comparitor_vectorised_14-33-36.pkl') +model = load_model('models/cnn_model_adam_tf_comparitor_vectorised_14-33-36.pkl') CA = CNN_Analyser(model) # print(model.get_model_details()) diff --git a/nn_iris.py b/nn_iris.py index 4dcc1d5..6222b05 100644 --- a/nn_iris.py +++ b/nn_iris.py @@ -1,4 +1,4 @@ -from src import cnn +import cnn import numpy as np from sklearn.datasets import load_iris import matplotlib.pyplot as plt @@ -12,13 +12,13 @@ print(X.shape,Y_onehot.shape) # print(X,Y_onehot) -model = cnn.CNN(optimiser_method='adam') +model = cnn.Model(optimiser_method='adam') model.add_layer( - cnn.CNN.FC_Layer(3,input_shape=(4,1),activation='relu',initiation_method='kaiming_normal') + cnn.layers.FC(3,input_shape=(4,1),activation='relu',initiation_method='kaiming_normal') ) model.add_layer( - cnn.CNN.FC_Layer(3,activation='softmax',initiation_method='kaiming_normal') + cnn.layers.FC(3,activation='softmax',initiation_method='kaiming_normal') ) model.prepare_model() From 47339734796574aa765e656910bd2d130db517a5 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 27 Dec 2021 20:51:55 +0000 Subject: [PATCH 03/24] Add function type hints --- cnn/layers/activation.py | 4 +-- cnn/layers/conv.py | 72 +++++++++++++++++++------------------- cnn/layers/fc.py | 6 ++-- cnn/layers/flatten.py | 4 +-- cnn/layers/pool.py | 6 ++-- cnn/model.py | 28 ++++++++------- cnn/optimisers/__init__.py | 4 +++ cnn/optimisers/adam.py | 5 +-- cnn/optimisers/gd.py | 7 ++-- cnn/optimisers/rmsprop.py | 5 +-- 10 files changed, 76 insertions(+), 65 deletions(-) diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py index 3ab0fc8..9ac1af7 100644 --- a/cnn/layers/activation.py +++ b/cnn/layers/activation.py @@ -22,7 +22,7 @@ def prepare_layer(self): self.OUTPUT_SHAPE = self.INPUT_SHAPE # self.output = np.zeros(shape=self.INPUT_SHAPE ) - def _forwards(self,_input): + def _forwards(self,_input: np.ndarray) -> np.ndarray: if self.prev_layer.LAYER_TYPE == 'FC': assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}' self.input = _input @@ -64,7 +64,7 @@ def _forwards(self,_input): # print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output) return self.output - def _backwards(self,dC_dA): + def _backwards(self,dC_dA: np.ndarray) -> np.ndarray: """Compute derivative of Activation w.r.t. Z NOTE: CURRENTLY NOT SUPPORTED FOR CONV/POOL LAYERS. """ diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index 413075e..41d980a 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -38,7 +38,7 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd self.VECTORISED = vectorised self.TRACK_HISTORY = track_history - def prepare_layer(self): + def prepare_layer(self) -> None: if self.prev_layer == None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' else: @@ -55,7 +55,7 @@ def prepare_layer(self): # Initiate params self.filters = CNNParam( - utils.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED), + utils.array.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED), trainable=True ) self.bias = CNNParam( @@ -77,15 +77,15 @@ def prepare_layer(self): pad_rows_needed = ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE) pad_cols_needed = ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE) - self.COL_LEFT_PAD = pad_cols_needed // 2 # // Floor division - self.COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2) - self.ROW_UP_PAD = pad_rows_needed // 2 # // Floor division - self.ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2) + self._COL_LEFT_PAD = pad_cols_needed // 2 # // Floor division + self._COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2) + self._ROW_UP_PAD = pad_rows_needed // 2 # // Floor division + self._ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2) else: - self.COL_LEFT_PAD = self.COL_RIGHT_PAD = self.ROW_UP_PAD = self.ROW_DOWN_PAD = self.PADDING + self._COL_LEFT_PAD = self._COL_RIGHT_PAD = self._ROW_UP_PAD = self._ROW_DOWN_PAD = self.PADDING - col_out = int((NUM_INPUT_COLS + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 - row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 + col_out = int((NUM_INPUT_COLS + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 + row_out = int((NUM_INPUT_ROWS + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out) @@ -94,13 +94,13 @@ def prepare_layer(self): assert self.OUTPUT_SHAPE[-2:] == self.INPUT_SHAPE[-2:], f'"SAME" padding chosen however last two dimensions of input and output shapes do not match; {self.INPUT_SHAPE} and {self.OUTPUT_SHAPE} respectively.' # Channels may differ. - def _forwards(self,_input): + def _forwards(self,_input: np.ndarray) -> np.ndarray: assert _input.ndim == 4 and _input.shape[1:] == self.INPUT_SHAPE, f'Input shape, {_input.shape[1:]}, expected to be, {self.INPUT_SHAPE} for each example (observation).' self.input = _input batch_size = _input.shape[0] # Apply the padding to the input. - self.padded_input = np.pad(self.input,[(0,0),(0,0),(self.ROW_UP_PAD,self.ROW_DOWN_PAD),(self.COL_LEFT_PAD,self.COL_RIGHT_PAD)],'constant',constant_values=(0,0)) + self.padded_input = np.pad(self.input,[(0,0),(0,0),(self._ROW_UP_PAD,self._ROW_DOWN_PAD),(self._COL_LEFT_PAD,self._COL_RIGHT_PAD)],'constant',constant_values=(0,0)) if self.VECTORISED: self.output = Conv2D.convolve_vectorised(self.padded_input,self.filters,self.STRIDE) @@ -123,31 +123,29 @@ def _forwards(self,_input): if self.TRACK_HISTORY: self._track_metrics(output=self.output) return self.output # NOTE: Output is 4D array of shape: ( BATCH_SIZE, NUM_FILTS, NUM_ROWS, NUM_COLS ) - def _backwards(self,cost_gradient): + def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: assert cost_gradient.shape == self.output.shape, f'cost_gradient shape {cost_gradient.shape} does not match layer output shape {self.output.shape}.' if self.TRACK_HISTORY: self._track_metrics(cost_gradient=cost_gradient) - _,_, c_rows, c_cols = cost_gradient.shape - dilation_idx_row = np.arange(c_rows-1) + 1 # Intiatial indices for insertion of zeros - dilation_idx_col = np.arange(c_cols-1) + 1 # Intiatial indices for insertion of zeros - - cost_gradient_dilated = cost_gradient.copy() - if self.STRIDE != 1: - for n in range(1,self.STRIDE): # the n multiplier is to increment the indices in the non-uniform manner required. - cost_gradient_dilated = np.insert( - np.insert( cost_gradient_dilated, dilation_idx_row * n, 0, axis=2 ), - dilation_idx_col * n, 0, axis=3) - # print(f'cost_gradient shape: {cost_gradient.shape} | cost_gradient_dilated shape: {cost_gradient_dilated.shape}') + + cost_gradient_dilated = utils.array.dilate(cost_gradient,self.STRIDE-1) batch_size, channels, _, _ = self.padded_input.shape - # Account for filter not shifting over input an integer number of times with given stride. + # Account for filter not shifting over input an integer number of times with given stride. In this case, + # the 'effective input is smaller than the actual input. pxls_excl_x = (self.padded_input.shape[3] - self.FILT_SHAPE[1]) % self.STRIDE # pixels excluded in x direction (cols) pxls_excl_y = (self.padded_input.shape[2] - self.FILT_SHAPE[0]) % self.STRIDE # pixels excluded in y direction (rows) - # print('PIXELS EXCLUDED:',pxls_excl_x,pxls_excl_y) - # Find cost gradient wrt previous output and filters. + # Extract effective input + effective_input = self.padded_input[ + :, # All data points + :, # All channels + :self.padded_input.shape[2] - pxls_excl_y, # Only rows up to those excluded in forwards pass + :self.padded_input.shape[3] - pxls_excl_x # Only cols up to those excluded in forwards pass + ] + + # Find cost gradient wrt layer input and filters. rotated_filters = np.rot90( self.filters, k=2, axes=(2,3) ) # rotate 2x90 degs, rotating in direction of rows to columns. - dCdX_pad = np.zeros(shape=self.padded_input.shape) if self.VECTORISED: # NOTE: convolution function sums across channels; in this case we want to sum across batch data points so we # transpose the arrays to switch the 'channels' with the 'batch' fields. We then need to switch these back for the @@ -155,7 +153,7 @@ def _backwards(self,cost_gradient): dCdF = np.transpose( Conv2D.convolve_vectorised( np.transpose( - self.padded_input[:,:, :self.padded_input.shape[2] - pxls_excl_y, :self.padded_input.shape[3] - pxls_excl_x], + effective_input, axes=(1,0,2,3) ), np.transpose( @@ -166,7 +164,7 @@ def _backwards(self,cost_gradient): ), axes=(1,0,2,3)) # NOTE: Here we need to transpose the filters to allign the channels of the filters with the batched data points in the cost gradient array. - dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = Conv2D.convolve_vectorised( + effective_input_gradient = Conv2D.convolve_vectorised( cost_gradient_dilated, np.transpose( rotated_filters, @@ -176,12 +174,12 @@ def _backwards(self,cost_gradient): full_convolve=True) else: dCdF = np.zeros(shape=self.filters.shape) + effective_input_gradient = np.zeros(shape=self.padded_input.shape) for i in range(batch_size): for filt_index in range(self.NUM_FILTERS): for channel_index in range(channels): - dCdF[filt_index, channel_index] += Conv2D.convolve( self.padded_input[i,channel_index, :self.padded_input.shape[2] - pxls_excl_y, :self.padded_input.shape[3] - pxls_excl_x], cost_gradient_dilated[i,filt_index], stride=1 ) - dCdX_pad[i,channel_index, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] += Conv2D.convolve( cost_gradient_dilated[i,filt_index], rotated_filters[filt_index,channel_index], stride=1, full_convolve=True ) - # dCdF = dCdF[:,:, : dCdF.shape[2] - pxls_excl_y, : dCdF.shape[3] - pxls_excl_x] # Remove the values from right and bottom of array (this is where the excluded pixels will be). + dCdF[filt_index, channel_index] += Conv2D.convolve( effective_input[i,channel_index,:,:], cost_gradient_dilated[i,filt_index], stride=1 ) + effective_input_gradient[i,channel_index, :, :] += Conv2D.convolve( cost_gradient_dilated[i,filt_index], rotated_filters[filt_index,channel_index], stride=1, full_convolve=True ) # ADJUST THE FILTERS assert dCdF.shape == self.filters.shape, f'dCdF shape {dCdF.shape} does not match filters shape {self.filters.shape}.' @@ -196,14 +194,16 @@ def _backwards(self,cost_gradient): if self.bias.trainable: self.bias = self.model.OPTIMISER.update_param(self.bias) - # Remove padding that was added to the input array. - dCdX = dCdX_pad[ :, : , self.ROW_UP_PAD : dCdX_pad.shape[-2] - self.ROW_DOWN_PAD , self.COL_LEFT_PAD : dCdX_pad.shape[-1] - self.COL_RIGHT_PAD ] + # Obtain dCdX, accounting for padding and excluded input values + dCdX_pad = np.zeros(shape=self.padded_input.shape) + dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input + dCdX = dCdX_pad[ :, : , self._ROW_UP_PAD : dCdX_pad.shape[-2] - self._ROW_DOWN_PAD , self._COL_LEFT_PAD : dCdX_pad.shape[-1] - self._COL_RIGHT_PAD ] assert dCdX.shape == self.input.shape, f'dCdX shape [{dCdX.shape}] does not match layer input shape [{self.input.shape}].' return dCdX @staticmethod - def convolve(A, B, stride,full_convolve=False): + def convolve(A: np.ndarray, B: np.ndarray, stride: int,full_convolve: bool=False) -> np.ndarray: """ A and B are 2D arrays. Array B will be convolved over Array A using the stride provided. - 'full_convolve' is where the bottom right cell of B starts over the top of the top left cell of A and shifts by stride until the top left cell of B is over the bottom right cell of A. (i.e. A is padded in each dimension by B - 1 in the respective dimension). """ assert A.ndim == 2 @@ -234,7 +234,7 @@ def convolve(A, B, stride,full_convolve=False): return output @staticmethod - def convolve_vectorised(X,K, stride, full_convolve=False): + def convolve_vectorised(X: np.ndarray,K: np.ndarray, stride: int, full_convolve: bool=False) -> np.ndarray: """ X: 4D array of shape: (batch_size,channels,rows,cols) K: 4D array of shape: (num_filters,X_channels,rows,cols) diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py index f81e4da..4468481 100644 --- a/cnn/layers/fc.py +++ b/cnn/layers/fc.py @@ -25,7 +25,7 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met assert len(input_shape) == 2 and input_shape[1] == 1, 'Invalid input_shape tuple. Expected (n,1)' self.INPUT_SHAPE = input_shape - def prepare_layer(self): + def prepare_layer(self) -> None: if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' else: @@ -49,7 +49,7 @@ def prepare_layer(self): # self.output = np.zeros(shape=(self.NUM_NODES,1)) # NOTE: This is a vertical array. - def _forwards(self,_input): + def _forwards(self,_input: np.ndarray) -> np.ndarray: # print(_input.shape) if self.prev_layer is None: self.input = _input.T @@ -64,7 +64,7 @@ def _forwards(self,_input): # print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output) return self.output - def _backwards(self, dC_dZ): + def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray: """ Take cost gradient dC/dZ (how the output of this layer affects the cost) and backpropogate diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py index 4d9b541..6cb20b3 100644 --- a/cnn/layers/flatten.py +++ b/cnn/layers/flatten.py @@ -21,7 +21,7 @@ def __init__(self,input_shape=None): self.NUM_PARAMS = 0 - def prepare_layer(self): + def prepare_layer(self) -> None: if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' else: @@ -29,7 +29,7 @@ def prepare_layer(self): self.OUTPUT_SHAPE = (np.prod(self.INPUT_SHAPE),1) # Output shape for a single example. # self.output = np.zeros(shape=(np.prod(self.INPUT_SHAPE[1:]),self.INPUT_SHAPE[0])) - def _forwards(self,_input): + def _forwards(self,_input: np.ndarray) -> np.ndarray: assert _input.shape[1:] == self.INPUT_SHAPE, f'ERROR:: Input has unexpected shape: {_input.shape[1:]} | expected: {self.INPUT_SHAPE}' self.input = _input self.output = _input.T.reshape((-1,_input.shape[0])) # Taking transpose here puts each example into its own column - number of columns == number of examles. diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index a7a162a..c1984cf 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -34,7 +34,7 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd self.NUM_PARAMS = 0 - def prepare_layer(self): + def prepare_layer(self) -> np.ndarray: """ This needs to be done after the input has been identified - currently happens when train() is called. """ if self.prev_layer == None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' @@ -87,7 +87,7 @@ def prepare_layer(self): if self.PAD_TYPE == 'same': assert self.OUTPUT_SHAPE == self.INPUT_SHAPE # Channels may differ. - def _forwards(self,_input): + def _forwards(self,_input: np.ndarray) -> np.ndarray: assert _input.ndim == 4 and _input.shape[1:] == self.INPUT_SHAPE, f'Input shape, {_input.shape[1:]}, expected to be, {self.INPUT_SHAPE} for each example (observation).' self.input = _input @@ -137,7 +137,7 @@ def _forwards(self,_input): if self.TRACK_HISTORY: self._track_metrics(output=self.output) return self.output - def _backwards(self,cost_gradient): + def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: ''' Backprop in pooling layer: - nothing to be updated as there are no weights in this layer. diff --git a/cnn/model.py b/cnn/model.py index 87fb73f..36dc119 100644 --- a/cnn/model.py +++ b/cnn/model.py @@ -2,9 +2,13 @@ import pickle import math from datetime import datetime as dt + +from cnn.layers.layer import Layer from . import layers from . import optimisers +from typing import Any, AnyStr + def load_model(name): assert name.split('.')[-1] == 'pkl' with open(name, 'rb') as file: @@ -29,7 +33,7 @@ def __init__(self,optimiser_method='gd'): self.structure = [] # defines order of model (list of layer objects) - EXCLUDES INPUT DATA self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1))) # dict for counting number of each layer type - def add_layer(self,layer): + def add_layer(self,layer: Layer) -> None: if layer.LAYER_TYPE == 'ACTIVATION' and self.structure[-1].LAYER_TYPE == 'ACTIVATION': print('-- WARNING:: Two Activation Layers in subsequent positions in the model.') if layer.FUNCTION == self.structure[-1].FUNCTION: @@ -53,7 +57,7 @@ def add_layer(self,layer): layers.Activation(function=layer.ACTIVATION) ) - def remove_layer(self,index): + def remove_layer(self,index: int) -> None: self.structure.pop(index) if self.is_prepared: print('-- INFO:: Re-compiling model...') @@ -66,7 +70,7 @@ def get_model_details(self): return details - def prepare_model(self,optimiser='gd',learning_rate=None): + def prepare_model(self,optimiser: Any='gd',learning_rate=None): """ Called once final layer is added, each layer can now initiate its weights and biases. """ print('Preparing model...') @@ -74,7 +78,7 @@ def prepare_model(self,optimiser='gd',learning_rate=None): assert optimiser.lower() in optimisers.optimiser_names, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_names}' self.OPTIMISER = optimisers.from_name(optimiser,learning_rate) else: - assert optimiser.__class__.__name__ in optimisers.optimiser_names, f'Invalid optimiser: {optimiser}' + assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_names), f'Invalid optimiser: {optimiser}' self.OPTIMISER = optimiser self.details = { @@ -110,7 +114,7 @@ def prepare_model(self,optimiser='gd',learning_rate=None): self.print_summary() print(f'Model Prepared: {self.is_prepared}') - def train(self,Xs,ys,epochs,max_batch_size=32,shuffle=False,random_seed=42,learning_rate=0.01,cost_fn='mse',beta1=0.9,beta2=0.999): + def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32,shuffle: bool=False,random_seed: int=42,learning_rate: float=0.01,cost_fn: AnyStr='mse',beta1: float=0.9,beta2: float=0.999) -> dt: ''' Should take array of inputs and array of labels of the same length. @@ -179,7 +183,7 @@ def train(self,Xs,ys,epochs,max_batch_size=32,shuffle=False,random_seed=42,learn return dt.now(), dt.now() - train_start # returns training finish time and duration. - def _print_train_progress(self,batch_index): + def _print_train_progress(self,batch_index: int) -> None: progess_bar_length = 30 # characters (not including '[' ']') progress = (batch_index+1) / self.BATCH_COUNT progressor = '=' * int(progress * progess_bar_length) @@ -199,7 +203,7 @@ def _print_train_progress(self,batch_index): SUPPORTED_OPTIMISERS = ('gd','momentum','rmsprop','adam') - def _iterate_forwards(self): + def _iterate_forwards(self) -> None: for batch_ind in range(self.BATCH_COUNT): ind_lower = batch_ind * self.MAX_BATCH_SIZE # Lower bound of index range ind_upper = batch_ind * self.MAX_BATCH_SIZE + self.MAX_BATCH_SIZE # Upper bound of index range @@ -233,7 +237,7 @@ def _iterate_forwards(self): self._iterate_backwards() - def _iterate_backwards(self): + def _iterate_backwards(self) -> None: self.iteration_index += 1 self.history['cost'][self.iteration_index] = self.iteration_cost # Backpropagate the cost_gradient @@ -244,7 +248,7 @@ def _iterate_backwards(self): self.iteration_cost = 0 self.iteration_cost_gradient = 0 - def predict(self,Xs,training=False): + def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray: if training: self.feed_forwards_cycle_index += 1 for layer in self.structure: Xs = layer._forwards(Xs) @@ -252,7 +256,7 @@ def predict(self,Xs,training=False): # print('Output:',X) return Xs - def evaluate(self,Xs,ys): + def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> int: predictions = self.predict(Xs,training=False) accuracy = np.sum((np.argmax(ys.T,axis=0) == np.argmax(predictions,axis=0))) / len(Xs) return accuracy @@ -269,7 +273,7 @@ def _initiate_tracking_metrics(self): SUPPORTED_COST_FUNCTIONS = ('mse','cross_entropy') - def cost(self,predictions,labels,derivative=False): + def cost(self,predictions: np.ndarray,labels: np.ndarray,derivative: bool=False) -> float: ''' Cost function to provide measure of model 'correctness'. returns vector cost value. ''' @@ -339,4 +343,4 @@ def print_summary(self): print('Total params:',total_trainable + total_non_trainable) print('Trainable params:',total_trainable) print('Non-trainable params:',total_non_trainable) - print('='*(np.sum(field_lengths) + len(field_names))) \ No newline at end of file + print('='*(np.sum(field_lengths) + len(field_names))) diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py index adef7e9..288ee49 100644 --- a/cnn/optimisers/__init__.py +++ b/cnn/optimisers/__init__.py @@ -1,3 +1,4 @@ +from cnn.params import CNNParam from .adam import Adam from .gd import GradientDescent from .rmsprop import RMSProp @@ -20,4 +21,7 @@ def from_name(name,learning_rate): else: return optimiser(learning_rate=learning_rate) +class BaseOptimiser: + def update_param(param: CNNParam) -> np.ndarray: + raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.") diff --git a/cnn/optimisers/adam.py b/cnn/optimisers/adam.py index 739c468..80f01da 100644 --- a/cnn/optimisers/adam.py +++ b/cnn/optimisers/adam.py @@ -1,9 +1,10 @@ import numpy as np +from cnn.optimisers import BaseOptimiser from cnn.params import CNNParam -class Adam: +class Adam(BaseOptimiser): """ Adaptive Movement Estimation Algorithm - combination of 'Gradient Descent with Momentum' and 'RMSprop' """ @@ -15,7 +16,7 @@ def __init__(self,learning_rate=0.001,beta1=0.9,beta2=0.999,epsilon=1e-8): self.BETA2 = beta2 # Second moment decay factor self.EPSILON = epsilon # This is a very small value just to avoid division by 0. - def update_param(self,param) -> np.ndarray: + def update_param(self,param: CNNParam) -> np.ndarray: # TODO: Change function sig. Needs to be consistent with other optimisers. if "momentum1" in param.associated_data: momentum1 = param.associated_data["momentum1"] diff --git a/cnn/optimisers/gd.py b/cnn/optimisers/gd.py index 9c7e518..7c3a1f6 100644 --- a/cnn/optimisers/gd.py +++ b/cnn/optimisers/gd.py @@ -1,13 +1,14 @@ +from cnn.optimisers import BaseOptimiser from cnn.params import CNNParam +import numpy as np - -class GradientDescent: +class GradientDescent(BaseOptimiser): ALIAS = 'gd' def __init__(self,learning_rate=0.001,beta=0.9,): self.ALPHA = learning_rate - def update_param(self,param): + def update_param(self,param: CNNParam) -> np.ndarray: return param - self.ALPHA * param.gradient diff --git a/cnn/optimisers/rmsprop.py b/cnn/optimisers/rmsprop.py index 576316f..2af18f3 100644 --- a/cnn/optimisers/rmsprop.py +++ b/cnn/optimisers/rmsprop.py @@ -1,8 +1,9 @@ import numpy as np +from cnn.optimisers import BaseOptimiser from cnn.params import CNNParam -class RMSProp: +class RMSProp(BaseOptimiser): ''' Root mean square propagation ''' ALIAS = 'rmsprop' @@ -12,7 +13,7 @@ def __init__(self,learning_rate=0.001,beta=0.9,epsilon=1e-8): self.EPSILON = epsilon self.BETA = beta - def update_param(self,param) -> np.ndarray: + def update_param(self,param: CNNParam) -> np.ndarray: if "momentum1" in param.associated_data["momentum1"]: s = param.associated_data["momentum1"] else: From 958b1893690f7fc6e42f510a440f944e18e6f235 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 27 Dec 2021 21:14:01 +0000 Subject: [PATCH 04/24] fix some import errors --- cnn/optimisers/__init__.py | 7 +++---- cnn/optimisers/adam.py | 1 - cnn/optimisers/base.py | 8 ++++++++ cnn/utils/__init__.py | 2 ++ cnn/utils/array.py | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) create mode 100644 cnn/optimisers/base.py diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py index 288ee49..ef04035 100644 --- a/cnn/optimisers/__init__.py +++ b/cnn/optimisers/__init__.py @@ -1,14 +1,16 @@ from cnn.params import CNNParam +from .base import BaseOptimiser from .adam import Adam from .gd import GradientDescent from .rmsprop import RMSProp +import numpy as np # ------------- BELOW IS DYNAMIC TO AVAILABLE OPTIMISER CLASSES ---------------- # Expose list of all optimiser class names. import inspect import sys -__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] +__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda cls: isinstance(cls,BaseOptimiser))] # Following includes both class name and alias property. optimiser_names = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes] @@ -21,7 +23,4 @@ def from_name(name,learning_rate): else: return optimiser(learning_rate=learning_rate) -class BaseOptimiser: - def update_param(param: CNNParam) -> np.ndarray: - raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.") diff --git a/cnn/optimisers/adam.py b/cnn/optimisers/adam.py index 80f01da..2d70f24 100644 --- a/cnn/optimisers/adam.py +++ b/cnn/optimisers/adam.py @@ -17,7 +17,6 @@ def __init__(self,learning_rate=0.001,beta1=0.9,beta2=0.999,epsilon=1e-8): self.EPSILON = epsilon # This is a very small value just to avoid division by 0. def update_param(self,param: CNNParam) -> np.ndarray: - # TODO: Change function sig. Needs to be consistent with other optimisers. if "momentum1" in param.associated_data: momentum1 = param.associated_data["momentum1"] else: diff --git a/cnn/optimisers/base.py b/cnn/optimisers/base.py new file mode 100644 index 0000000..611e6ca --- /dev/null +++ b/cnn/optimisers/base.py @@ -0,0 +1,8 @@ +from cnn.params import CNNParam +import numpy as np + + +class BaseOptimiser: + ALIAS = 'base' + def update_param(param: CNNParam) -> np.ndarray: + raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.") diff --git a/cnn/utils/__init__.py b/cnn/utils/__init__.py index e69de29..7f306a4 100644 --- a/cnn/utils/__init__.py +++ b/cnn/utils/__init__.py @@ -0,0 +1,2 @@ +from . import array +from . import processing diff --git a/cnn/utils/array.py b/cnn/utils/array.py index c5928e0..1059afc 100644 --- a/cnn/utils/array.py +++ b/cnn/utils/array.py @@ -41,7 +41,7 @@ def dilate(array: np.ndarray,channel_width: int) -> np.ndarray: dilation_idx_row = np.arange(rows-1) + 1 # Intiatial indices for insertion of zeros dilation_idx_col = np.arange(cols-1) + 1 # Intiatial indices for insertion of zeros dilated_array = array.copy() - for n in range(1,channel_width): # the n multiplier is to increment the indices in the non-uniform manner required. + for n in range(1,channel_width+1): # the n multiplier is to increment the indices in the non-uniform manner required. dilated_array = np.insert( np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ), dilation_idx_col * n, 0, axis=3) From d2d47b4768986ecc7d3ac455977f774a087a03ea Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 27 Dec 2021 21:14:18 +0000 Subject: [PATCH 05/24] fix conv backprop bug --- cnn/layers/conv.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index 41d980a..87a32b1 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -174,7 +174,7 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: full_convolve=True) else: dCdF = np.zeros(shape=self.filters.shape) - effective_input_gradient = np.zeros(shape=self.padded_input.shape) + effective_input_gradient = np.zeros(shape=effective_input.shape) for i in range(batch_size): for filt_index in range(self.NUM_FILTERS): for channel_index in range(channels): @@ -196,7 +196,7 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: # Obtain dCdX, accounting for padding and excluded input values dCdX_pad = np.zeros(shape=self.padded_input.shape) - dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input + dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input_gradient dCdX = dCdX_pad[ :, : , self._ROW_UP_PAD : dCdX_pad.shape[-2] - self._ROW_DOWN_PAD , self._COL_LEFT_PAD : dCdX_pad.shape[-1] - self._COL_RIGHT_PAD ] assert dCdX.shape == self.input.shape, f'dCdX shape [{dCdX.shape}] does not match layer input shape [{self.input.shape}].' From a35e01321536a03f668b5c0ac35b4518ee524a4b Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Tue, 28 Dec 2021 11:58:32 +0000 Subject: [PATCH 06/24] refactor FC layer to use new CNNParam --- cnn/layers/fc.py | 73 ++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py index 4468481..2550b97 100644 --- a/cnn/layers/fc.py +++ b/cnn/layers/fc.py @@ -1,12 +1,15 @@ import numpy as np + +from cnn.params import CNNParam from .layer import Layer from cnn import utils class FC(Layer): """ - The Fully Connected Layer is defined as being the layer of nodes and the weights of the connections that link those nodes to the previous layer. + The Fully Connected Layer is defined as being the layer of nodes and the weights of the connections that link + those nodes to the previous layer. """ - def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_method=None,input_shape=None): + def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_method=None,input_shape=None,track_history=True): """ - n: Number of nodes in layer. - activation: The name of the activation function to be used. The activation is handled by an Activation object that is transparent to the user here. Defaults to None - a transparent Activation layer will still be added however, the data passing through will be untouched. @@ -24,6 +27,7 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met if input_shape is not None: assert len(input_shape) == 2 and input_shape[1] == 1, 'Invalid input_shape tuple. Expected (n,1)' self.INPUT_SHAPE = input_shape + self.TRACK_HISTORY = track_history def prepare_layer(self) -> None: if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. @@ -31,37 +35,24 @@ def prepare_layer(self) -> None: else: self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE - self.params['weights'] = { - 'name':'weights', - 'trainable':True, - 'values':utils.array_init(shape=(self.NUM_NODES,self.INPUT_SHAPE[0]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED) # NOTE: this is the correct orientation for vertical node array. - } - - self.params['bias'] = { - 'name': 'bias', - 'trainable': True, - 'values': np.zeros(shape=(self.NUM_NODES,1)) # NOTE: Recommended to initaite biases to zero. - } - - # self.NUM_PARAMS = self.weights.size + self.bias.size - + self.weights = utils.array.array_init(shape=(self.NUM_NODES,self.INPUT_SHAPE[0]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED) # NOTE: this is the correct orientation for vertical node array. + + self.bias = np.zeros(shape=(self.NUM_NODES,1)) # NOTE: Recommended to initaite biases to zero. + self.OUTPUT_SHAPE = (self.NUM_NODES,1) - # self.output = np.zeros(shape=(self.NUM_NODES,1)) # NOTE: This is a vertical array. def _forwards(self,_input: np.ndarray) -> np.ndarray: - # print(_input.shape) if self.prev_layer is None: - self.input = _input.T + self.input = _input else: assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}' self.input = _input - self.output = np.dot( self.params['weights']['values'], self.input ) + self.params['bias']['values'] + self.output = np.dot( self.weights, self.input ) + self.bias assert len(self.output.shape) == 2 and self.output.shape[0] == self.OUTPUT_SHAPE[0], f'Output shape, {(self.output.shape[0],1)}, not as expected, {self.OUTPUT_SHAPE}' - self._track_metrics(output=self.output) - # print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output) + if self.TRACK_HISTORY: self._track_metrics(output=self.output) return self.output def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray: @@ -72,27 +63,41 @@ def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray: """ assert dC_dZ.shape == self.output.shape, f'dC/dZ shape, {dC_dZ.shape}, does not match Z shape, {self.output.shape}.' - self._track_metrics(cost_gradient=dC_dZ) + if self.TRACK_HISTORY: self._track_metrics(cost_gradient=dC_dZ) dZ_dW = self.input.T # Partial diff of weighted sum (Z) w.r.t. weights dZ_dB = 1 - dZ_dI = self.params['weights']['values'].T # Partial diff of weighted sum w.r.t. input to layer. + dZ_dI = self.weights.T # Partial diff of weighted sum w.r.t. input to layer. - # dC_dW.shape === W.shape = (n(l),n(l-1)) | dZ_dW.shape = (1,n(l-1)) - # dC_dW = np.multiply( dC_dZ , dZ_dW ) # Element-wise multiplication. The local gradient needs transposing for the multiplication. dC_dW = np.dot(dC_dZ,dZ_dW) - assert dC_dW.shape == self.params['weights']['values'].shape, f'dC/dW shape {dC_dW.shape} does not match W shape {self.params["weights"]["values"].shape}' - # self.weights = self.weights - ( self.model.LEARNING_RATE * dC_dW ) # NOTE: Adjustments done in opposite direction to dC_dZ - if self.params['weights']['trainable']: - self.params['weights']['values'] = self.model.OPTIMISER.update_param(self.params['weights'],dC_dW,self.MODEL_STRUCTURE_INDEX) + assert dC_dW.shape == self.weights.shape, f'dC/dW shape {dC_dW.shape} does not match W shape {self.weights.shape}' + self.weights.gradient = dC_dW + if self.weights.trainable: + self.weights = self.model.OPTIMISER.update_param(self.weights) dC_dB = np.sum(dC_dZ * dZ_dB, axis=1,keepdims=True) # Element-wise multiplication (dZ_dB turns out to be just 1) - assert dC_dB.shape == self.params['bias']['values'].shape, f'dC/dB shape {dC_dB.shape} does not match B shape {self.params["bias"]["values"].shape}' - # self.bias = self.bias - ( self.model.LEARNING_RATE * dC_dB ) # NOTE: Adjustments done in opposite direction to dC_dZ - if self.params['bias']['trainable']: - self.params['bias']['values'] = self.model.OPTIMISER.update_param(self.params['bias'],dC_dB,self.MODEL_STRUCTURE_INDEX) + assert dC_dB.shape == self.bias.shape, f'dC/dB shape {dC_dB.shape} does not match B shape {self.bias.shape}' + self.bias.gradient = dC_dB + if self.bias.trainable: + self.bias = self.model.OPTIMISER.update_param(self.bias) dC_dI = np.dot( dZ_dI , dC_dZ ) assert dC_dI.shape == self.input.shape, f'dC/dI shape {dC_dI.shape} does not match input shape {self.input.shape}.' return dC_dI + + @property + def weights(self): + return self._weights + + @weights.setter + def weights(self,value): + self._weights = CNNParam(value) + + @property + def bias(self): + return self._bias + + @bias.setter + def bias(self,value): + self._bias = CNNParam(value) From 9430bde0b46723a75aaae56f86bff5c65e4ee707 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Wed, 29 Dec 2021 17:40:36 +0000 Subject: [PATCH 07/24] Add FC layer test case --- tests/test_fc_layer/test_case_1.py | 118 +++++++++++++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 tests/test_fc_layer/test_case_1.py diff --git a/tests/test_fc_layer/test_case_1.py b/tests/test_fc_layer/test_case_1.py new file mode 100644 index 0000000..8ad7291 --- /dev/null +++ b/tests/test_fc_layer/test_case_1.py @@ -0,0 +1,118 @@ +""" +TC1: +- 5 nodes +- input shape (5,1) +- bias = 0.2 (5,1) +""" +from _pytest.assertion import pytest_sessionfinish +import pytest +from cnn.layers import FC +import numpy as np + +from cnn.params import CNNParam + +@pytest.fixture +def n(): + return 5 + +@pytest.fixture +def input_shape(): + return (5,1) + +@pytest.fixture +def batch_size(): + return 2 + +@pytest.fixture +def fc_layer(n,input_shape): + layer = FC( + n, + input_shape=input_shape, + track_history=False + ) + layer.prepare_layer() + layer.weights = np.arange(n*input_shape[0]).reshape((n,input_shape[0])) + layer.weights.trainable = False + layer.bias = np.array([[0.2]]*n) + layer.bias.trainable = False + return layer + +@pytest.fixture +def forwards_input(input_shape,batch_size): + return np.arange(input_shape[0]*batch_size).reshape((input_shape[0],batch_size)) + +@pytest.fixture +def forwards_expected_result(): + return np.array( + [ + [60.2, 70.2], + [160.2, 195.2], + [260.2, 320.2], + [360.2, 445.2], + [460.2, 570.2] + ] + ) + +@pytest.fixture +def backwards_input(forwards_expected_result): + return np.arange(np.prod(forwards_expected_result.shape)).reshape(forwards_expected_result.shape) + +@pytest.fixture +def backwards_expected_output(): + return np.array( + [ + [ 300, 350 ], + [ 320, 375 ], + [ 340, 400 ], + [ 360, 425 ], + [ 380, 450 ] + ] + ) + +@pytest.fixture +def backwards_expected_weights_gradient(): + return np.array( + [ + [ 1, 3, 5, 7, 9], + [ 3, 13, 23, 33, 43], + [ 5, 23, 41, 59, 77], + [ 7, 33, 59, 85, 111], + [ 9, 43, 77, 111, 145] + ] + ) + +@pytest.fixture +def backwards_expected_bias_gradient(backwards_input): + return backwards_input.sum(axis=1,keepdims=True) + +def test_param_class_persistance(fc_layer): + assert isinstance(fc_layer.weights,CNNParam) + assert isinstance(fc_layer.bias,CNNParam) + fc_layer.weights = [1,2,3,4] + fc_layer.bias = [1,2,3] + assert isinstance(fc_layer.weights,CNNParam) + assert isinstance(fc_layer.bias,CNNParam) + +def test_forwards(fc_layer,forwards_input,forwards_expected_result): + + assert np.array_equal( + fc_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_backwards(fc_layer,backwards_input,backwards_expected_output,backwards_expected_weights_gradient,backwards_expected_bias_gradient,forwards_input,forwards_expected_result): + fc_layer.input = forwards_input + fc_layer.output = forwards_expected_result + + assert np.array_equal( + fc_layer._backwards(backwards_input), + backwards_expected_output + ) + assert np.array_equal( + fc_layer.weights.gradient, + backwards_expected_weights_gradient + ) + assert np.array_equal( + fc_layer.bias.gradient, + backwards_expected_bias_gradient + ) From 365718290a49b3c1b9d00e7b548c3aec503ca854 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Wed, 29 Dec 2021 18:00:09 +0000 Subject: [PATCH 08/24] Minor Pool layer refactor --- cnn/layers/pool.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index c1984cf..1ae04d8 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -43,10 +43,6 @@ def prepare_layer(self) -> np.ndarray: assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE' - # # Convert 2D input to 3D. - # if len(self.INPUT_SHAPE) == 2: - # self.INPUT_SHAPE = tuple([1]) + self.INPUT_SHAPE - NUM_INPUT_ROWS = self.INPUT_SHAPE[-2] NUM_INPUT_COLS = self.INPUT_SHAPE[-1] @@ -83,7 +79,6 @@ def prepare_layer(self) -> np.ndarray: row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out) - # self.output = np.zeros(shape=(self.INPUT_SHAPE[0],row_out,col_out)) # Output initiated. if self.PAD_TYPE == 'same': assert self.OUTPUT_SHAPE == self.INPUT_SHAPE # Channels may differ. @@ -107,7 +102,7 @@ def _forwards(self,_input: np.ndarray) -> np.ndarray: X_flat_pooled = np.mean(self.Xsliced, axis=2) elif self.POOL_TYPE == 'min': X_flat_pooled = np.min(self.Xsliced,axis=2) - self.output = X_flat_pooled.reshape((self.padded_input.shape[0],*self.OUTPUT_SHAPE)) + self.output = X_flat_pooled.reshape((self.padded_input.shape[0],*self.OUTPUT_SHAPE)) else: self.output = np.zeros(shape=(self.input.shape[0],*self.OUTPUT_SHAPE)) batch_size, channels, proc_rows, proc_cols = self.padded_input.shape @@ -118,14 +113,12 @@ def _forwards(self,_input: np.ndarray) -> np.ndarray: curr_x = out_x = 0 while curr_x <= proc_cols - self.FILT_SHAPE[1]: for channel_index in range(channels): + sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] if self.POOL_TYPE == 'max': - sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x+ self.FILT_SHAPE[1] ] self.output[i,channel_index, out_y, out_x] = np.max( sub_arr ) elif self.POOL_TYPE == 'min': - sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x+ self.FILT_SHAPE[1] ] self.output[i,channel_index, out_y, out_x] = np.min( sub_arr ) elif self.POOL_TYPE == 'mean': - sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] self.output[i,channel_index, out_y, out_x] = np.mean( sub_arr ) curr_x += self.STRIDE From 39d48d38949a0cccda96136aeae5e5b39708e650 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Wed, 29 Dec 2021 18:40:45 +0000 Subject: [PATCH 09/24] refactored padding calculations into array util --- cnn/layers/conv.py | 33 +++++---------------------------- cnn/layers/pool.py | 39 ++++++--------------------------------- cnn/utils/array.py | 35 ++++++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 62 deletions(-) diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index 87a32b1..2f5e414 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -46,13 +46,6 @@ def prepare_layer(self) -> None: assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE' - # # Convert 2D input to 3D. - # if len(self.INPUT_SHAPE) == 2: - # self.INPUT_SHAPE = tuple([1]) + self.INPUT_SHAPE - - NUM_INPUT_ROWS = self.INPUT_SHAPE[-2] - NUM_INPUT_COLS = self.INPUT_SHAPE[-1] - # Initiate params self.filters = CNNParam( utils.array.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED), @@ -64,28 +57,12 @@ def prepare_layer(self) -> None: ) # Need to account for padding. - if self.PAD_TYPE != None: - if self.PAD_TYPE == 'same': - pad_cols_needed = max((NUM_INPUT_COLS - 1) * self.STRIDE + self.FILT_SHAPE[1] - NUM_INPUT_COLS, 0) - pad_rows_needed = max((NUM_INPUT_ROWS - 1) * self.STRIDE + self.FILT_SHAPE[0] - NUM_INPUT_ROWS, 0) - elif self.PAD_TYPE == 'valid': - # TensoFlow definition of this is "no padding". The input is just processed as-is. - pad_rows_needed = pad_cols_needed = 0 - elif self.PAD_TYPE == 'include': - # Here we will implement the padding method to avoid input data being excluded/ missed by the convolution. - # - This happens when, (I_dim - F_dim) % stride != 0 - pad_rows_needed = ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE) - pad_cols_needed = ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE) - - self._COL_LEFT_PAD = pad_cols_needed // 2 # // Floor division - self._COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2) - self._ROW_UP_PAD = pad_rows_needed // 2 # // Floor division - self._ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2) - else: - self._COL_LEFT_PAD = self._COL_RIGHT_PAD = self._ROW_UP_PAD = self._ROW_DOWN_PAD = self.PADDING + self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding( + self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE + ) - col_out = int((NUM_INPUT_COLS + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 - row_out = int((NUM_INPUT_ROWS + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 + col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 + row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out) diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index 1ae04d8..85b9474 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -1,6 +1,7 @@ import numpy as np from .layer import Layer import math +from cnn import utils class Pool(Layer): def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padding: int=0,pad_type: str=None,input_shape=None,vectorised=True,track_history=True): @@ -43,40 +44,12 @@ def prepare_layer(self) -> np.ndarray: assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE' - NUM_INPUT_ROWS = self.INPUT_SHAPE[-2] - NUM_INPUT_COLS = self.INPUT_SHAPE[-1] - - # Need to account for padding. - if self.PAD_TYPE != None: - if self.PAD_TYPE == 'same': - nopad_out_cols = math.ceil(float(NUM_INPUT_COLS) / float(self.STRIDE)) - pad_cols_needed = max((nopad_out_cols - 1) * self.STRIDE + self.FILT_SHAPE[1] - NUM_INPUT_COLS, 0) - nopad_out_rows = math.ceil(float(NUM_INPUT_ROWS) / float(self.STRIDE)) - pad_rows_needed = max((nopad_out_rows - 1) * self.STRIDE + self.FILT_SHAPE[0] - NUM_INPUT_ROWS, 0) - elif self.PAD_TYPE == 'valid': - # TensoFlow definition of this is "no padding". The input is just processed as-is. - pad_rows_needed = pad_cols_needed = 0 - elif self.PAD_TYPE == 'include': - # Here we will implement the padding method to avoid input data being excluded/ missed by the convolution. - # - This happens when, (I_dim - F_dim) % stride != 0 - if (NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE != 0: - pad_rows_needed = self.FILT_SHAPE[0] - ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE) - else: - pad_rows_needed = 0 - if (NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE != 0: - pad_cols_needed = self.FILT_SHAPE[1] - ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE) - else: - pad_cols_needed = 0 - - self.COL_LEFT_PAD = pad_cols_needed // 2 # // Floor division - self.COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2) - self.ROW_UP_PAD = pad_rows_needed // 2 # // Floor division - self.ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2) - else: - self.COL_LEFT_PAD = self.COL_RIGHT_PAD = self.ROW_UP_PAD = self.ROW_DOWN_PAD = self.PADDING + self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding( + self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE + ) - col_out = int((NUM_INPUT_COLS + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 - row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 + col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 + row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out) if self.PAD_TYPE == 'same': diff --git a/cnn/utils/array.py b/cnn/utils/array.py index 1059afc..b031db1 100644 --- a/cnn/utils/array.py +++ b/cnn/utils/array.py @@ -1,5 +1,6 @@ +import math import numpy as np - +from typing import AnyStr, Tuple def array_init(shape: tuple,method=None,seed=None) -> np.ndarray: ''' Random initialisation of weights array. @@ -46,3 +47,35 @@ def dilate(array: np.ndarray,channel_width: int) -> np.ndarray: np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ), dilation_idx_col * n, 0, axis=3) return dilated_array + +def determine_padding(pad_type: AnyStr,pad_size: int,shape_array_1: Tuple[int],shape_array_2: Tuple[int],stride: int) -> Tuple[int]: + """ Function to determine required padding at each edge of the array, according to the specified requirements. + array_1 refers to the larger of the two arrays that will have array_2 slide over it. """ + if pad_type is None: + col_left_pad = col_right_pad = row_up_pad = row_down_pad = pad_size + else: + if pad_type == 'same': + nopad_out_cols = math.ceil(float(shape_array_1[1]) / float(stride)) + pad_cols_needed = max((nopad_out_cols - 1) * stride + shape_array_2[1] - shape_array_1[1], 0) + nopad_out_rows = math.ceil(float(shape_array_1[0]) / float(stride)) + pad_rows_needed = max((nopad_out_rows - 1) * stride + shape_array_2[0] - shape_array_1[0], 0) + elif pad_type == 'valid': + # TensoFlow definition of this is "no padding". The input is just processed as-is. + pad_rows_needed = pad_cols_needed = 0 + elif pad_type == 'include': + # Here we will implement the padding method to avoid input data being excluded/ missed by the convolution. + # - This happens when, (I_dim - F_dim) % stride != 0 + if (shape_array_1[0] - shape_array_2[0]) % stride != 0: + pad_rows_needed = shape_array_2[0] - ((shape_array_1[0] - shape_array_2[0]) % stride) + else: + pad_rows_needed = 0 + if (shape_array_1[1] - shape_array_2[1]) % stride != 0: + pad_cols_needed = shape_array_2[1] - ((shape_array_1[1] - shape_array_2[1]) % stride) + else: + pad_cols_needed = 0 + + col_left_pad = pad_cols_needed // 2 # // Floor division + col_right_pad = math.ceil(pad_cols_needed / 2) + row_up_pad = pad_rows_needed // 2 # // Floor division + row_down_pad = math.ceil(pad_rows_needed / 2) + return col_left_pad, col_right_pad, row_up_pad, row_down_pad From 8212dce8a4a308a4a9c2c2abfd85a181c7ea29a2 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Wed, 29 Dec 2021 21:04:58 +0000 Subject: [PATCH 10/24] Fix Pool layer bug: output shape calculation --- cnn/layers/pool.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index 85b9474..1d6347d 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -47,9 +47,8 @@ def prepare_layer(self) -> np.ndarray: self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding( self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE ) - - col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 - row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 + col_out = int((self.INPUT_SHAPE[2] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 + row_out = int((self.INPUT_SHAPE[1] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out) if self.PAD_TYPE == 'same': From 0d965f97e060832bff454f0f0de06d32ea43d6bc Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Wed, 29 Dec 2021 21:05:53 +0000 Subject: [PATCH 11/24] Pool Layer test case 1 --- tests/test_pool_layer/test_case_1.py | 133 +++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 tests/test_pool_layer/test_case_1.py diff --git a/tests/test_pool_layer/test_case_1.py b/tests/test_pool_layer/test_case_1.py new file mode 100644 index 0000000..8fc4e65 --- /dev/null +++ b/tests/test_pool_layer/test_case_1.py @@ -0,0 +1,133 @@ +""" +TC1: +- filt shape (3,3) +- stride 2 +- pool type max +- input shape (2,5,5) +""" +import pytest +from cnn.layers import Pool +import numpy as np + +@pytest.fixture +def input_shape(): + return (2,5,5) + +@pytest.fixture +def pool_layer(): + layer = Pool( + filt_shape=(3,3), + stride=2, + pool_type='max', + input_shape=(2,5,5), + vectorised=False, + track_history=False + ) + layer.prepare_layer() + return layer + +@pytest.fixture +def forwards_input(input_shape): + batch_size = 2 + return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape)) + +@pytest.fixture +def forwards_expected_result(): + return np.array( + [ + [ + [ + [12, 14], + [22, 24] + ], + [ + [37, 39], + [47, 49] + ] + ], + [ + [ + [62, 64], + [72, 74] + ], + [ + [87, 89], + [97, 99] + ] + ] + ] + ) + +@pytest.fixture +def backwards_input(forwards_expected_result): + return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape)) + +@pytest.fixture +def backwards_expected_result(): + return np.array( + [ + [ + [ + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,0,0,1], + [0,0,0,0,0], + [0,0,2,0,3] + ], + [ + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,4,0,5], + [0,0,0,0,0], + [0,0,6,0,7] + ] + ], + [ + [ + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,8,0,9], + [0,0,0,0,0], + [0,0,10,0,11] + ], + [ + [0,0,0,0,0], + [0,0,0,0,0], + [0,0,12,0,13], + [0,0,0,0,0], + [0,0,14,0,15] + ] + ] + ] + ) + +def test_forwards(pool_layer,forwards_input,forwards_expected_result): + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result): + pool_layer.VECTORISED = True + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.input = forwards_input + pool_layer.padded_input = forwards_input + pool_layer.output = forwards_expected_result + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) + +def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.VECTORISED = True + pool_layer._forwards(forwards_input) + pool_layer.output = forwards_expected_result + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) From eb3adb6754cb6a6c6fc722c30cd8de3f5d84eac2 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Thu, 30 Dec 2021 17:53:08 +0000 Subject: [PATCH 12/24] Fix Pool layer backprop bug with 'mean' method --- cnn/layers/pool.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index 1d6347d..87bddef 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -120,22 +120,30 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: assert cost_gradient.shape == self.output.shape if self.TRACK_HISTORY: self._track_metrics(cost_gradient=cost_gradient) # Initiate to input shape. - dC_dIpad = np.zeros_like(self.padded_input) + dC_dIpad = np.zeros(self.padded_input.shape,dtype=np.float64) batch_size, channels, padded_rows, padded_cols = dC_dIpad.shape if self.VECTORISED: + # Distribution array represents a boolean array indicating which data points the cost gradient should flow back through. if self.POOL_TYPE == 'max': distribution_arr = (np.max(self.Xsliced,axis=2,keepdims=True) == self.Xsliced).astype(int) elif self.POOL_TYPE == 'min': distribution_arr = (np.min(self.Xsliced,axis=2,keepdims=True) == self.Xsliced).astype(int) elif self.POOL_TYPE == 'mean': - distribution_arr = np.ones_like(self.Xsliced) + distribution_arr = np.ones(self.Xsliced.shape) + + # The cost gradient array is 'flattened' so that each column corresponds to the sub array from the forwards propagation cg_flat = cost_gradient.reshape((*self.Xsliced.shape[:2],1,self.Xsliced.shape[-1])) * distribution_arr + + # Here the cost gradient values are combined to form the cost gradient values corresponding to each of the values in + # the padded input. col_index = 0 for vstart in range(0,self.padded_input.shape[-2] - self.FILT_SHAPE[0] + 1, self.STRIDE): for hstart in range(0, self.padded_input.shape[-1] - self.FILT_SHAPE[1] + 1, self.STRIDE): - dC_dIpad[:,:,vstart:vstart+self.FILT_SHAPE[0],hstart:hstart+self.FILT_SHAPE[1]] += np.transpose(cg_flat[:,:,:,col_index].reshape((*self.padded_input.shape[:2],*self.FILT_SHAPE[::-1])),axes=(0,1,3,2)) + dC_dIpad[:,:,vstart:vstart+self.FILT_SHAPE[0],hstart:hstart+self.FILT_SHAPE[1]] += np.transpose( + cg_flat[:,:,:,col_index].reshape((*self.padded_input.shape[:2],*self.FILT_SHAPE[::-1])), + axes=(0,1,3,2)) col_index += 1 else: # Step over the array similarly to the forwards pass and compute the expanded cost gradients. @@ -150,24 +158,22 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray: if self.POOL_TYPE == 'max': # Set value of node that corresponds with the max value node of the input to the cost gradient value at (cost_y,cost_x) max_node_y, max_node_x = np.array( np.unravel_index( np.argmax( sub_arr ), sub_arr.shape ) ) + np.array([curr_y, curr_x]) # addition of curr_y & curr_x is to get position in padded_input array (not just local sub_arr). - dC_dIpad[i, channel_index, max_node_y, max_node_x] += cost_val elif self.POOL_TYPE == 'min': # Set value of node that corresponds with the min value node of the input to the cost gradient value at (cost_y,cost_x) min_node_y, min_node_x = np.array( np.unravel_index( np.argmin( sub_arr ), sub_arr.shape ) ) + np.array([curr_y, curr_x]) # addition of curr_y & curr_x is to get position in padded_input array (not just local sub_arr). - dC_dIpad[i, channel_index, min_node_y, min_node_x] += cost_val elif self.POOL_TYPE == 'mean': - sub_arr_props = sub_arr / sub_arr.sum() - - dC_dIpad[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] += sub_arr_props * cost_val + # Set all of the values associated with each sub-array from forwards pass as the corresponding cost gradient value; + # summing values where sub-arrays overlap. + dC_dIpad[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] += cost_val curr_x += self.STRIDE cost_x += 1 curr_y += self.STRIDE cost_y += 1 - # Remove padding that was added to the input array. + # Remove padding that was added to the input array to obtain the cost gradient array for the layer input. dC_dI = dC_dIpad[ :, : , self.ROW_UP_PAD : dC_dIpad.shape[-2] - self.ROW_DOWN_PAD , self.COL_LEFT_PAD : dC_dIpad.shape[-1] - self.COL_RIGHT_PAD ] assert dC_dI.shape == self.input.shape, f'dC/dI shape [{dC_dI.shape}] does not match layer input shape [{self.input.shape}].' return dC_dI From 726d74f45f5fa77e9861d81bd5fbf112cb9ef604 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Thu, 30 Dec 2021 18:03:45 +0000 Subject: [PATCH 13/24] Fix Conv2D layer bug in calculating output shape --- cnn/layers/conv.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index 2f5e414..e0f48f4 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -57,12 +57,12 @@ def prepare_layer(self) -> None: ) # Need to account for padding. - self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding( + self._COL_LEFT_PAD, self._COL_RIGHT_PAD, self._ROW_UP_PAD, self._ROW_DOWN_PAD = utils.array.determine_padding( self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE ) - col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 - row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 + col_out = int((self.INPUT_SHAPE[2] + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1 + row_out = int((self.INPUT_SHAPE[1] + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1 self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out) From ed32f9e2b6dd3636e6b94b61748fde83473d12df Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Thu, 30 Dec 2021 18:04:53 +0000 Subject: [PATCH 14/24] Add TC2 and 3 for Pool layer --- tests/test_pool_layer/test_case_2.py | 133 ++++++++++++++++++++++++++ tests/test_pool_layer/test_case_3.py | 136 +++++++++++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 tests/test_pool_layer/test_case_2.py create mode 100644 tests/test_pool_layer/test_case_3.py diff --git a/tests/test_pool_layer/test_case_2.py b/tests/test_pool_layer/test_case_2.py new file mode 100644 index 0000000..c5ff640 --- /dev/null +++ b/tests/test_pool_layer/test_case_2.py @@ -0,0 +1,133 @@ +""" +TC1: +- filt shape (3,3) +- stride 2 +- pool type min +- input shape (2,5,5) +""" +import pytest +from cnn.layers import Pool +import numpy as np + +@pytest.fixture +def input_shape(): + return (2,5,5) + +@pytest.fixture +def pool_layer(): + layer = Pool( + filt_shape=(3,3), + stride=2, + pool_type='min', + input_shape=(2,5,5), + vectorised=False, + track_history=False + ) + layer.prepare_layer() + return layer + +@pytest.fixture +def forwards_input(input_shape): + batch_size = 2 + return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape)) + +@pytest.fixture +def forwards_expected_result(): + return np.array( + [ + [ + [ + [0, 2], + [10, 12] + ], + [ + [25, 27], + [35, 37] + ] + ], + [ + [ + [50, 52], + [60, 62] + ], + [ + [75, 77], + [85, 87] + ] + ] + ] + ) + +@pytest.fixture +def backwards_input(forwards_expected_result): + return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape)) + +@pytest.fixture +def backwards_expected_result(): + return np.array( + [ + [ + [ + [0,0,1,0,0], + [0,0,0,0,0], + [2,0,3,0,0], + [0,0,0,0,0], + [0,0,0,0,0] + ], + [ + [4,0,5,0,0], + [0,0,0,0,0], + [6,0,7,0,0], + [0,0,0,0,0], + [0,0,0,0,0] + ] + ], + [ + [ + [8,0,9,0,0], + [0,0,0,0,0], + [10,0,11,0,0], + [0,0,0,0,0], + [0,0,0,0,0] + ], + [ + [12,0,13,0,0], + [0,0,0,0,0], + [14,0,15,0,0], + [0,0,0,0,0], + [0,0,0,0,0] + ] + ] + ] + ) + +def test_forwards(pool_layer,forwards_input,forwards_expected_result): + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result): + pool_layer.VECTORISED = True + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.input = forwards_input + pool_layer.padded_input = forwards_input + pool_layer.output = forwards_expected_result + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) + +def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.VECTORISED = True + pool_layer._forwards(forwards_input) + pool_layer.output = forwards_expected_result + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) diff --git a/tests/test_pool_layer/test_case_3.py b/tests/test_pool_layer/test_case_3.py new file mode 100644 index 0000000..ce217d7 --- /dev/null +++ b/tests/test_pool_layer/test_case_3.py @@ -0,0 +1,136 @@ +""" +TC1: +- filt shape (3,3) +- stride 2 +- pool type mean +- input shape (2,5,5) +""" +import pytest +from cnn.layers import Pool +import numpy as np + +@pytest.fixture +def input_shape(): + return (2,5,5) + +@pytest.fixture +def pool_layer(): + layer = Pool( + filt_shape=(3,3), + stride=2, + pool_type='mean', + input_shape=(2,5,5), + vectorised=False, + track_history=False + ) + layer.prepare_layer() + return layer + +@pytest.fixture +def forwards_input(input_shape): + batch_size = 2 + return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape)) + +@pytest.fixture +def forwards_expected_result(): + return np.array( + [ + [ + [ + [6, 8], + [16, 18] + ], + [ + [31, 33], + [41, 43] + ] + ], + [ + [ + [56, 58], + [66, 68] + ], + [ + [81, 83], + [91, 93] + ] + ] + ] + ) + +@pytest.fixture +def backwards_input(forwards_expected_result): + return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape)) + +@pytest.fixture +def backwards_expected_result(): + return np.array( + [ + [ + [ + [0,0,1,1,1], + [0,0,1,1,1], + [2,2,6,4,4], + [2,2,5,3,3], + [2,2,5,3,3] + ], + [ + [4,4,9,5,5], + [4,4,9,5,5], + [10,10,22,12,12], + [6,6,13,7,7], + [6,6,13,7,7] + ] + ], + [ + [ + [8,8,17,9,9], + [8,8,17,9,9], + [18,18,38,20,20], + [10,10,21,11,11], + [10,10,21,11,11] + ], + [ + [12,12,25,13,13], + [12,12,25,13,13], + [26,26,54,28,28], + [14,14,29,15,15], + [14,14,29,15,15] + ] + ] + ] + ) + +def test_forwards(pool_layer,forwards_input,forwards_expected_result): + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result): + pool_layer.VECTORISED = True + assert np.array_equal( + pool_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.input = forwards_input + pool_layer.padded_input = forwards_input + pool_layer.output = forwards_expected_result + print(pool_layer._backwards(backwards_input)) + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) + +def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + pool_layer.VECTORISED = True + pool_layer._forwards(forwards_input) + pool_layer.output = forwards_expected_result + print(backwards_expected_result) + print(pool_layer._backwards(backwards_input)) + assert np.array_equal( + pool_layer._backwards(backwards_input), + backwards_expected_result + ) From c3942d5428c2b66ce90205a5ac26a4fdd3cf54a4 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Thu, 30 Dec 2021 18:04:59 +0000 Subject: [PATCH 15/24] Add init files in test modules to resolve filename conflicts (global namespace) --- tests/test_conv_layer/__init__.py | 0 tests/test_fc_layer/__init__.py | 0 tests/test_pool_layer/__init__.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/test_conv_layer/__init__.py create mode 100644 tests/test_fc_layer/__init__.py create mode 100644 tests/test_pool_layer/__init__.py diff --git a/tests/test_conv_layer/__init__.py b/tests/test_conv_layer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_fc_layer/__init__.py b/tests/test_fc_layer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pool_layer/__init__.py b/tests/test_pool_layer/__init__.py new file mode 100644 index 0000000..e69de29 From 32b61fdd805a4824896f2d7d3a57bcafb7da82db Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 3 Jan 2022 17:25:31 +0000 Subject: [PATCH 16/24] Clean param counting --- cnn/layers/activation.py | 2 -- cnn/layers/flatten.py | 2 -- cnn/layers/layer.py | 37 ++++++++++++++++++------------------- cnn/layers/pool.py | 2 -- cnn/model.py | 31 ++----------------------------- 5 files changed, 20 insertions(+), 54 deletions(-) diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py index 9ac1af7..a842cba 100644 --- a/cnn/layers/activation.py +++ b/cnn/layers/activation.py @@ -12,8 +12,6 @@ def __init__(self,function: str=None,alpha=0.01,input_shape=None): self.FUNCTION = None if function is None else function.lower() - self.NUM_PARAMS = 0 - def prepare_layer(self): if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py index 6cb20b3..59a33c4 100644 --- a/cnn/layers/flatten.py +++ b/cnn/layers/flatten.py @@ -19,8 +19,6 @@ def __init__(self,input_shape=None): assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).' self.INPUT_SHAPE = input_shape - self.NUM_PARAMS = 0 - def prepare_layer(self) -> None: if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py index 81d20dc..bd0898e 100644 --- a/cnn/layers/layer.py +++ b/cnn/layers/layer.py @@ -1,5 +1,7 @@ import numpy as np import sys +from cnn.params import CNNParam +from typing import Tuple, Union class Layer: ''' @@ -12,7 +14,6 @@ def __init__(self): self.prev_layer = None self.output = None - self.params = {} def _initiate_history(self): out_init_arr = np.zeros(self.model.EPOCHS * self.model.N) @@ -75,23 +76,21 @@ def define_details(self): return details - def count_params(self): - ''' - params = { - 'param_name': { - 'trainable':True, - 'values':[....] <--- np.ndarray - } - } - ''' + def count_params(self,split_trainable=True) -> Union(Tuple, int): + """ Sums sizes of any parameter attributes of the layer object. + 'parameter' is defined as any attribute that is of type 'CNNParam'. + + Returns: Tuple(trainable, non trainable) [if split_trainable is True]; total params otherwise. + """ trainable = 0 non_trainable = 0 - - - for param in self.params: - if param.trainable: - trainable += param.values.size - else: - non_trainable += param.values.size - - return trainable, non_trainable \ No newline at end of file + for att in self.__dict__.values(): + if isinstance(att,CNNParam): + if att.trainable: + trainable += att.size + else: + non_trainable += att.size + if split_trainable: + return trainable, non_trainable + else: + return trainable + non_trainable diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index 87bddef..d1bbcd1 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -33,8 +33,6 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd self.VECTORISED = vectorised self.TRACK_HISTORY = track_history - self.NUM_PARAMS = 0 - def prepare_layer(self) -> np.ndarray: """ This needs to be done after the input has been identified - currently happens when train() is called. """ if self.prev_layer == None: # This means this is the first layer in the structure, so 'input' is the only thing before. diff --git a/cnn/model.py b/cnn/model.py index 36dc119..17a19f1 100644 --- a/cnn/model.py +++ b/cnn/model.py @@ -99,18 +99,12 @@ def prepare_model(self,optimiser: Any='gd',learning_rate=None): curr_layer.MODEL_STRUCTURE_INDEX = index - # print(f'Preparing Layer:: Type = {curr_layer.LAYER_TYPE} | Structure index = {curr_layer.MODEL_STRUCTURE_INDEX}') curr_layer.prepare_layer() - # print('--> Num params:',curr_layer.NUM_PARAMS) - # print('--> Expected output shape:',curr_layer.OUTPUT_SHAPE) if curr_layer.MODEL_STRUCTURE_INDEX == 0: # First layer; set model input shape. self.INPUT_SHAPE = curr_layer.INPUT_SHAPE - # self.details['param_counts'].append(curr_layer.NUM_PARAMS) - # self.details['output_shapes'].append(curr_layer.OUTPUT_SHAPE) self.is_prepared = True - # print(self.details) self.print_summary() print(f'Model Prepared: {self.is_prepared}') @@ -211,12 +205,8 @@ def _iterate_forwards(self) -> None: ind_upper = self.N self.current_batch_size = ind_upper - ind_lower - # print('Lower index:',ind_lower,'Upper index:',ind_upper) - # print(self.Xs) - # print(self.BATCH_COUNT, self.Xs.shape) batch_Xs = self.Xs[ ind_lower : ind_upper ].copy() batch_ys = self.ys[ ind_lower : ind_upper ].copy() - # print(batch_Xs.shape,batch_ys.shape) predictions = self.predict(batch_Xs,training=True) @@ -225,14 +215,7 @@ def _iterate_forwards(self) -> None: batch_correct = np.sum((np.argmax(batch_ys.T,axis=0) == np.argmax(predictions,axis=0))) self.epoch_accuracy = (self.epoch_accuracy * ind_lower + batch_correct) / (ind_upper+1) - # for ex_ind , X in enumerate(batch_Xs): # For each example (observation) - # print(X.shape) - # prediction = self.predict(X,training=True) - - # self.iteration_cost += self.cost(prediction, batch_ys[ex_ind],batch_size=batch_size) - # self.iteration_cost_gradient += self.cost(prediction, batch_ys[ex_ind],batch_size=batch_size,derivative=True) - - # print(f'-- Epoch: {self.epoch_ind+1}/{self.EPOCHS } | Batch: {batch_ind+1}/{self.BATCH_COUNT} | Cost: {self.iteration_cost}') + self._print_train_progress(batch_ind) self._iterate_backwards() @@ -252,8 +235,6 @@ def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray: if training: self.feed_forwards_cycle_index += 1 for layer in self.structure: Xs = layer._forwards(Xs) - # print('Layer index:',layer.MODEL_STRUCTURE_INDEX) - # print('Output:',X) return Xs def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> int: @@ -290,9 +271,7 @@ def cost(self,predictions: np.ndarray,labels: np.ndarray,derivative: bool=False) return -( 2 * error ) / batch_size # Vector elif self.COST_FN == 'cross_entropy': if not derivative: - # print('logprobs:',np.log(predictions)) cost = -np.sum(labels * np.log(predictions)) / batch_size - # print('Cost:',cost) return cost else: return - np.divide(labels,predictions) / batch_size @@ -321,13 +300,7 @@ def print_summary(self): index = str(layer.MODEL_STRUCTURE_INDEX) type_ = layer.LAYER_TYPE out_shape = layer.OUTPUT_SHAPE - trainable_params = 0 - non_trainable_params = 0 - for _,param in layer.params.items(): - if param['trainable']: - trainable_params += param['values'].size - else: - non_trainable_params += param['values'].size + trainable_params, non_trainable_params = layer.count_params(split_trainable=True) total_trainable += trainable_params total_non_trainable += non_trainable_params info_str = ' ' + index + ' '*(field_lengths[0] - len(index)-1) + \ From 53f3beb4bf1cb93f1178878e313e99f4d9bb02e3 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 3 Jan 2022 17:51:34 +0000 Subject: [PATCH 17/24] Clean up layer 'trainable' attribute and establish link with params --- cnn/layers/activation.py | 3 +-- cnn/layers/conv.py | 1 - cnn/layers/fc.py | 1 - cnn/layers/flatten.py | 2 +- cnn/layers/layer.py | 40 +++++++++++++++++++++++++++++++++------- cnn/layers/pool.py | 2 +- 6 files changed, 36 insertions(+), 13 deletions(-) diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py index a842cba..08ae202 100644 --- a/cnn/layers/activation.py +++ b/cnn/layers/activation.py @@ -6,7 +6,7 @@ def __init__(self,function: str=None,alpha=0.01,input_shape=None): super().__init__() self.LAYER_TYPE = self.__class__.__name__ + ' (' + function + ')' - self.TRAINABLE = False + self.trainable = False self.alpha = alpha self.INPUT_SHAPE = input_shape @@ -18,7 +18,6 @@ def prepare_layer(self): else: self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE self.OUTPUT_SHAPE = self.INPUT_SHAPE - # self.output = np.zeros(shape=self.INPUT_SHAPE ) def _forwards(self,_input: np.ndarray) -> np.ndarray: if self.prev_layer.LAYER_TYPE == 'FC': diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index e0f48f4..031ee19 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -20,7 +20,6 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd super().__init__() self.LAYER_TYPE = self.__class__.__name__ - self.TRAINABLE = True if type(filt_shape) == tuple: assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)' self.FILT_SHAPE = filt_shape # 2D tuple describing num rows and cols diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py index 2550b97..3aa3d24 100644 --- a/cnn/layers/fc.py +++ b/cnn/layers/fc.py @@ -19,7 +19,6 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met super().__init__() self.LAYER_TYPE = self.__class__.__name__ - self.TRAINABLE = True self.NUM_NODES = num_nodes self.ACTIVATION = None if activation is None else activation.lower() self.RANDOM_SEED = random_seed diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py index 59a33c4..fada80b 100644 --- a/cnn/layers/flatten.py +++ b/cnn/layers/flatten.py @@ -14,7 +14,7 @@ def __init__(self,input_shape=None): super().__init__() self.LAYER_TYPE = self.__class__.__name__ - self.TRAINABLE = False + self.trainable = False if input_shape is not None: assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).' self.INPUT_SHAPE = input_shape diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py index bd0898e..fa3bdfe 100644 --- a/cnn/layers/layer.py +++ b/cnn/layers/layer.py @@ -76,7 +76,7 @@ def define_details(self): return details - def count_params(self,split_trainable=True) -> Union(Tuple, int): + def count_params(self,split_trainable=True) -> Union[Tuple, int]: """ Sums sizes of any parameter attributes of the layer object. 'parameter' is defined as any attribute that is of type 'CNNParam'. @@ -84,13 +84,39 @@ def count_params(self,split_trainable=True) -> Union(Tuple, int): """ trainable = 0 non_trainable = 0 - for att in self.__dict__.values(): - if isinstance(att,CNNParam): - if att.trainable: - trainable += att.size - else: - non_trainable += att.size + for param in self.get_params(): + if param.trainable: + trainable += param.size + else: + non_trainable += param.size if split_trainable: return trainable, non_trainable else: return trainable + non_trainable + + def get_params(self): + params = [] + for att in self.__dict__.values(): + if isinstance(att,CNNParam): + params.append(att) + return params + + @property + def trainable(self): + try: + return self._trainable + except AttributeError as e: + # Defaults to 'True' + self.trainable = True + return self._trainable + + @trainable.setter + def trainable(self,value): + """ When setting to trainability of the layer, this should link with the param trainability; + i.e. set the value for each param. + This relationship is one-directional. + """ + assert isinstance(value,bool), f"{self}.trainable must be a boolean value." + self._trainable = value + for param in self.get_params(): + param.trainable = value diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index d1bbcd1..f722690 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -16,7 +16,7 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd super().__init__() self.LAYER_TYPE = self.__class__.__name__ - self.TRAINABLE = False + self.trainable = False if type(filt_shape) == tuple: assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)' self.FILT_SHAPE = filt_shape # 2D tuple describing num rows and cols From ee766d0524b42ea0ff7203d292270470b648d436 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Mon, 3 Jan 2022 18:33:33 +0000 Subject: [PATCH 18/24] Clean up LAYER_TYPE attribute --- cnn/layers/activation.py | 1 - cnn/layers/conv.py | 1 - cnn/layers/fc.py | 1 - cnn/layers/flatten.py | 1 - cnn/layers/layer.py | 1 + cnn/layers/pool.py | 1 - cnn/model.py | 14 +++++++------- 7 files changed, 8 insertions(+), 12 deletions(-) diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py index 08ae202..8495643 100644 --- a/cnn/layers/activation.py +++ b/cnn/layers/activation.py @@ -5,7 +5,6 @@ class Activation(Layer): def __init__(self,function: str=None,alpha=0.01,input_shape=None): super().__init__() - self.LAYER_TYPE = self.__class__.__name__ + ' (' + function + ')' self.trainable = False self.alpha = alpha self.INPUT_SHAPE = input_shape diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py index 031ee19..d1c8ea7 100644 --- a/cnn/layers/conv.py +++ b/cnn/layers/conv.py @@ -19,7 +19,6 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd assert num_filters > 0, 'Cannot use less than 1 filter in Conv Layer.' super().__init__() - self.LAYER_TYPE = self.__class__.__name__ if type(filt_shape) == tuple: assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)' self.FILT_SHAPE = filt_shape # 2D tuple describing num rows and cols diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py index 3aa3d24..705052d 100644 --- a/cnn/layers/fc.py +++ b/cnn/layers/fc.py @@ -18,7 +18,6 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met """ super().__init__() - self.LAYER_TYPE = self.__class__.__name__ self.NUM_NODES = num_nodes self.ACTIVATION = None if activation is None else activation.lower() self.RANDOM_SEED = random_seed diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py index fada80b..57273d2 100644 --- a/cnn/layers/flatten.py +++ b/cnn/layers/flatten.py @@ -13,7 +13,6 @@ def __init__(self,input_shape=None): super().__init__() - self.LAYER_TYPE = self.__class__.__name__ self.trainable = False if input_shape is not None: assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).' diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py index fa3bdfe..47692a0 100644 --- a/cnn/layers/layer.py +++ b/cnn/layers/layer.py @@ -8,6 +8,7 @@ class Layer: ABSTRACT LAYER CLASS FOR ALL LAYER TYPES ''' def __init__(self): + self.LAYER_TYPE = self.__class__.__name__ self.model = None self.next_layer = None diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py index f722690..752e795 100644 --- a/cnn/layers/pool.py +++ b/cnn/layers/pool.py @@ -15,7 +15,6 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd ''' super().__init__() - self.LAYER_TYPE = self.__class__.__name__ self.trainable = False if type(filt_shape) == tuple: assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)' diff --git a/cnn/model.py b/cnn/model.py index 17a19f1..d50efc2 100644 --- a/cnn/model.py +++ b/cnn/model.py @@ -34,7 +34,7 @@ def __init__(self,optimiser_method='gd'): self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1))) # dict for counting number of each layer type def add_layer(self,layer: Layer) -> None: - if layer.LAYER_TYPE == 'ACTIVATION' and self.structure[-1].LAYER_TYPE == 'ACTIVATION': + if layer.LAYER_TYPE == 'Activation' and self.structure[-1].LAYER_TYPE == 'Activation': print('-- WARNING:: Two Activation Layers in subsequent positions in the model.') if layer.FUNCTION == self.structure[-1].FUNCTION: print('--- INFO:: Both Activation Layers are the same, skipping creation of second layer.') @@ -43,15 +43,15 @@ def add_layer(self,layer: Layer) -> None: layer.model = self if len(self.structure) > 0: - if layer.__class__.__name__ == 'FC' and self.structure[-1].__class__.__name__ not in ('Flatten','FC','Activation'): + if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC','Activation'): # If no Flatten layer added before adding first FC layer, one will be added automatically. self.add_layer(layers.Flatten()) self.structure.append(layer) - self.layer_counts[layer.__class__.__name__] += 1 + self.layer_counts[layer.LAYER_TYPE] += 1 self.layer_counts['total'] += 1 - if layer.__class__.__name__ == 'FC': + if layer.LAYER_TYPE == 'FC': # Create the Activation Layer (transparent to user). self.add_layer( layers.Activation(function=layer.ACTIVATION) @@ -128,7 +128,7 @@ def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32, ys = ys.reshape(-1,1) if ys.ndim == 1 else ys # --------- ASSERTIONS ----------- # Check shapes and orientation are as expected - assert self.structure[-1].__class__.__name__ in ('FC','Activation'), 'Model must have either FC or ACTIVATION as final layer.' + assert self.structure[-1].LAYER_TYPE in ('FC','Activation'), 'Model must have either FC or Activation as final layer.' assert Xs.shape[0] == ys.shape[0], f'Dimension (0) of input data [{Xs.shape}] and labels [{ys.shape}] does not match.' assert Xs.ndim in (2,4), 'Xs must be either 2 dimensions (for NN) or 4 dimensions (for Model).' if Xs.ndim == 4: @@ -237,7 +237,7 @@ def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray: Xs = layer._forwards(Xs) return Xs - def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> int: + def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> float: predictions = self.predict(Xs,training=False) accuracy = np.sum((np.argmax(ys.T,axis=0) == np.argmax(predictions,axis=0))) / len(Xs) return accuracy @@ -298,7 +298,7 @@ def print_summary(self): total_non_trainable = 0 for layer in self.structure: index = str(layer.MODEL_STRUCTURE_INDEX) - type_ = layer.LAYER_TYPE + type_ = layer.LAYER_TYPE + ' (' + layer.FUNCTION + ')' if layer.LAYER_TYPE == "Activation" else layer.LAYER_TYPE out_shape = layer.OUTPUT_SHAPE trainable_params, non_trainable_params = layer.count_params(split_trainable=True) total_trainable += trainable_params From 468a8ffeda1fff974b4d48486957b558171f2b7f Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 18:37:17 +0100 Subject: [PATCH 19/24] feat: add activations module group including ReLU. --- cnn/layers/activations/__init__.py | 2 + cnn/layers/activations/base.py | 19 ++++++ cnn/layers/activations/relu.py | 22 +++++++ tests/test_activations/__init__.py | 0 tests/test_activations/test_relu/__init__.py | 0 .../test_activations/test_relu/test_case_1.py | 65 +++++++++++++++++++ 6 files changed, 108 insertions(+) create mode 100644 cnn/layers/activations/__init__.py create mode 100644 cnn/layers/activations/base.py create mode 100644 cnn/layers/activations/relu.py create mode 100644 tests/test_activations/__init__.py create mode 100644 tests/test_activations/test_relu/__init__.py create mode 100644 tests/test_activations/test_relu/test_case_1.py diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py new file mode 100644 index 0000000..5c01e85 --- /dev/null +++ b/cnn/layers/activations/__init__.py @@ -0,0 +1,2 @@ +from .relu import ReLU + diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py new file mode 100644 index 0000000..f6045f1 --- /dev/null +++ b/cnn/layers/activations/base.py @@ -0,0 +1,19 @@ +from ..layer import Layer + +class BaseActivation(Layer): + def __init__(self,function: str=None,alpha=0.01,input_shape=None): + super().__init__() + + self.trainable = False + self.alpha = alpha + self.INPUT_SHAPE = input_shape + + self.FUNCTION = None if function is None else function.lower() + + def prepare_layer(self): + if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. + assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' + else: + self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE + self.OUTPUT_SHAPE = self.INPUT_SHAPE + diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py new file mode 100644 index 0000000..b52e9e8 --- /dev/null +++ b/cnn/layers/activations/relu.py @@ -0,0 +1,22 @@ +import numpy as np +from .base import BaseActivation + +class ReLU(BaseActivation): + + def _forwards(self,X:np.ndarray): + self.input = X.copy() + self.output = np.maximum(self.input,0) + return self.output + + def _backwards(self,dCdA:np.ndarray): + # Init dAdZ as square array representing all connections between input and output nodes + dAdZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0])) # TODO: Will need varifying for Conv Activation. + + # Insert layer input along dAdZ diagonals - values > 0 -> 1; values <= 0 -> 0 + ix,iy = np.diag_indices_from(dAdZ[0,:,:]) + dAdZ[:,iy,ix] = (self.input.T > 0).astype(int) + + dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1)) + dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) + + return dC_dZexpanded.reshape(dCdA.shape[1],-1).T diff --git a/tests/test_activations/__init__.py b/tests/test_activations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_activations/test_relu/__init__.py b/tests/test_activations/test_relu/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_activations/test_relu/test_case_1.py b/tests/test_activations/test_relu/test_case_1.py new file mode 100644 index 0000000..64583f6 --- /dev/null +++ b/tests/test_activations/test_relu/test_case_1.py @@ -0,0 +1,65 @@ +""" +TC1: +- input (2,5) +""" +import numpy as np +import pytest +from cnn.layers.activations import ReLU + +@pytest.fixture +def input_shape(): + return (2,5) + +@pytest.fixture +def relu_layer(input_shape): + layer = ReLU(input_shape=input_shape) + class DummyPrevLayer: + output = np.zeros(input_shape) + OUTPUT_SHAPE = input_shape + layer.prev_layer = DummyPrevLayer() + layer.prepare_layer() + return layer + +@pytest.fixture +def forwards_input(input_shape): + arr = np.arange(np.prod(input_shape)).reshape(input_shape).astype(np.float) + median = np.median(arr) + arr -= median + return arr + +@pytest.fixture +def forwards_expected_result(): + return np.array( + [ + [0, 0, 0, 0, 0], + [0.5, 1.5, 2.5, 3.5, 4.5] + ] + ) + +@pytest.fixture +def backwards_input(relu_layer): + out_shape = relu_layer.OUTPUT_SHAPE + return np.arange(np.prod(out_shape)).reshape(out_shape) + +@pytest.fixture +def backwards_expected_result(): + return np.array( + [ + [0, 0, 0, 0, 0], + [5, 6, 7, 8, 9] + ] + ) + +def test_forwards(relu_layer,forwards_input,forwards_expected_result): + assert np.array_equal( + relu_layer._forwards(forwards_input), + forwards_expected_result + ) + +def test_backwards(relu_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result): + relu_layer.input = forwards_input + relu_layer.output = forwards_expected_result + assert np.array_equal( + relu_layer._backwards(backwards_input), + backwards_expected_result + ) From 17c34fd0597f87bb0dba8cc19551188145881ce4 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 18:45:17 +0100 Subject: [PATCH 20/24] feat: add softmax and sigmoid to activations/ -- untested --- cnn/layers/activations/__init__.py | 3 ++- cnn/layers/activations/sigmoid.py | 23 +++++++++++++++++++++++ cnn/layers/activations/softmax.py | 26 ++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 cnn/layers/activations/sigmoid.py create mode 100644 cnn/layers/activations/softmax.py diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py index 5c01e85..292e1e0 100644 --- a/cnn/layers/activations/__init__.py +++ b/cnn/layers/activations/__init__.py @@ -1,2 +1,3 @@ from .relu import ReLU - +from .softmax import Softmax +from .sigmoid import Sigmoid diff --git a/cnn/layers/activations/sigmoid.py b/cnn/layers/activations/sigmoid.py new file mode 100644 index 0000000..78d012f --- /dev/null +++ b/cnn/layers/activations/sigmoid.py @@ -0,0 +1,23 @@ +import numpy as np +from .base import BaseActivation + +class Sigmoid(BaseActivation): + + def _forwards(self,X:np.ndarray): + self.input = X.copy() + # The sigmoid function has a smooth gradient and outputs values between zero and one. For very high or low values of the input parameters, the network can be very slow to reach a prediction, called the vanishing gradient problem. + self.output = 1 / (1 + np.exp(-X)) + return self.output + + def _backwards(self,dCdA:np.ndarray): + # Init dAdZ as square array representing all connections between input and output nodes + dAdZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0])) # TODO: Will need varifying for Conv Activation. + + # sig (1 - sig) across diagonals + ix,iy = np.diag_indices_from(dAdZ[0,:,:]) + dAdZ[:,iy,ix] = (self.output * (1 - self.output)).T # Element-wise multiplication. + + dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1)) + dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) + + return dC_dZexpanded.reshape(dCdA.shape[1],-1).T diff --git a/cnn/layers/activations/softmax.py b/cnn/layers/activations/softmax.py new file mode 100644 index 0000000..02e68f9 --- /dev/null +++ b/cnn/layers/activations/softmax.py @@ -0,0 +1,26 @@ +import numpy as np +from .base import BaseActivation + +class Softmax(BaseActivation): + + def _forwards(self,X:np.ndarray): + self.input = X.copy() + assert self.prev_layer.LAYER_TYPE == 'FC', 'Softmax activation function is not supported for non-FC inputs.' + # Softmax is a special activation function used for output neurons. It normalizes outputs for each class between 0 and 1, and returns the probability that the input belongs to a specific class. + exp = np.exp(X - np.max(X,axis=0)) # Normalises by max value - provides "numerical stability" + self.output = exp / np.sum(exp,axis=0) + return self.output + + def _backwards(self,dCdA:np.ndarray): + # Vectorised implementation from https://stackoverflow.com/questions/59286911/vectorized-softmax-gradient + # NOTE: Transpose is required to create the square matrices of each set of node values. + outputT = self.output.T + diag_matrices = outputT.reshape(outputT.shape[0],-1,1) * np.diag(np.ones(outputT.shape[1])) # Diagonal Matrices + outer_product = np.matmul(outputT.reshape(outputT.shape[0],-1,1), outputT.reshape(outputT.shape[0],1,-1)) # Outer product + Jsm = diag_matrices - outer_product + dAdZ = Jsm # NOTE: Even though this equation uses softmax transpose at start, the output does not require transposing because the softmax derivative is symmetrical along diagonal. + + dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1)) + dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) + + return dC_dZexpanded.reshape(dCdA.shape[1],-1).T From 34d1e15f9995e2e546e41993cb9f84b6ec82037d Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 18:55:02 +0100 Subject: [PATCH 21/24] feat: add Tanh & LeakReLU to activations/ -- untested --- cnn/layers/activations/__init__.py | 3 ++- cnn/layers/activations/relu.py | 21 +++++++++++++++++++++ cnn/layers/activations/tanh.py | 18 ++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 cnn/layers/activations/tanh.py diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py index 292e1e0..db192a3 100644 --- a/cnn/layers/activations/__init__.py +++ b/cnn/layers/activations/__init__.py @@ -1,3 +1,4 @@ -from .relu import ReLU +from .relu import ReLU, LeakyReLU from .softmax import Softmax from .sigmoid import Sigmoid +from .tanh import Tanh diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py index b52e9e8..a1c0e89 100644 --- a/cnn/layers/activations/relu.py +++ b/cnn/layers/activations/relu.py @@ -20,3 +20,24 @@ def _backwards(self,dCdA:np.ndarray): dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) return dC_dZexpanded.reshape(dCdA.shape[1],-1).T + +class LeakyReLU(BaseActivation): + + def _forwards(self,X:np.ndarray): + self.input = X.copy() + # The Leaky ReLu function has a small positive slope in its negative area, enabling it to process zero or negative values. + self.output = X + self.output[self.output <= 0] = self.alpha * self.output[self.output <= 0] + return self.output + + def _backwards(self,dCdA:np.ndarray): + # Init dAdZ as square array representing all connections between input and output nodes + dAdZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0])) # TODO: Will need varifying for Conv Activation. + + ix,iy = np.diag_indices_from(dAdZ[0,:,:]) + dAdZ[:,iy,ix] = ( (self.input > 0).astype(int) + ((self.input < 0).astype(int) * self.alpha ) ).T + + dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1)) + dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) + + return dC_dZexpanded.reshape(dCdA.shape[1],-1).T diff --git a/cnn/layers/activations/tanh.py b/cnn/layers/activations/tanh.py new file mode 100644 index 0000000..6faf71b --- /dev/null +++ b/cnn/layers/activations/tanh.py @@ -0,0 +1,18 @@ +import numpy as np +from .base import BaseActivation + +class Tanh(BaseActivation): + + def _forwards(self,X:np.ndarray): + self.input = X.copy() + # The TanH function is zero-centered making it easier to model inputs that are strongly negative strongly positive or neutral. + self.output = ( np.exp(X) - np.exp(-X) ) / ( np.exp(X) + np.exp(-X) ) + return self.output + + def _backwards(self,dCdA:np.ndarray): + dAdZ = np.diag((1 - np.square( self.output )).flatten()) + + dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1)) + dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded) + + return dC_dZexpanded.reshape(dCdA.shape[1],-1).T From 13ed2e02d9194e8f22da805841191a273f14228b Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 19:02:08 +0100 Subject: [PATCH 22/24] feat: add wrapping forwards/ backwards methods for BaseActivation --- cnn/layers/activations/base.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py index f6045f1..3b9f4e4 100644 --- a/cnn/layers/activations/base.py +++ b/cnn/layers/activations/base.py @@ -17,3 +17,27 @@ def prepare_layer(self): self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE self.OUTPUT_SHAPE = self.INPUT_SHAPE + def forwards(self, X): + if self.prev_layer.LAYER_TYPE == 'FC': + assert len(X.shape) == 2 and X.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(X.shape[0],1)}' + self.input = X + + self._forwards(X) + + assert self.output.shape == X.shape, f'Output shape, {self.output.shape}, not the same as input shape, {X.shape}.' + self._track_metrics(output=self.output) + + return self.output + + def backwards(self, dCdA): + assert dCdA.shape == self.output.shape, f'dC/dA shape, {dCdA.shape}, not as expected, {self.output.shape}.' + self._track_metrics(cost_gradient=dCdA) + + dCdZ = self._backwards(dCdA) + + assert dCdZ.shape == self.prev_layer.output.shape, f'Back propagating dC_dZ has shape: {dCdZ.shape} when previous layer output has shape {self.prev_layer.output.shape}' + + return dCdZ + + + From 40b94dfe20d61a7433f33370b90b56864997a62b Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 19:06:40 +0100 Subject: [PATCH 23/24] fix: cleanup unnecessary init args in BaseActivation --- cnn/layers/activations/base.py | 5 +---- cnn/layers/activations/relu.py | 3 +++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py index 3b9f4e4..92f9452 100644 --- a/cnn/layers/activations/base.py +++ b/cnn/layers/activations/base.py @@ -1,15 +1,12 @@ from ..layer import Layer class BaseActivation(Layer): - def __init__(self,function: str=None,alpha=0.01,input_shape=None): + def __init__(self,input_shape=None): super().__init__() self.trainable = False - self.alpha = alpha self.INPUT_SHAPE = input_shape - self.FUNCTION = None if function is None else function.lower() - def prepare_layer(self): if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py index a1c0e89..43da5d2 100644 --- a/cnn/layers/activations/relu.py +++ b/cnn/layers/activations/relu.py @@ -22,6 +22,9 @@ def _backwards(self,dCdA:np.ndarray): return dC_dZexpanded.reshape(dCdA.shape[1],-1).T class LeakyReLU(BaseActivation): + def __init__(self, alpha=0.01, input_shape=None): + super().__init__(input_shape=input_shape) + self.alpha = alpha def _forwards(self,X:np.ndarray): self.input = X.copy() From 5e15790121813df2acec317c90eb263b8042a1b3 Mon Sep 17 00:00:00 2001 From: JamesQuirk Date: Sat, 21 May 2022 20:41:17 +0100 Subject: [PATCH 24/24] feat: cleanup model.py; remove bad assertion; remove activation.py --- cnn/layers/__init__.py | 7 +- cnn/layers/activation.py | 120 ----------------------------- cnn/layers/activations/__init__.py | 14 ++++ cnn/layers/activations/base.py | 2 + cnn/layers/activations/relu.py | 3 + cnn/layers/activations/sigmoid.py | 1 + cnn/layers/activations/softmax.py | 1 + cnn/layers/activations/tanh.py | 1 + cnn/layers/layer.py | 10 +-- cnn/model.py | 47 ++++------- cnn/optimisers/__init__.py | 11 +-- 11 files changed, 45 insertions(+), 172 deletions(-) delete mode 100644 cnn/layers/activation.py diff --git a/cnn/layers/__init__.py b/cnn/layers/__init__.py index c3029be..d918d16 100644 --- a/cnn/layers/__init__.py +++ b/cnn/layers/__init__.py @@ -1,11 +1,6 @@ -from .activation import Activation +from . import activations from .conv import Conv2D from .fc import FC from .flatten import Flatten from .pool import Pool - -# Expose list of all optimiser class names. -import inspect -import sys -layers = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] \ No newline at end of file diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py deleted file mode 100644 index 8495643..0000000 --- a/cnn/layers/activation.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np -from .layer import Layer - -class Activation(Layer): - def __init__(self,function: str=None,alpha=0.01,input_shape=None): - super().__init__() - - self.trainable = False - self.alpha = alpha - self.INPUT_SHAPE = input_shape - - self.FUNCTION = None if function is None else function.lower() - - def prepare_layer(self): - if self.prev_layer is None: # This means this is the first layer in the structure, so 'input' is the only thing before. - assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.' - else: - self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE - self.OUTPUT_SHAPE = self.INPUT_SHAPE - - def _forwards(self,_input: np.ndarray) -> np.ndarray: - if self.prev_layer.LAYER_TYPE == 'FC': - assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}' - self.input = _input - - if self.FUNCTION is None: - self.output = _input - elif self.FUNCTION == 'relu': # NOTE: This would work for Conv activation. - # The ReLu function is highly computationally efficient but is not able to process inputs that approach zero or negative. - self.output = np.maximum(_input,0) - elif self.FUNCTION == 'softmax': - assert self.prev_layer.LAYER_TYPE == 'FC', 'Softmax activation function is not supported for non-FC inputs.' - # Softmax is a special activation function used for output neurons. It normalizes outputs for each class between 0 and 1, and returns the probability that the input belongs to a specific class. - exp = np.exp(_input - np.max(_input,axis=0)) # Normalises by max value - provides "numerical stability" - self.output = exp / np.sum(exp,axis=0) - # print(_input) - # print(self.output) - # assert round(self.output.sum()) == 1, f'Output array sum {self.output.sum()} is not equal to 1.\nInput Array: {self.input.reshape((1,-1))}\nOuput Array: {self.output.reshape((1,-1))}' - elif self.FUNCTION == 'sigmoid': # NOTE: This would work for Conv activation. - # The sigmoid function has a smooth gradient and outputs values between zero and one. For very high or low values of the input parameters, the network can be very slow to reach a prediction, called the vanishing gradient problem. - self.output = 1 / (1 + np.exp(-_input)) - elif self.FUNCTION == 'step': # TODO: Define "step function" activation - pass - elif self.FUNCTION == 'tanh': - # The TanH function is zero-centered making it easier to model inputs that are strongly negative strongly positive or neutral. - self.output = ( np.exp(_input) - np.exp(-_input) ) / ( np.exp(_input) + np.exp(-_input) ) - elif self.FUNCTION == 'swish': # TODO: Define "Swish function" activation - # Swish is a new activation function discovered by Google researchers. It performs better than ReLu with a similar level of computational efficiency. - pass - elif self.FUNCTION == 'leaky relu': - # The Leaky ReLu function has a small positive slope in its negative area, enabling it to process zero or negative values. - _input[_input <= 0] = self.alpha * _input[_input <= 0] - self.output = _input - elif self.FUNCTION == 'parametric relu': # TODO: Define "Parametric ReLu" - # The Parametric ReLu function allows the negative slope to be learned, performing backpropagation to learn the most effective slope for zero and negative input values. - pass - - assert self.output.shape == _input.shape, f'Output shape, {self.output.shape}, not the same as input shape, {_input.shape}.' - self._track_metrics(output=self.output) - # print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output) - return self.output - - def _backwards(self,dC_dA: np.ndarray) -> np.ndarray: - """Compute derivative of Activation w.r.t. Z - NOTE: CURRENTLY NOT SUPPORTED FOR CONV/POOL LAYERS. - """ - assert dC_dA.shape == self.output.shape, f'dC/dA shape, {dC_dA.shape}, not as expected, {self.output.shape}.' - self._track_metrics(cost_gradient=dC_dA) - dA_dZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0])) # TODO: Will need varifying for Conv Activation. - if self.FUNCTION is None: # a = z - dA_dZ = np.broadcast_to(np.diag(np.ones(dA_dZ.shape[-1])),dA_dZ.shape ) - elif self.FUNCTION == 'relu': - # Insert layer input along dA_dZ diagonals - values > 0 -> 1; values <= 0 -> 0 - ix,iy = np.diag_indices_from(dA_dZ[0,:,:]) - dA_dZ[:,iy,ix] = (self.input.T > 0).astype(int) - elif self.FUNCTION == 'softmax': - # Vectorised implementation from https://stackoverflow.com/questions/59286911/vectorized-softmax-gradient - # NOTE: Transpose is required to create the square matrices of each set of node values. - outputT = self.output.T - diag_matrices = outputT.reshape(outputT.shape[0],-1,1) * np.diag(np.ones(outputT.shape[1])) # Diagonal Matrices - outer_product = np.matmul(outputT.reshape(outputT.shape[0],-1,1), outputT.reshape(outputT.shape[0],1,-1)) # Outer product - Jsm = diag_matrices - outer_product - dA_dZ = Jsm # NOTE: Even though this equation uses softmax transpose at start, the output does not require transposing because the softmax derivative is symmetrical along diagonal. - - elif self.FUNCTION == 'sigmoid': - # sig (1 - sig) across diagonals - ix,iy = np.diag_indices_from(dA_dZ[0,:,:]) - dA_dZ[:,iy,ix] = (self.output * (1 - self.output)).T # Element-wise multiplication. - elif self.FUNCTION == 'step': # TODO: Define "step function" derivative - dA_dZ = None - elif self.FUNCTION == 'tanh': - dA_dZ = np.diag((1 - np.square( self.output )).flatten()) - elif self.FUNCTION == 'swish': # TODO: Define "Swish function" derivative - dA_dZ = None - elif self.FUNCTION == 'leaky relu': - ix,iy = np.diag_indices_from(dA_dZ[0,:,:]) - dA_dZ[:,iy,ix] = ( (self.input > 0).astype(int) + ((self.input < 0).astype(int) * self.alpha ) ).T - - # input_diag = np.diag(self.input.flatten()) - # input_diag[input_diag > 0] = 1 - # input_diag[input_diag < 0] = self.alpha - # dA_dZ = input_diag - elif self.FUNCTION == 'parametric relu': # TODO: Define "Parametric ReLu" derivative - dA_dZ = None - - assert dA_dZ is not None, f'No derivative defined for chosen activation function "{self.FUNCTION}"' - assert dA_dZ.shape[1:] == (self.output.shape[0],self.output.shape[0]), 'dA/dZ is expected to be a square matrix (for each example in batch) containing gradient between each activation node and each input node.' - # print('Layer: ', self.LAYER_TYPE) - # print('Local gradient shape:',dA_dZ.shape) - # print('Cost gradient shape:',dC_dA.shape) - - dC_dAexpanded = dC_dA.T.reshape((dC_dA.T.shape[0],-1,1)) - dC_dZexpanded = np.matmul(dA_dZ,dC_dAexpanded) - dC_dZ = dC_dZexpanded.reshape(dC_dA.shape[1],-1).T - - assert dC_dZ.shape == self.prev_layer.output.shape, f'Back propagating dC_dZ has shape: {dC_dZ.shape} when previous layer output has shape {self.prev_layer.output.shape}' - if self.FUNCTION is None: - assert np.array_equal(dC_dZ,dC_dA), 'For activation: None; dC/dZ is expected to be the same as dC/dA.' - - return dC_dZ diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py index db192a3..efd531a 100644 --- a/cnn/layers/activations/__init__.py +++ b/cnn/layers/activations/__init__.py @@ -2,3 +2,17 @@ from .softmax import Softmax from .sigmoid import Sigmoid from .tanh import Tanh + +# ------------- BELOW IS DYNAMIC TO AVAILABLE ACTIVATION CLASSES ---------------- + +# Expose list of all activation class names. +import inspect +import sys +available_activations = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + +__activation_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + +def from_name(name): + for activation in __activation_classes: + if activation.ALIAS == name or activation.__name__ == name: + return activation() diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py index 92f9452..62357be 100644 --- a/cnn/layers/activations/base.py +++ b/cnn/layers/activations/base.py @@ -1,6 +1,8 @@ from ..layer import Layer class BaseActivation(Layer): + ALIAS = "base" + def __init__(self,input_shape=None): super().__init__() diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py index 43da5d2..51eeb79 100644 --- a/cnn/layers/activations/relu.py +++ b/cnn/layers/activations/relu.py @@ -2,6 +2,7 @@ from .base import BaseActivation class ReLU(BaseActivation): + ALIAS = "relu" def _forwards(self,X:np.ndarray): self.input = X.copy() @@ -22,6 +23,8 @@ def _backwards(self,dCdA:np.ndarray): return dC_dZexpanded.reshape(dCdA.shape[1],-1).T class LeakyReLU(BaseActivation): + ALIAS = "leaky_relu" + def __init__(self, alpha=0.01, input_shape=None): super().__init__(input_shape=input_shape) self.alpha = alpha diff --git a/cnn/layers/activations/sigmoid.py b/cnn/layers/activations/sigmoid.py index 78d012f..e2f9d57 100644 --- a/cnn/layers/activations/sigmoid.py +++ b/cnn/layers/activations/sigmoid.py @@ -2,6 +2,7 @@ from .base import BaseActivation class Sigmoid(BaseActivation): + ALIAS = "sigmoid" def _forwards(self,X:np.ndarray): self.input = X.copy() diff --git a/cnn/layers/activations/softmax.py b/cnn/layers/activations/softmax.py index 02e68f9..6a0adb6 100644 --- a/cnn/layers/activations/softmax.py +++ b/cnn/layers/activations/softmax.py @@ -2,6 +2,7 @@ from .base import BaseActivation class Softmax(BaseActivation): + ALIAS = "softmax" def _forwards(self,X:np.ndarray): self.input = X.copy() diff --git a/cnn/layers/activations/tanh.py b/cnn/layers/activations/tanh.py index 6faf71b..d274903 100644 --- a/cnn/layers/activations/tanh.py +++ b/cnn/layers/activations/tanh.py @@ -2,6 +2,7 @@ from .base import BaseActivation class Tanh(BaseActivation): + ALIAS = "tanh" def _forwards(self,X:np.ndarray): self.input = X.copy() diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py index 47692a0..4a9541e 100644 --- a/cnn/layers/layer.py +++ b/cnn/layers/layer.py @@ -52,17 +52,17 @@ def define_details(self): 'LAYER_INDEX':self.MODEL_STRUCTURE_INDEX, 'LAYER_TYPE':self.LAYER_TYPE } - if self.LAYER_TYPE is 'CONV': + if self.LAYER_TYPE is 'Conv2D': details.update({ 'NUM_FILTERS':self.NUM_FILTERS, 'STRIDE':self.STRIDE }) - elif self.LAYER_TYPE is 'POOL': + elif self.LAYER_TYPE is 'Pool': details.update({ 'STRIDE':self.STRIDE, 'POOL_TYPE':self.POOL_TYPE }) - elif self.LAYER_TYPE is 'FLATTEN': + elif self.LAYER_TYPE is 'Flatten': details.update({ }) elif self.LAYER_TYPE is 'FC': @@ -70,10 +70,6 @@ def define_details(self): 'NUM_NODES':self.NUM_NODES, 'ACTIVATION':self.ACTIVATION }) - elif self.LAYER_TYPE is 'ACTIVATION': - details.update({ - 'FUNCTION':self.FUNCTION - }) return details diff --git a/cnn/model.py b/cnn/model.py index d50efc2..d8ad27a 100644 --- a/cnn/model.py +++ b/cnn/model.py @@ -20,41 +20,35 @@ class Model(): This is the top level class. """ - def __init__(self,optimiser_method='gd'): + def __init__(self): ''' - optimiser_method (str): Options: ('gd','momentum','rmsprop','adam'). Default is 'gd'. ''' - assert optimiser_method.lower() in Model.SUPPORTED_OPTIMISERS, f'You must provide an optimiser that is supported. The options are: {Model.SUPPORTED_OPTIMISERS}' self.is_prepared = False - self.OPTIMISER_METHOD = optimiser_method.lower() - self.structure = [] # defines order of model (list of layer objects) - EXCLUDES INPUT DATA - self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1))) # dict for counting number of each layer type def add_layer(self,layer: Layer) -> None: - if layer.LAYER_TYPE == 'Activation' and self.structure[-1].LAYER_TYPE == 'Activation': + if layer.LAYER_TYPE in layers.activations.available_activations and self.structure[-1].LAYER_TYPE in layers.activations.available_activations: print('-- WARNING:: Two Activation Layers in subsequent positions in the model.') - if layer.FUNCTION == self.structure[-1].FUNCTION: + if layer.LAYER_TYPE == self.structure[-1].LAYER_TYPE: print('--- INFO:: Both Activation Layers are the same, skipping creation of second layer.') return layer.model = self if len(self.structure) > 0: - if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC','Activation'): + if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC',*layers.activations.available_activations): # If no Flatten layer added before adding first FC layer, one will be added automatically. self.add_layer(layers.Flatten()) self.structure.append(layer) - self.layer_counts[layer.LAYER_TYPE] += 1 - self.layer_counts['total'] += 1 if layer.LAYER_TYPE == 'FC': # Create the Activation Layer (transparent to user). self.add_layer( - layers.Activation(function=layer.ACTIVATION) + layers.activations.from_name(layer.ACTIVATION) ) def remove_layer(self,index: int) -> None: @@ -62,31 +56,24 @@ def remove_layer(self,index: int) -> None: if self.is_prepared: print('-- INFO:: Re-compiling model...') self.prepare_model() - - def get_model_details(self): - details = [] - for layer in self.structure: - details.append(layer.define_details()) - - return details - def prepare_model(self,optimiser: Any='gd',learning_rate=None): + def prepare_model(self,optimiser: Any='gd'): """ Called once final layer is added, each layer can now initiate its weights and biases. """ print('Preparing model...') if type(optimiser) == str: - assert optimiser.lower() in optimisers.optimiser_names, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_names}' - self.OPTIMISER = optimisers.from_name(optimiser,learning_rate) + assert optimiser.lower() in optimisers.optimiser_identifiers, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_identifiers}' + self.OPTIMISER = optimisers.from_name(optimiser) else: - assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_names), f'Invalid optimiser: {optimiser}' + assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_identifiers), f'Invalid optimiser: {optimiser}' self.OPTIMISER = optimiser self.details = { 'param_counts': [], 'output_shapes': [] } - if self.layer_counts['total'] > 1: - for index in range(self.layer_counts['total']): + if len(self.structure) > 1: + for index, curr_layer in enumerate(self.structure): curr_layer = self.structure[index] if index != len(self.structure) - 1: next_layer = self.structure[index + 1] @@ -100,7 +87,7 @@ def prepare_model(self,optimiser: Any='gd',learning_rate=None): curr_layer.MODEL_STRUCTURE_INDEX = index curr_layer.prepare_layer() - if curr_layer.MODEL_STRUCTURE_INDEX == 0: + if index == 0: # First layer; set model input shape. self.INPUT_SHAPE = curr_layer.INPUT_SHAPE @@ -128,7 +115,6 @@ def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32, ys = ys.reshape(-1,1) if ys.ndim == 1 else ys # --------- ASSERTIONS ----------- # Check shapes and orientation are as expected - assert self.structure[-1].LAYER_TYPE in ('FC','Activation'), 'Model must have either FC or Activation as final layer.' assert Xs.shape[0] == ys.shape[0], f'Dimension (0) of input data [{Xs.shape}] and labels [{ys.shape}] does not match.' assert Xs.ndim in (2,4), 'Xs must be either 2 dimensions (for NN) or 4 dimensions (for Model).' if Xs.ndim == 4: @@ -195,8 +181,6 @@ def _print_train_progress(self,batch_index: int) -> None: else: print(print_string,end='\r') - SUPPORTED_OPTIMISERS = ('gd','momentum','rmsprop','adam') - def _iterate_forwards(self) -> None: for batch_ind in range(self.BATCH_COUNT): ind_lower = batch_ind * self.MAX_BATCH_SIZE # Lower bound of index range @@ -296,14 +280,13 @@ def print_summary(self): # Add layer info... total_trainable = 0 total_non_trainable = 0 - for layer in self.structure: - index = str(layer.MODEL_STRUCTURE_INDEX) - type_ = layer.LAYER_TYPE + ' (' + layer.FUNCTION + ')' if layer.LAYER_TYPE == "Activation" else layer.LAYER_TYPE + for index, layer in enumerate(self.structure): + type_ = layer.LAYER_TYPE out_shape = layer.OUTPUT_SHAPE trainable_params, non_trainable_params = layer.count_params(split_trainable=True) total_trainable += trainable_params total_non_trainable += non_trainable_params - info_str = ' ' + index + ' '*(field_lengths[0] - len(index)-1) + \ + info_str = ' ' + str(index) + ' '*(field_lengths[0] - len(str(index))-1) + \ ' ' + type_ + ' '*(field_lengths[1] - len(type_) -1) + \ ' ' + str(out_shape) + ' '*(field_lengths[2] - len(str(out_shape))-1) + \ ' ' + str(trainable_params) + ' '*(field_lengths[3] - len(str(trainable_params))-1) + \ diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py index ef04035..33ee71a 100644 --- a/cnn/optimisers/__init__.py +++ b/cnn/optimisers/__init__.py @@ -10,17 +10,14 @@ # Expose list of all optimiser class names. import inspect import sys -__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda cls: isinstance(cls,BaseOptimiser))] +__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda cls: inspect.isclass(cls) and issubclass(cls,BaseOptimiser))] # Following includes both class name and alias property. -optimiser_names = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes] +optimiser_identifiers = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes] -def from_name(name,learning_rate): +def from_name(name): for optimiser in __optimiser_classes: if optimiser.ALIAS == name or optimiser.__name__ == name: - if learning_rate is None: - return optimiser() - else: - return optimiser(learning_rate=learning_rate) + return optimiser()