From ae2a60bfd8ad700ee137c4f6f84fb9535c435bf6 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 27 Dec 2021 20:33:32 +0000
Subject: [PATCH 01/24] Re-organise utils

---
 cnn/model.py            |  6 +++-
 cnn/utils.py            | 77 -----------------------------------------
 cnn/utils/__init__.py   |  0
 cnn/utils/array.py      | 48 +++++++++++++++++++++++++
 cnn/utils/processing.py | 35 +++++++++++++++++++
 5 files changed, 88 insertions(+), 78 deletions(-)
 delete mode 100644 cnn/utils.py
 create mode 100644 cnn/utils/__init__.py
 create mode 100644 cnn/utils/array.py
 create mode 100644 cnn/utils/processing.py

diff --git a/cnn/model.py b/cnn/model.py
index 5d37938..87fb73f 100644
--- a/cnn/model.py
+++ b/cnn/model.py
@@ -5,8 +5,12 @@
 from . import layers
 from . import optimisers
 
+def load_model(name):
+	assert name.split('.')[-1] == 'pkl'
+	with open(name, 'rb') as file:  
+		model = pickle.load(file)
+	return model
 
-# CLASS
 class Model():
 	"""
 	This is the top level class.
diff --git a/cnn/utils.py b/cnn/utils.py
deleted file mode 100644
index 9575136..0000000
--- a/cnn/utils.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import numpy as np
-import pickle
-
-def one_hot_encode(array,num_cats,axis=None):
-	'''
-	Perform one-hot encoding on the category labels.
-
-	- array: is a 2D np.ndarray
-	- num_cats: number of categories that the model is to be trained on.
-	- axis: the axis of array that holds the category label value. If axis=None, then this is inferred as the axis with the smallest size.
-	'''
-	assert type(array) in (np.ndarray,list)
-	array = np.array(array)
-	assert array.ndim == 2
-	if axis is None:
-		axis = np.argmin(array.shape)
-	else:
-		assert axis in (0,1)
-	assert array.shape[axis] == 1
-
-	N = array.shape[1 - axis]
-	array = array.reshape((1,N))
-	
-	return np.eye(num_cats)[array][0]	# Returns in the shape (N,num_cats)
-
-def shuffle(X,y,random_seed=None):
-	if random_seed is not None:
-		np.random.seed(random_seed)
-	permutation = np.random.permutation( X.shape[0] )
-	X_shuffled = X[permutation]
-	y_shuffled = y[permutation]
-	print(X_shuffled.shape,y_shuffled.shape)
-	assert X.shape == X_shuffled.shape, f'X shape: {X.shape} | X shuffled shape: {X_shuffled.shape}'
-	return (X_shuffled, y_shuffled)
-
-
-def array_init(shape,method=None,seed=None):
-	''' Random initialisation of weights array.
-	Xavier or Kaiming: (https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79) '''
-	assert len(shape) >= 2
-	fan_in = shape[-1]
-	fan_out = shape[-2]
-
-	if seed:
-		np.random.seed(seed)
-
-	if method is None:
-		array = np.random.randn(*shape) * 0.01
-	elif method == 'kaiming_normal':
-		# AKA "he normal" after Kaiming He.
-		array = np.random.normal(size=shape) * np.sqrt(2./fan_in)
-	elif method == 'kaiming_uniform':
-		array = np.random.uniform(size=shape) * np.sqrt(6./fan_in)
-	elif method == 'xavier_uniform':
-		array = np.random.uniform(size=shape) * np.sqrt(6./(fan_in+fan_out))
-	elif method == 'xavier_normal':
-		# https://arxiv.org/pdf/2004.09506.pdf
-		target_std = np.sqrt(2./np.sum(shape))
-		array = np.random.normal(size=shape,scale=target_std)
-	elif method == 'abs_norm':
-		# Custom alternative
-		arr = np.random.normal(size=shape)
-		array = arr / np.abs(arr).max()
-	elif method == 'uniform':
-		array = np.random.uniform(size=shape) * (1./np.sqrt(fan_in))
-	else:
-		raise BaseException('ERROR: Unrecognised array initialisation method: ' + method)
-
-	# print(f'--> Array init method: {method}, max: {array.max()}, min: {array.min()}, std: {array.std()}' )
-	# print('Array:',array)
-	return array
-
-def load_model(name):
-	assert name.split('.')[-1] == 'pkl'
-	with open(name, 'rb') as file:  
-		model = pickle.load(file)
-	return model
\ No newline at end of file
diff --git a/cnn/utils/__init__.py b/cnn/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cnn/utils/array.py b/cnn/utils/array.py
new file mode 100644
index 0000000..c5928e0
--- /dev/null
+++ b/cnn/utils/array.py
@@ -0,0 +1,48 @@
+import numpy as np
+
+
+def array_init(shape: tuple,method=None,seed=None) -> np.ndarray:
+	''' Random initialisation of a weights array of the given 'shape' (at least 2D). 'method' selects the scheme (None -> randn * 0.01); an unrecognised method raises NameError. A non-None 'seed' re-seeds the global numpy RNG first.
+	Xavier or Kaiming: (https://towardsdatascience.com/weight-initialization-in-neural-networks-a-journey-from-the-basics-to-kaiming-954fb9b47c79) '''
+	assert len(shape) >= 2
+	fan_in = shape[-1]
+	fan_out = shape[-2]
+
+	if seed is not None:	# 'is not None' (rather than truthiness) so that seed=0 is honoured too.
+		np.random.seed(seed)
+
+	if method is None:
+		array = np.random.randn(*shape) * 0.01
+	elif method == 'kaiming_normal':
+		# AKA "he normal" after Kaiming He.
+		array = np.random.normal(size=shape) * np.sqrt(2./fan_in)
+	elif method == 'kaiming_uniform':
+		array = np.random.uniform(size=shape) * np.sqrt(6./fan_in)
+	elif method == 'xavier_uniform':
+		array = np.random.uniform(size=shape) * np.sqrt(6./(fan_in+fan_out))
+	elif method == 'xavier_normal':
+		# https://arxiv.org/pdf/2004.09506.pdf
+		target_std = np.sqrt(2./np.sum(shape))
+		array = np.random.normal(size=shape,scale=target_std)
+	elif method == 'abs_norm':
+		# Custom alternative
+		arr = np.random.normal(size=shape)
+		array = arr / np.abs(arr).max()
+	elif method == 'uniform':
+		array = np.random.uniform(size=shape) * (1./np.sqrt(fan_in))
+	else:
+		raise NameError('ERROR: Unrecognised array initialisation method: ' + str(method))	# str() so a non-string method still reports NameError rather than TypeError.
+
+	return array
+
+def dilate(array: np.ndarray,channel_width: int) -> np.ndarray:
+	""" Returns a copy of the 4D 'array' with 'channel_width' 0s inserted between neighbouring items along axis 2 (rows) and axis 3 (cols); channel_width=0 returns an unchanged copy. """
+	_,_, rows, cols = array.shape
+	dilation_idx_row = np.arange(rows-1) + 1	# Initial indices for insertion of zeros (before every row except the first)
+	dilation_idx_col = np.arange(cols-1) + 1	# Initial indices for insertion of zeros (before every col except the first)
+	dilated_array = array.copy()
+	for n in range(1,channel_width+1):	# One pass per inserted zero; the n multiplier shifts the indices past the zeros added by earlier passes.
+		dilated_array = np.insert(
+			np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ),
+			dilation_idx_col * n, 0, axis=3)
+	return dilated_array
diff --git a/cnn/utils/processing.py b/cnn/utils/processing.py
new file mode 100644
index 0000000..ee52d09
--- /dev/null
+++ b/cnn/utils/processing.py
@@ -0,0 +1,35 @@
+import numpy as np
+from typing import Tuple
+
+def one_hot_encode(array: np.ndarray,num_cats: int,axis: int=None) -> np.ndarray:
+	'''
+	Perform one-hot encoding on the category labels. Returns an array of shape (N,num_cats).
+
+	- array: is a 2D np.ndarray (or nested list) of integer category labels, of shape (1,N) or (N,1).
+	- num_cats: number of categories that the model is to be trained on.
+	- axis: the axis of array that holds the category label value (0 or 1). If axis=None, then this is inferred as the axis with the smallest size.
+	'''
+	assert isinstance(array,(np.ndarray,list))
+	array = np.array(array)
+	assert array.ndim == 2
+	if axis is None:
+		axis = np.argmin(array.shape)
+	else:
+		assert axis in (0,1)
+	assert array.shape[axis] == 1
+
+	N = array.shape[1 - axis]
+	array = array.reshape((1,N))
+	
+	return np.eye(num_cats)[array][0]	# Indexing the identity matrix by label picks one row per label; returns in the shape (N,num_cats)
+
+def shuffle(X: np.ndarray,y: np.ndarray,random_seed: int=None) -> Tuple[np.ndarray,np.ndarray]:
+	""" Shuffle X and y in unison along their first axis and return (X_shuffled, y_shuffled). A non-None 'random_seed' re-seeds the global numpy RNG first. """
+	assert X.shape[0] == y.shape[0], f'X and y must hold the same number of examples | X shape: {X.shape} | y shape: {y.shape}'
+	if random_seed is not None:
+		np.random.seed(random_seed)
+	permutation = np.random.permutation( X.shape[0] )	# One shared permutation keeps each example paired with its label.
+	X_shuffled = X[permutation]
+	y_shuffled = y[permutation]
+	return (X_shuffled, y_shuffled)
+

From eec892c928937a08e2baeb41313018683defcc55 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 27 Dec 2021 20:33:53 +0000
Subject: [PATCH 02/24] Update API usage for examples

---
 diagram_usecase.py  | 12 ++++++------
 mnist_dataloader.py |  6 +++---
 model_analysis.py   |  6 +++---
 nn_iris.py          |  8 ++++----
 4 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/diagram_usecase.py b/diagram_usecase.py
index 785afd5..bce5f06 100644
--- a/diagram_usecase.py
+++ b/diagram_usecase.py
@@ -1,4 +1,4 @@
-from src.cnn import CNN
+import cnn
 import numpy as np
 np.set_printoptions(linewidth=200)
 
@@ -7,18 +7,18 @@
 y = np.array([[0],[1]])
 
 
-model = CNN(input_shape=(3,12,12))
+model = cnn.Model(input_shape=(3,12,12))
 model.add_layer(
-	CNN.Conv_Layer(filt_shape=(3,3),num_filters=2,stride=1,padding=1)
+	cnn.layers.Conv2D(filt_shape=(3,3),num_filters=2,stride=1,padding=1)
 )
 model.add_layer(
-	CNN.Pool_Layer(filt_shape=(3,3),stride=3,pool_type='mean')
+	cnn.layers.Pool(filt_shape=(3,3),stride=3,pool_type='mean')
 )
 model.add_layer(
-	CNN.FC_Layer(num_nodes=9,activation='relu')
+	cnn.layers.FC(num_nodes=9,activation='relu')
 )
 model.add_layer(
-	CNN.FC_Layer(num_nodes=2,activation='sigmoid')
+	cnn.layers.FC(num_nodes=2,activation='sigmoid')
 )
 
 model.train([X],[y],epochs=1)
diff --git a/mnist_dataloader.py b/mnist_dataloader.py
index 8fdf97f..ff6ca09 100644
--- a/mnist_dataloader.py
+++ b/mnist_dataloader.py
@@ -1,6 +1,6 @@
 import mnist
 import numpy as np
-from src.cnn import CNN
+from cnn.utils.processing import one_hot_encode
 np.set_printoptions(linewidth=200)
 
 
@@ -23,8 +23,8 @@ def get_data(normalise=True,one_hot=True):
 	test_labels = test_labels.reshape((1,len(test_labels)))
 
 	# labels need to be 'one-hot encoded'
-	train_labels = CNN.one_hot_encode(train_labels,10) if one_hot else train_labels
-	test_labels = CNN.one_hot_encode(test_labels,10) if one_hot else test_labels
+	train_labels = one_hot_encode(train_labels,10) if one_hot else train_labels
+	test_labels = one_hot_encode(test_labels,10) if one_hot else test_labels
 
 	print('Train images shape:', train_images.shape, 'Train labels shape:', train_labels.shape)
 	print('Test images shape:', test_images.shape, 'Test labels shape:', test_labels.shape)
diff --git a/model_analysis.py b/model_analysis.py
index e359906..40219ee 100644
--- a/model_analysis.py
+++ b/model_analysis.py
@@ -1,7 +1,7 @@
-from src.cnn import CNN
-from src.cnn_analyser import CNN_Analyser
+from cnn.model import load_model
+from model_analysis import CNN_Analyser	# NOTE(review): this file is itself model_analysis.py, so this line imports from the module being executed - confirm which module actually defines CNN_Analyser (the removed line used src.cnn_analyser).
 
-model = CNN.load_model('models/cnn_model_adam_tf_comparitor_vectorised_14-33-36.pkl')
+model = load_model('models/cnn_model_adam_tf_comparitor_vectorised_14-33-36.pkl')
 CA = CNN_Analyser(model)
 
 # print(model.get_model_details())
diff --git a/nn_iris.py b/nn_iris.py
index 4dcc1d5..6222b05 100644
--- a/nn_iris.py
+++ b/nn_iris.py
@@ -1,4 +1,4 @@
-from src import cnn
+import cnn
 import numpy as np
 from sklearn.datasets import load_iris
 import matplotlib.pyplot as plt
@@ -12,13 +12,13 @@
 print(X.shape,Y_onehot.shape)
 # print(X,Y_onehot)
 
-model = cnn.CNN(optimiser_method='adam')
+model = cnn.Model(optimiser_method='adam')
 
 model.add_layer(
-	cnn.CNN.FC_Layer(3,input_shape=(4,1),activation='relu',initiation_method='kaiming_normal')
+	cnn.layers.FC(3,input_shape=(4,1),activation='relu',initiation_method='kaiming_normal')
 )
 model.add_layer(
-	cnn.CNN.FC_Layer(3,activation='softmax',initiation_method='kaiming_normal')
+	cnn.layers.FC(3,activation='softmax',initiation_method='kaiming_normal')
 )
 
 model.prepare_model()

From 47339734796574aa765e656910bd2d130db517a5 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 27 Dec 2021 20:51:55 +0000
Subject: [PATCH 03/24] Add function type hints

---
 cnn/layers/activation.py   |  4 +--
 cnn/layers/conv.py         | 72 +++++++++++++++++++-------------------
 cnn/layers/fc.py           |  6 ++--
 cnn/layers/flatten.py      |  4 +--
 cnn/layers/pool.py         |  6 ++--
 cnn/model.py               | 28 ++++++++-------
 cnn/optimisers/__init__.py |  4 +++
 cnn/optimisers/adam.py     |  5 +--
 cnn/optimisers/gd.py       |  7 ++--
 cnn/optimisers/rmsprop.py  |  5 +--
 10 files changed, 76 insertions(+), 65 deletions(-)

diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
index 3ab0fc8..9ac1af7 100644
--- a/cnn/layers/activation.py
+++ b/cnn/layers/activation.py
@@ -22,7 +22,7 @@ def prepare_layer(self):
 		self.OUTPUT_SHAPE = self.INPUT_SHAPE
 		# self.output = np.zeros(shape=self.INPUT_SHAPE )
 
-	def _forwards(self,_input):
+	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		if self.prev_layer.LAYER_TYPE == 'FC':
 			assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}'
 		self.input = _input
@@ -64,7 +64,7 @@ def _forwards(self,_input):
 		# print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output)
 		return self.output
 
-	def _backwards(self,dC_dA):
+	def _backwards(self,dC_dA: np.ndarray) -> np.ndarray:
 		"""Compute derivative of Activation w.r.t. Z
 		NOTE: CURRENTLY NOT SUPPORTED FOR CONV/POOL LAYERS.
 		"""
diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index 413075e..41d980a 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -38,7 +38,7 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd
 		self.VECTORISED = vectorised
 		self.TRACK_HISTORY = track_history
 
-	def prepare_layer(self):
+	def prepare_layer(self) -> None:
 		if self.prev_layer == None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
 		else:
@@ -55,7 +55,7 @@ def prepare_layer(self):
 
 		# Initiate params
 		self.filters = CNNParam(
-			utils.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED),
+			utils.array.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED),
 			trainable=True
 		)
 		self.bias = CNNParam(
@@ -77,15 +77,15 @@ def prepare_layer(self):
 				pad_rows_needed = ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE)
 				pad_cols_needed = ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE)
 
-			self.COL_LEFT_PAD = pad_cols_needed // 2	# // Floor division
-			self.COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2)
-			self.ROW_UP_PAD = pad_rows_needed // 2	# // Floor division
-			self.ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2)
+			self._COL_LEFT_PAD = pad_cols_needed // 2	# // Floor division
+			self._COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2)
+			self._ROW_UP_PAD = pad_rows_needed // 2	# // Floor division
+			self._ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2)
 		else:
-			self.COL_LEFT_PAD = self.COL_RIGHT_PAD = self.ROW_UP_PAD = self.ROW_DOWN_PAD = self.PADDING
+			self._COL_LEFT_PAD = self._COL_RIGHT_PAD = self._ROW_UP_PAD = self._ROW_DOWN_PAD = self.PADDING
 
-		col_out = int((NUM_INPUT_COLS + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
-		row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
+		col_out = int((NUM_INPUT_COLS + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
+		row_out = int((NUM_INPUT_ROWS + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out)
 		
@@ -94,13 +94,13 @@ def prepare_layer(self):
 			assert self.OUTPUT_SHAPE[-2:] == self.INPUT_SHAPE[-2:], f'"SAME" padding chosen however last two dimensions of input and output shapes do not match; {self.INPUT_SHAPE} and {self.OUTPUT_SHAPE} respectively.'	# Channels may differ.
 
 
-	def _forwards(self,_input):
+	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		assert _input.ndim == 4 and _input.shape[1:] == self.INPUT_SHAPE, f'Input shape, {_input.shape[1:]}, expected to be, {self.INPUT_SHAPE} for each example (observation).'
 		self.input = _input
 		batch_size = _input.shape[0]
 
 		# Apply the padding to the input.
-		self.padded_input = np.pad(self.input,[(0,0),(0,0),(self.ROW_UP_PAD,self.ROW_DOWN_PAD),(self.COL_LEFT_PAD,self.COL_RIGHT_PAD)],'constant',constant_values=(0,0))
+		self.padded_input = np.pad(self.input,[(0,0),(0,0),(self._ROW_UP_PAD,self._ROW_DOWN_PAD),(self._COL_LEFT_PAD,self._COL_RIGHT_PAD)],'constant',constant_values=(0,0))
 
 		if self.VECTORISED:
 			self.output = Conv2D.convolve_vectorised(self.padded_input,self.filters,self.STRIDE)
@@ -123,31 +123,29 @@ def _forwards(self,_input):
 		if self.TRACK_HISTORY: self._track_metrics(output=self.output)
 		return self.output	# NOTE: Output is 4D array of shape: ( BATCH_SIZE, NUM_FILTS, NUM_ROWS, NUM_COLS )
 
-	def _backwards(self,cost_gradient):	
+	def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:	
 		assert cost_gradient.shape == self.output.shape, f'cost_gradient shape {cost_gradient.shape} does not match layer output shape {self.output.shape}.'
 		if self.TRACK_HISTORY: self._track_metrics(cost_gradient=cost_gradient)
-		_,_, c_rows, c_cols = cost_gradient.shape
-		dilation_idx_row = np.arange(c_rows-1) + 1	# Intiatial indices for insertion of zeros
-		dilation_idx_col = np.arange(c_cols-1) + 1	# Intiatial indices for insertion of zeros
-
-		cost_gradient_dilated = cost_gradient.copy()
-		if self.STRIDE != 1:
-			for n in range(1,self.STRIDE):	# the n multiplier is to increment the indices in the non-uniform manner required.
-				cost_gradient_dilated = np.insert(
-					np.insert( cost_gradient_dilated, dilation_idx_row * n, 0, axis=2 ),
-					dilation_idx_col * n, 0, axis=3)
-		# print(f'cost_gradient shape: {cost_gradient.shape} | cost_gradient_dilated shape: {cost_gradient_dilated.shape}')
+
+		cost_gradient_dilated = utils.array.dilate(cost_gradient,self.STRIDE-1)
 
 		batch_size, channels, _, _ = self.padded_input.shape
 
-		# Account for filter not shifting over input an integer number of times with given stride.
+		# Account for filter not shifting over input an integer number of times with given stride. In this case,
+		# the 'effective' input is smaller than the actual input.
 		pxls_excl_x = (self.padded_input.shape[3] - self.FILT_SHAPE[1]) % self.STRIDE	# pixels excluded in x direction (cols)
 		pxls_excl_y = (self.padded_input.shape[2] - self.FILT_SHAPE[0]) % self.STRIDE	# pixels excluded in y direction (rows)
-		# print('PIXELS EXCLUDED:',pxls_excl_x,pxls_excl_y)
 
-		# Find cost gradient wrt previous output and filters.
+		# Extract effective input
+		effective_input = self.padded_input[
+			:,	# All data points 
+			:,	# All channels
+			:self.padded_input.shape[2] - pxls_excl_y, # Only rows up to those excluded in forwards pass
+			:self.padded_input.shape[3] - pxls_excl_x	# Only cols up to those excluded in forwards pass
+			]
+
+		# Find cost gradient wrt layer input and filters.
 		rotated_filters = np.rot90( self.filters, k=2, axes=(2,3) )	# rotate 2x90 degs, rotating in direction of rows to columns.
-		dCdX_pad = np.zeros(shape=self.padded_input.shape)
 		if self.VECTORISED:
 			# NOTE: convolution function sums across channels; in this case we want to sum across batch data points so we 
 			# transpose the arrays to switch the 'channels' with the 'batch' fields. We then need to switch these back for the
@@ -155,7 +153,7 @@ def _backwards(self,cost_gradient):
 			dCdF = np.transpose(
 				Conv2D.convolve_vectorised(
 					np.transpose(
-						self.padded_input[:,:, :self.padded_input.shape[2] - pxls_excl_y, :self.padded_input.shape[3] - pxls_excl_x],
+						effective_input,
 						axes=(1,0,2,3)
 					),
 					np.transpose(
@@ -166,7 +164,7 @@ def _backwards(self,cost_gradient):
 				),
 				axes=(1,0,2,3))
 			# NOTE: Here we need to transpose the filters to allign the channels of the filters with the batched data points in the cost gradient array.
-			dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = Conv2D.convolve_vectorised(
+			effective_input_gradient = Conv2D.convolve_vectorised(
 				cost_gradient_dilated,
 				np.transpose(
 					rotated_filters,
@@ -176,12 +174,12 @@ def _backwards(self,cost_gradient):
 				full_convolve=True)
 		else:
 			dCdF = np.zeros(shape=self.filters.shape)
+			effective_input_gradient = np.zeros(shape=effective_input.shape)	# Sized to the effective input: each full convolution added below has that spatial size, not the padded-input size.
 			for i in range(batch_size):
 				for filt_index in range(self.NUM_FILTERS):
 					for channel_index in range(channels):
-						dCdF[filt_index, channel_index] += Conv2D.convolve( self.padded_input[i,channel_index, :self.padded_input.shape[2] - pxls_excl_y, :self.padded_input.shape[3] - pxls_excl_x], cost_gradient_dilated[i,filt_index], stride=1 )
-						dCdX_pad[i,channel_index, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] += Conv2D.convolve( cost_gradient_dilated[i,filt_index], rotated_filters[filt_index,channel_index], stride=1, full_convolve=True )
-		# dCdF = dCdF[:,:, : dCdF.shape[2] - pxls_excl_y, : dCdF.shape[3] - pxls_excl_x]	# Remove the values from right and bottom of array (this is where the excluded pixels will be).
+						dCdF[filt_index, channel_index] += Conv2D.convolve( effective_input[i,channel_index,:,:], cost_gradient_dilated[i,filt_index], stride=1 )
+						effective_input_gradient[i,channel_index, :, :] += Conv2D.convolve( cost_gradient_dilated[i,filt_index], rotated_filters[filt_index,channel_index], stride=1, full_convolve=True )
 		
 		# ADJUST THE FILTERS
 		assert dCdF.shape == self.filters.shape, f'dCdF shape {dCdF.shape} does not match filters shape {self.filters.shape}.'
@@ -196,14 +194,16 @@ def _backwards(self,cost_gradient):
 		if self.bias.trainable:
 			self.bias = self.model.OPTIMISER.update_param(self.bias)
 
-		# Remove padding that was added to the input array.
-		dCdX = dCdX_pad[ :, : , self.ROW_UP_PAD : dCdX_pad.shape[-2] - self.ROW_DOWN_PAD , self.COL_LEFT_PAD : dCdX_pad.shape[-1] - self.COL_RIGHT_PAD ]
+		# Obtain dCdX: place the gradient wrt the effective input into a zero array of padded-input shape (excluded pixels keep zero gradient), then strip the padding.
+		dCdX_pad = np.zeros(shape=self.padded_input.shape)
+		dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input_gradient
+		dCdX = dCdX_pad[ :, : , self._ROW_UP_PAD : dCdX_pad.shape[-2] - self._ROW_DOWN_PAD , self._COL_LEFT_PAD : dCdX_pad.shape[-1] - self._COL_RIGHT_PAD ]
 		assert dCdX.shape == self.input.shape, f'dCdX shape [{dCdX.shape}] does not match layer input shape [{self.input.shape}].'
 
 		return dCdX
 
 	@staticmethod
-	def convolve(A, B, stride,full_convolve=False):
+	def convolve(A: np.ndarray, B: np.ndarray, stride: int,full_convolve: bool=False) -> np.ndarray:
 		""" A and B are 2D arrays. Array B will be convolved over Array A using the stride provided.
 			- 'full_convolve' is where the bottom right cell of B starts over the top of the top left cell of A and shifts by stride until the top left cell of B is over the bottom right cell of A. (i.e. A is padded in each dimension by B - 1 in the respective dimension). """
 		assert A.ndim == 2
@@ -234,7 +234,7 @@ def convolve(A, B, stride,full_convolve=False):
 		return output
 
 	@staticmethod
-	def convolve_vectorised(X,K, stride, full_convolve=False):
+	def convolve_vectorised(X: np.ndarray,K: np.ndarray, stride: int, full_convolve: bool=False) -> np.ndarray:
 		"""
 		X: 4D array of shape: (batch_size,channels,rows,cols)
 		K: 4D array of shape: (num_filters,X_channels,rows,cols)
diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py
index f81e4da..4468481 100644
--- a/cnn/layers/fc.py
+++ b/cnn/layers/fc.py
@@ -25,7 +25,7 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met
 			assert len(input_shape) == 2 and input_shape[1] == 1, 'Invalid input_shape tuple. Expected (n,1)'
 		self.INPUT_SHAPE = input_shape
 
-	def prepare_layer(self):
+	def prepare_layer(self) -> None:
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
 		else:
@@ -49,7 +49,7 @@ def prepare_layer(self):
 		# self.output = np.zeros(shape=(self.NUM_NODES,1))	# NOTE: This is a vertical array.
 
 
-	def _forwards(self,_input):
+	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		# print(_input.shape)
 		if self.prev_layer is None:
 			self.input = _input.T
@@ -64,7 +64,7 @@ def _forwards(self,_input):
 		# print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output)
 		return self.output
 
-	def _backwards(self, dC_dZ):
+	def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray:
 		"""
 		Take cost gradient dC/dZ (how the output of this layer affects the cost) and backpropogate
 
diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py
index 4d9b541..6cb20b3 100644
--- a/cnn/layers/flatten.py
+++ b/cnn/layers/flatten.py
@@ -21,7 +21,7 @@ def __init__(self,input_shape=None):
 
 		self.NUM_PARAMS = 0
 
-	def prepare_layer(self):
+	def prepare_layer(self) -> None:
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
 		else:
@@ -29,7 +29,7 @@ def prepare_layer(self):
 		self.OUTPUT_SHAPE = (np.prod(self.INPUT_SHAPE),1)	# Output shape for a single example.
 		# self.output = np.zeros(shape=(np.prod(self.INPUT_SHAPE[1:]),self.INPUT_SHAPE[0]))
 
-	def _forwards(self,_input):
+	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		assert _input.shape[1:] == self.INPUT_SHAPE, f'ERROR:: Input has unexpected shape: {_input.shape[1:]} | expected: {self.INPUT_SHAPE}'
 		self.input = _input
 		self.output = _input.T.reshape((-1,_input.shape[0]))	# Taking transpose here puts each example into its own column - number of columns == number of examles.
diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index a7a162a..c1984cf 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -34,7 +34,7 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd
 
 		self.NUM_PARAMS = 0
 
-	def prepare_layer(self):
+	def prepare_layer(self) -> None:
 		""" This needs to be done after the input has been identified - currently happens when train() is called. """
 		if self.prev_layer == None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
@@ -87,7 +87,7 @@ def prepare_layer(self):
 		if self.PAD_TYPE == 'same':
 			assert self.OUTPUT_SHAPE == self.INPUT_SHAPE	# Channels may differ.
 
-	def _forwards(self,_input):
+	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		assert _input.ndim == 4 and _input.shape[1:] == self.INPUT_SHAPE, f'Input shape, {_input.shape[1:]}, expected to be, {self.INPUT_SHAPE} for each example (observation).'
 		self.input = _input
 
@@ -137,7 +137,7 @@ def _forwards(self,_input):
 		if self.TRACK_HISTORY: self._track_metrics(output=self.output)
 		return self.output
 
-	def _backwards(self,cost_gradient):
+	def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:
 		'''
 		Backprop in pooling layer:
 		- nothing to be updated as there are no weights in this layer.
diff --git a/cnn/model.py b/cnn/model.py
index 87fb73f..36dc119 100644
--- a/cnn/model.py
+++ b/cnn/model.py
@@ -2,9 +2,13 @@
 import pickle
 import math
 from datetime import datetime as dt
+
+from cnn.layers.layer import Layer
 from . import layers
 from . import optimisers
 
+from typing import Any, AnyStr
+
 def load_model(name):
 	assert name.split('.')[-1] == 'pkl'
 	with open(name, 'rb') as file:  
@@ -29,7 +33,7 @@ def __init__(self,optimiser_method='gd'):
 		self.structure = []	# defines order of model (list of layer objects) - EXCLUDES INPUT DATA
 		self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1)))	# dict for counting number of each layer type
 
-	def add_layer(self,layer):
+	def add_layer(self,layer: Layer) -> None:
 		if layer.LAYER_TYPE == 'ACTIVATION' and self.structure[-1].LAYER_TYPE == 'ACTIVATION':
 			print('-- WARNING:: Two Activation Layers in subsequent positions in the model.')
 			if layer.FUNCTION == self.structure[-1].FUNCTION:
@@ -53,7 +57,7 @@ def add_layer(self,layer):
 				layers.Activation(function=layer.ACTIVATION)
 			)
 
-	def remove_layer(self,index):
+	def remove_layer(self,index: int) -> None:
 		self.structure.pop(index)
 		if self.is_prepared:
 			print('-- INFO:: Re-compiling model...')
@@ -66,7 +70,7 @@ def get_model_details(self):
 
 		return details
 		
-	def prepare_model(self,optimiser='gd',learning_rate=None):
+	def prepare_model(self,optimiser: Any='gd',learning_rate=None):
 		""" Called once final layer is added, each layer can now initiate its weights and biases. """
 		print('Preparing model...')
 
@@ -74,7 +78,7 @@ def prepare_model(self,optimiser='gd',learning_rate=None):
 			assert optimiser.lower() in optimisers.optimiser_names, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_names}'
 			self.OPTIMISER = optimisers.from_name(optimiser,learning_rate)
 		else:
-			assert optimiser.__class__.__name__ in optimisers.optimiser_names, f'Invalid optimiser: {optimiser}'
+			assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_names), f'Invalid optimiser: {optimiser}'
 			self.OPTIMISER = optimiser
 
 		self.details = {
@@ -110,7 +114,7 @@ def prepare_model(self,optimiser='gd',learning_rate=None):
 		self.print_summary()
 		print(f'Model Prepared: {self.is_prepared}')
 
-	def train(self,Xs,ys,epochs,max_batch_size=32,shuffle=False,random_seed=42,learning_rate=0.01,cost_fn='mse',beta1=0.9,beta2=0.999):
+	def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32,shuffle: bool=False,random_seed: int=42,learning_rate: float=0.01,cost_fn: str='mse',beta1: float=0.9,beta2: float=0.999) -> tuple:
 		'''
 		Should take array of inputs and array of labels of the same length.
 
@@ -179,7 +183,7 @@ def train(self,Xs,ys,epochs,max_batch_size=32,shuffle=False,random_seed=42,learn
 
 		return dt.now(), dt.now() - train_start	# returns training finish time and duration.
 
-	def _print_train_progress(self,batch_index):
+	def _print_train_progress(self,batch_index: int) -> None:
 		progess_bar_length = 30	# characters (not including '[' ']')
 		progress = (batch_index+1) / self.BATCH_COUNT
 		progressor = '=' * int(progress * progess_bar_length)
@@ -199,7 +203,7 @@ def _print_train_progress(self,batch_index):
 
 	SUPPORTED_OPTIMISERS = ('gd','momentum','rmsprop','adam')
 
-	def _iterate_forwards(self):
+	def _iterate_forwards(self) -> None:
 		for batch_ind in range(self.BATCH_COUNT):
 			ind_lower = batch_ind * self.MAX_BATCH_SIZE	# Lower bound of index range
 			ind_upper = batch_ind * self.MAX_BATCH_SIZE + self.MAX_BATCH_SIZE	# Upper bound of index range
@@ -233,7 +237,7 @@ def _iterate_forwards(self):
 
 			self._iterate_backwards()
 
-	def _iterate_backwards(self):
+	def _iterate_backwards(self) -> None:
 		self.iteration_index += 1
 		self.history['cost'][self.iteration_index] = self.iteration_cost
 		# Backpropagate the cost_gradient
@@ -244,7 +248,7 @@ def _iterate_backwards(self):
 		self.iteration_cost = 0
 		self.iteration_cost_gradient = 0
 
-	def predict(self,Xs,training=False):
+	def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray:
 		if training: self.feed_forwards_cycle_index += 1
 		for layer in self.structure:
 			Xs = layer._forwards(Xs)
@@ -252,7 +256,7 @@ def predict(self,Xs,training=False):
 			# print('Output:',X)
 		return Xs
 
-	def evaluate(self,Xs,ys):
+	def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> float:
 		predictions = self.predict(Xs,training=False)
 		accuracy = np.sum((np.argmax(ys.T,axis=0) == np.argmax(predictions,axis=0))) / len(Xs)
 		return accuracy
@@ -269,7 +273,7 @@ def _initiate_tracking_metrics(self):
 
 	SUPPORTED_COST_FUNCTIONS = ('mse','cross_entropy')
 
-	def cost(self,predictions,labels,derivative=False):
+	def cost(self,predictions: np.ndarray,labels: np.ndarray,derivative: bool=False) -> float:
 		'''
 		Cost function to provide measure of model 'correctness'. returns vector cost value.
 		'''
@@ -339,4 +343,4 @@ def print_summary(self):
 		print('Total params:',total_trainable + total_non_trainable)
 		print('Trainable params:',total_trainable)
 		print('Non-trainable params:',total_non_trainable)
-		print('='*(np.sum(field_lengths) + len(field_names)))
\ No newline at end of file
+		print('='*(np.sum(field_lengths) + len(field_names)))
diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py
index adef7e9..288ee49 100644
--- a/cnn/optimisers/__init__.py
+++ b/cnn/optimisers/__init__.py
@@ -1,3 +1,4 @@
+from cnn.params import CNNParam
 from .adam import Adam
 from .gd import GradientDescent
 from .rmsprop import RMSProp
@@ -20,4 +21,7 @@ def from_name(name,learning_rate):
 			else:
 				return optimiser(learning_rate=learning_rate)
 			
+class BaseOptimiser:
+	def update_param(param: CNNParam) -> np.ndarray:
+		raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.")
 
diff --git a/cnn/optimisers/adam.py b/cnn/optimisers/adam.py
index 739c468..80f01da 100644
--- a/cnn/optimisers/adam.py
+++ b/cnn/optimisers/adam.py
@@ -1,9 +1,10 @@
 
 import numpy as np
+from cnn.optimisers import BaseOptimiser
 
 from cnn.params import CNNParam
 
-class Adam:
+class Adam(BaseOptimiser):
 	""" Adaptive Movement Estimation Algorithm 
 	- combination of 'Gradient Descent with Momentum' and 'RMSprop' """
 	
@@ -15,7 +16,7 @@ def __init__(self,learning_rate=0.001,beta1=0.9,beta2=0.999,epsilon=1e-8):
 		self.BETA2 = beta2	# Second moment decay factor
 		self.EPSILON = epsilon	# This is a very small value just to avoid division by 0.
 
-	def update_param(self,param) -> np.ndarray:
+	def update_param(self,param: CNNParam) -> np.ndarray:
 		# TODO: Change function sig. Needs to be consistent with other optimisers.
 		if "momentum1" in param.associated_data:
 			momentum1 = param.associated_data["momentum1"]
diff --git a/cnn/optimisers/gd.py b/cnn/optimisers/gd.py
index 9c7e518..7c3a1f6 100644
--- a/cnn/optimisers/gd.py
+++ b/cnn/optimisers/gd.py
@@ -1,13 +1,14 @@
 
+from cnn.optimisers import BaseOptimiser
 from cnn.params import CNNParam
+import numpy as np
 
-
-class GradientDescent:
+class GradientDescent(BaseOptimiser):
 
 	ALIAS = 'gd'
 
 	def __init__(self,learning_rate=0.001,beta=0.9,):
 		self.ALPHA = learning_rate
 
-	def update_param(self,param):
+	def update_param(self,param: CNNParam) -> np.ndarray:
 		return param - self.ALPHA * param.gradient
diff --git a/cnn/optimisers/rmsprop.py b/cnn/optimisers/rmsprop.py
index 576316f..2af18f3 100644
--- a/cnn/optimisers/rmsprop.py
+++ b/cnn/optimisers/rmsprop.py
@@ -1,8 +1,9 @@
 import numpy as np
+from cnn.optimisers import BaseOptimiser
 
 from cnn.params import CNNParam
 
-class RMSProp:
+class RMSProp(BaseOptimiser):
 	''' Root mean square propagation '''
 
 	ALIAS = 'rmsprop'
@@ -12,7 +13,7 @@ def __init__(self,learning_rate=0.001,beta=0.9,epsilon=1e-8):
 		self.EPSILON = epsilon
 		self.BETA = beta
 
-	def update_param(self,param) -> np.ndarray:
+	def update_param(self,param: CNNParam) -> np.ndarray:
 		if "momentum1" in param.associated_data["momentum1"]:
 			s = param.associated_data["momentum1"]
 		else:

From 958b1893690f7fc6e42f510a440f944e18e6f235 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 27 Dec 2021 21:14:01 +0000
Subject: [PATCH 04/24] fix some import errors

---
 cnn/optimisers/__init__.py | 7 +++----
 cnn/optimisers/adam.py     | 1 -
 cnn/optimisers/base.py     | 8 ++++++++
 cnn/utils/__init__.py      | 2 ++
 cnn/utils/array.py         | 2 +-
 5 files changed, 14 insertions(+), 6 deletions(-)
 create mode 100644 cnn/optimisers/base.py

diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py
index 288ee49..ef04035 100644
--- a/cnn/optimisers/__init__.py
+++ b/cnn/optimisers/__init__.py
@@ -1,14 +1,16 @@
 from cnn.params import CNNParam
+from .base import BaseOptimiser
 from .adam import Adam
 from .gd import GradientDescent
 from .rmsprop import RMSProp
+import numpy as np
 
 # ------------- BELOW IS DYNAMIC TO AVAILABLE OPTIMISER CLASSES ----------------
 
 # Expose list of all optimiser class names.
 import inspect
 import sys
-__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)]
+__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda obj: inspect.isclass(obj) and issubclass(obj,BaseOptimiser) and obj is not BaseOptimiser)]	# Members are class objects, so issubclass() is needed (isinstance() never matches them); the abstract base itself is left out.
 
 # Following includes both class name and alias property.
 optimiser_names = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes]
@@ -21,7 +23,4 @@ def from_name(name,learning_rate):
 			else:
 				return optimiser(learning_rate=learning_rate)
 			
-class BaseOptimiser:
-	def update_param(param: CNNParam) -> np.ndarray:
-		raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.")
 
diff --git a/cnn/optimisers/adam.py b/cnn/optimisers/adam.py
index 80f01da..2d70f24 100644
--- a/cnn/optimisers/adam.py
+++ b/cnn/optimisers/adam.py
@@ -17,7 +17,6 @@ def __init__(self,learning_rate=0.001,beta1=0.9,beta2=0.999,epsilon=1e-8):
 		self.EPSILON = epsilon	# This is a very small value just to avoid division by 0.
 
 	def update_param(self,param: CNNParam) -> np.ndarray:
-		# TODO: Change function sig. Needs to be consistent with other optimisers.
 		if "momentum1" in param.associated_data:
 			momentum1 = param.associated_data["momentum1"]
 		else:
diff --git a/cnn/optimisers/base.py b/cnn/optimisers/base.py
new file mode 100644
index 0000000..611e6ca
--- /dev/null
+++ b/cnn/optimisers/base.py
@@ -0,0 +1,8 @@
+from cnn.params import CNNParam
+import numpy as np
+
+
+class BaseOptimiser:	# Common parent of the optimiser classes; subclasses override ALIAS and update_param().
+	ALIAS = 'base'
+	def update_param(self,param: CNNParam) -> np.ndarray:	# Takes `self` like the subclass overrides, so an un-overridden instance call reaches the error below.
+		raise NotImplementedError("Optimisers inheriting from this base class must implement update_param() method.")
diff --git a/cnn/utils/__init__.py b/cnn/utils/__init__.py
index e69de29..7f306a4 100644
--- a/cnn/utils/__init__.py
+++ b/cnn/utils/__init__.py
@@ -0,0 +1,2 @@
+from . import array
+from . import processing
diff --git a/cnn/utils/array.py b/cnn/utils/array.py
index c5928e0..1059afc 100644
--- a/cnn/utils/array.py
+++ b/cnn/utils/array.py
@@ -41,7 +41,7 @@ def dilate(array: np.ndarray,channel_width: int) -> np.ndarray:
 	dilation_idx_row = np.arange(rows-1) + 1	# Intiatial indices for insertion of zeros
 	dilation_idx_col = np.arange(cols-1) + 1	# Intiatial indices for insertion of zeros
 	dilated_array = array.copy()
-	for n in range(1,channel_width):	# the n multiplier is to increment the indices in the non-uniform manner required.
+	for n in range(1,channel_width+1):	# the n multiplier is to increment the indices in the non-uniform manner required.
 		dilated_array = np.insert(
 			np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ),
 			dilation_idx_col * n, 0, axis=3)

From d2d47b4768986ecc7d3ac455977f774a087a03ea Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 27 Dec 2021 21:14:18 +0000
Subject: [PATCH 05/24] fix conv backprop bug

---
 cnn/layers/conv.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index 41d980a..87a32b1 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -174,7 +174,7 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:
 				full_convolve=True)
 		else:
 			dCdF = np.zeros(shape=self.filters.shape)
-			effective_input_gradient = np.zeros(shape=self.padded_input.shape)
+			effective_input_gradient = np.zeros(shape=effective_input.shape)
 			for i in range(batch_size):
 				for filt_index in range(self.NUM_FILTERS):
 					for channel_index in range(channels):
@@ -196,7 +196,7 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:
 
 		# Obtain dCdX, accounting for padding and excluded input values
 		dCdX_pad = np.zeros(shape=self.padded_input.shape)
-		dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input
+		dCdX_pad[:,:, :dCdX_pad.shape[2] - pxls_excl_y, :dCdX_pad.shape[3] - pxls_excl_x] = effective_input_gradient
 		dCdX = dCdX_pad[ :, : , self._ROW_UP_PAD : dCdX_pad.shape[-2] - self._ROW_DOWN_PAD , self._COL_LEFT_PAD : dCdX_pad.shape[-1] - self._COL_RIGHT_PAD ]
 		assert dCdX.shape == self.input.shape, f'dCdX shape [{dCdX.shape}] does not match layer input shape [{self.input.shape}].'
 

From a35e01321536a03f668b5c0ac35b4518ee524a4b Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Tue, 28 Dec 2021 11:58:32 +0000
Subject: [PATCH 06/24] refactor FC layer to use new CNNParam

---
 cnn/layers/fc.py | 73 ++++++++++++++++++++++++++----------------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py
index 4468481..2550b97 100644
--- a/cnn/layers/fc.py
+++ b/cnn/layers/fc.py
@@ -1,12 +1,15 @@
 import numpy as np
+
+from cnn.params import CNNParam
 from .layer import Layer
 from cnn import utils
 
 class FC(Layer):
 	"""
-	The Fully Connected Layer is defined as being the layer of nodes and the weights of the connections that link those nodes to the previous layer.
+	The Fully Connected Layer is defined as being the layer of nodes and the weights of the connections that link 
+	those nodes to the previous layer.
 	"""
-	def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_method=None,input_shape=None):
+	def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_method=None,input_shape=None,track_history=True):
 		"""
 		- n: Number of nodes in layer.
 		- activation: The name of the activation function to be used. The activation is handled by an Activation object that is transparent to the user here. Defaults to None - a transparent Activation layer will still be added however, the data passing through will be untouched.
@@ -24,6 +27,7 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met
 		if input_shape is not None:
 			assert len(input_shape) == 2 and input_shape[1] == 1, 'Invalid input_shape tuple. Expected (n,1)'
 		self.INPUT_SHAPE = input_shape
+		self.TRACK_HISTORY = track_history
 
 	def prepare_layer(self) -> None:
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
@@ -31,37 +35,24 @@ def prepare_layer(self) -> None:
 		else:
 			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
 		
-		self.params['weights'] = {
-			'name':'weights',
-			'trainable':True,
-			'values':utils.array_init(shape=(self.NUM_NODES,self.INPUT_SHAPE[0]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED)	# NOTE: this is the correct orientation for vertical node array.
-		}
-
-		self.params['bias'] = {
-			'name': 'bias',
-			'trainable': True,
-			'values': np.zeros(shape=(self.NUM_NODES,1))	# NOTE: Recommended to initaite biases to zero.
-		}
-
-		# self.NUM_PARAMS = self.weights.size + self.bias.size
-		
+		self.weights = utils.array.array_init(shape=(self.NUM_NODES,self.INPUT_SHAPE[0]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED)	# NOTE: this is the correct orientation for vertical node array.
+
+		self.bias = np.zeros(shape=(self.NUM_NODES,1))	# NOTE: Recommended to initaite biases to zero.
+
 		self.OUTPUT_SHAPE = (self.NUM_NODES,1)
-		# self.output = np.zeros(shape=(self.NUM_NODES,1))	# NOTE: This is a vertical array.
 
 
 	def _forwards(self,_input: np.ndarray) -> np.ndarray:
-		# print(_input.shape)
 		if self.prev_layer is None:
-			self.input = _input.T
+			self.input = _input
 		else:
 			assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}'
 			self.input = _input
 
-		self.output = np.dot( self.params['weights']['values'], self.input ) + self.params['bias']['values']
+		self.output = np.dot( self.weights, self.input ) + self.bias
 		
 		assert len(self.output.shape) == 2 and self.output.shape[0] == self.OUTPUT_SHAPE[0], f'Output shape, {(self.output.shape[0],1)}, not as expected, {self.OUTPUT_SHAPE}'
-		self._track_metrics(output=self.output)
-		# print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output)
+		if self.TRACK_HISTORY: self._track_metrics(output=self.output)
 		return self.output
 
 	def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray:
@@ -72,27 +63,41 @@ def _backwards(self, dC_dZ: np.ndarray) -> np.ndarray:
 
 		"""
 		assert dC_dZ.shape == self.output.shape, f'dC/dZ shape, {dC_dZ.shape}, does not match Z shape, {self.output.shape}.'
-		self._track_metrics(cost_gradient=dC_dZ)
+		if self.TRACK_HISTORY: self._track_metrics(cost_gradient=dC_dZ)
 
 		dZ_dW = self.input.T	# Partial diff of weighted sum (Z) w.r.t. weights
 		dZ_dB = 1
-		dZ_dI = self.params['weights']['values'].T	# Partial diff of weighted sum w.r.t. input to layer.
+		dZ_dI = self.weights.T	# Partial diff of weighted sum w.r.t. input to layer.
 		
-		# dC_dW.shape === W.shape = (n(l),n(l-1)) | dZ_dW.shape = (1,n(l-1))
-		# dC_dW = np.multiply( dC_dZ , dZ_dW )	# Element-wise multiplication. The local gradient needs transposing for the multiplication.
 		dC_dW = np.dot(dC_dZ,dZ_dW)
-		assert dC_dW.shape == self.params['weights']['values'].shape, f'dC/dW shape {dC_dW.shape} does not match W shape {self.params["weights"]["values"].shape}'
-		# self.weights = self.weights - ( self.model.LEARNING_RATE * dC_dW )	# NOTE: Adjustments done in opposite direction to dC_dZ
-		if self.params['weights']['trainable']:
-			self.params['weights']['values'] = self.model.OPTIMISER.update_param(self.params['weights'],dC_dW,self.MODEL_STRUCTURE_INDEX)
+		assert dC_dW.shape == self.weights.shape, f'dC/dW shape {dC_dW.shape} does not match W shape {self.weights.shape}'
+		self.weights.gradient = dC_dW
+		if self.weights.trainable:
+			self.weights = self.model.OPTIMISER.update_param(self.weights)
 
 		dC_dB = np.sum(dC_dZ * dZ_dB, axis=1,keepdims=True)	# Element-wise multiplication (dZ_dB turns out to be just 1)
 
-		assert dC_dB.shape == self.params['bias']['values'].shape, f'dC/dB shape {dC_dB.shape} does not match B shape {self.params["bias"]["values"].shape}'
-		# self.bias = self.bias - ( self.model.LEARNING_RATE * dC_dB )	# NOTE: Adjustments done in opposite direction to dC_dZ
-		if self.params['bias']['trainable']:
-			self.params['bias']['values'] = self.model.OPTIMISER.update_param(self.params['bias'],dC_dB,self.MODEL_STRUCTURE_INDEX)
+		assert dC_dB.shape == self.bias.shape, f'dC/dB shape {dC_dB.shape} does not match B shape {self.bias.shape}'
+		self.bias.gradient = dC_dB
+		if self.bias.trainable:
+			self.bias = self.model.OPTIMISER.update_param(self.bias)
 
 		dC_dI = np.dot( dZ_dI , dC_dZ )
 		assert dC_dI.shape == self.input.shape, f'dC/dI shape {dC_dI.shape} does not match input shape {self.input.shape}.'
 		return dC_dI
+
+	@property
+	def weights(self):	# Read access to the layer weights: returns whatever the setter last stored in self._weights.
+		return self._weights
+
+	@weights.setter
+	def weights(self,value):	# Every assignment is wrapped in CNNParam, so plain arrays/lists may be assigned.
+		self._weights = CNNParam(value)	# NOTE(review): a new CNNParam is built on each assignment - confirm it keeps trainable/gradient when `value` is already a CNNParam.
+
+	@property
+	def bias(self):	# Read access to the layer bias: returns whatever the setter last stored in self._bias.
+		return self._bias
+
+	@bias.setter
+	def bias(self,value):	# Every assignment is wrapped in CNNParam, so plain arrays/lists may be assigned.
+		self._bias = CNNParam(value)	# NOTE(review): a new CNNParam is built on each assignment - confirm it keeps trainable/gradient when `value` is already a CNNParam.

From 9430bde0b46723a75aaae56f86bff5c65e4ee707 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Wed, 29 Dec 2021 17:40:36 +0000
Subject: [PATCH 07/24] Add FC layer test case

---
 tests/test_fc_layer/test_case_1.py | 118 +++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 tests/test_fc_layer/test_case_1.py

diff --git a/tests/test_fc_layer/test_case_1.py b/tests/test_fc_layer/test_case_1.py
new file mode 100644
index 0000000..8ad7291
--- /dev/null
+++ b/tests/test_fc_layer/test_case_1.py
@@ -0,0 +1,118 @@
+"""
+TC1:
+- 5 nodes
+- input shape (5,1)
+- bias = 0.2 (5,1)
+"""
+from _pytest.assertion import pytest_sessionfinish
+import pytest
+from cnn.layers import FC
+import numpy as np
+
+from cnn.params import CNNParam
+
+@pytest.fixture
+def n():
+	return 5
+
+@pytest.fixture
+def input_shape():
+	return (5,1)
+
+@pytest.fixture
+def batch_size():
+	return 2
+
+@pytest.fixture
+def fc_layer(n,input_shape):	# FC layer with fixed, hand-set weights and bias so expected values can be written out by hand.
+	layer = FC(
+		n,
+		input_shape=input_shape,
+		track_history=False	# Skips the _track_metrics calls in _forwards/_backwards.
+	)
+	layer.prepare_layer()
+	layer.weights = np.arange(n*input_shape[0]).reshape((n,input_shape[0]))	# Replaces the initialised weights with 0..n*input_rows-1.
+	layer.weights.trainable = False	# _backwards only calls the model's optimiser when trainable is truthy; no model is attached here.
+	layer.bias = np.array([[0.2]]*n)	# Column vector of shape (n,1), every entry 0.2.
+	layer.bias.trainable = False
+	return layer
+
+@pytest.fixture
+def forwards_input(input_shape,batch_size):
+	return np.arange(input_shape[0]*batch_size).reshape((input_shape[0],batch_size))
+
+@pytest.fixture
+def forwards_expected_result():
+	return np.array(
+		[
+			[60.2, 70.2],
+			[160.2, 195.2],
+			[260.2, 320.2],
+			[360.2, 445.2],
+			[460.2, 570.2]
+		]
+	)
+
+@pytest.fixture
+def backwards_input(forwards_expected_result):
+	return np.arange(np.prod(forwards_expected_result.shape)).reshape(forwards_expected_result.shape)
+
+@pytest.fixture
+def backwards_expected_output():
+	return np.array(
+		[
+			[ 300, 350 ],
+			[ 320, 375 ],
+			[ 340, 400 ],
+			[ 360, 425 ],
+			[ 380, 450 ]
+		]
+	)
+
+@pytest.fixture
+def backwards_expected_weights_gradient():
+	return np.array(
+		[
+			[ 1, 3, 5, 7, 9],
+			[ 3, 13, 23, 33, 43],
+			[ 5, 23, 41, 59, 77],
+			[ 7, 33, 59, 85, 111],
+			[ 9, 43, 77, 111, 145]
+		]
+	)
+
+@pytest.fixture
+def backwards_expected_bias_gradient(backwards_input):
+	return backwards_input.sum(axis=1,keepdims=True)
+
+def test_param_class_persistance(fc_layer):	# weights/bias must be CNNParam both after fixture setup and after assigning plain lists.
+	assert isinstance(fc_layer.weights,CNNParam)
+	assert isinstance(fc_layer.bias,CNNParam)
+	fc_layer.weights = [1,2,3,4]	# Plain list goes through the property setter.
+	fc_layer.bias = [1,2,3]
+	assert isinstance(fc_layer.weights,CNNParam)
+	assert isinstance(fc_layer.bias,CNNParam)
+
+def test_forwards(fc_layer,forwards_input,forwards_expected_result):
+	""" The forward pass of the fixture layer on forwards_input must equal forwards_expected_result exactly. """
+	assert np.array_equal(
+		fc_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_backwards(fc_layer,backwards_input,backwards_expected_output,backwards_expected_weights_gradient,backwards_expected_bias_gradient,forwards_input,forwards_expected_result):
+	fc_layer.input = forwards_input	# Cached forward state is set by hand so _backwards can run without a prior _forwards call.
+	fc_layer.output = forwards_expected_result	# _backwards asserts the incoming gradient has the same shape as self.output.
+	# Checks the returned input gradient, then the gradients stored on the weights and bias params.
+	assert np.array_equal(
+		fc_layer._backwards(backwards_input),
+		backwards_expected_output
+	)
+	assert np.array_equal(
+		fc_layer.weights.gradient,
+		backwards_expected_weights_gradient
+	)
+	assert np.array_equal(
+		fc_layer.bias.gradient,
+		backwards_expected_bias_gradient
+	)

From 365718290a49b3c1b9d00e7b548c3aec503ca854 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Wed, 29 Dec 2021 18:00:09 +0000
Subject: [PATCH 08/24] Minor Pool layer refactor

---
 cnn/layers/pool.py | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index c1984cf..1ae04d8 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -43,10 +43,6 @@ def prepare_layer(self) -> np.ndarray:
 
 		assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE'
 
-		# # Convert 2D input to 3D.
-		# if len(self.INPUT_SHAPE) == 2:
-		# 	self.INPUT_SHAPE = tuple([1]) + self.INPUT_SHAPE
-
 		NUM_INPUT_ROWS = self.INPUT_SHAPE[-2]
 		NUM_INPUT_COLS = self.INPUT_SHAPE[-1]
 
@@ -83,7 +79,6 @@ def prepare_layer(self) -> np.ndarray:
 		row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out)
-		# self.output = np.zeros(shape=(self.INPUT_SHAPE[0],row_out,col_out))	# Output initiated.
 		if self.PAD_TYPE == 'same':
 			assert self.OUTPUT_SHAPE == self.INPUT_SHAPE	# Channels may differ.
 
@@ -107,7 +102,7 @@ def _forwards(self,_input: np.ndarray) -> np.ndarray:
 				X_flat_pooled = np.mean(self.Xsliced, axis=2)
 			elif self.POOL_TYPE == 'min':
 				X_flat_pooled = np.min(self.Xsliced,axis=2)
-			self.output =  X_flat_pooled.reshape((self.padded_input.shape[0],*self.OUTPUT_SHAPE))
+			self.output = X_flat_pooled.reshape((self.padded_input.shape[0],*self.OUTPUT_SHAPE))
 		else:
 			self.output = np.zeros(shape=(self.input.shape[0],*self.OUTPUT_SHAPE))
 			batch_size, channels, proc_rows, proc_cols = self.padded_input.shape
@@ -118,14 +113,12 @@ def _forwards(self,_input: np.ndarray) -> np.ndarray:
 					curr_x = out_x = 0
 					while curr_x <= proc_cols - self.FILT_SHAPE[1]:
 						for channel_index in range(channels):
+							sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ]
 							if self.POOL_TYPE == 'max':
-								sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x+ self.FILT_SHAPE[1] ]
 								self.output[i,channel_index, out_y, out_x] = np.max( sub_arr )
 							elif self.POOL_TYPE == 'min':
-								sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x+ self.FILT_SHAPE[1] ]
 								self.output[i,channel_index, out_y, out_x] = np.min( sub_arr )
 							elif self.POOL_TYPE == 'mean':
-								sub_arr = self.padded_input[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ]
 								self.output[i,channel_index, out_y, out_x] = np.mean( sub_arr )
 
 						curr_x += self.STRIDE

From 39d48d38949a0cccda96136aeae5e5b39708e650 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Wed, 29 Dec 2021 18:40:45 +0000
Subject: [PATCH 09/24] refactored padding calculations into array util

---
 cnn/layers/conv.py | 33 +++++----------------------------
 cnn/layers/pool.py | 39 ++++++---------------------------------
 cnn/utils/array.py | 35 ++++++++++++++++++++++++++++++++++-
 3 files changed, 45 insertions(+), 62 deletions(-)

diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index 87a32b1..2f5e414 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -46,13 +46,6 @@ def prepare_layer(self) -> None:
 
 		assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE'
 
-		# # Convert 2D input to 3D.
-		# if len(self.INPUT_SHAPE) == 2:
-		# 	self.INPUT_SHAPE = tuple([1]) + self.INPUT_SHAPE	
-
-		NUM_INPUT_ROWS = self.INPUT_SHAPE[-2]
-		NUM_INPUT_COLS = self.INPUT_SHAPE[-1]
-
 		# Initiate params
 		self.filters = CNNParam(
 			utils.array.array_init(shape=(self.NUM_FILTERS,self.INPUT_SHAPE[0],self.FILT_SHAPE[0],self.FILT_SHAPE[1]),method=self.INITIATION_METHOD,seed=self.RANDOM_SEED),
@@ -64,28 +57,12 @@ def prepare_layer(self) -> None:
 		)
 
 		# Need to account for padding.
-		if self.PAD_TYPE != None:
-			if self.PAD_TYPE == 'same':
-				pad_cols_needed = max((NUM_INPUT_COLS - 1) * self.STRIDE + self.FILT_SHAPE[1] - NUM_INPUT_COLS, 0)
-				pad_rows_needed = max((NUM_INPUT_ROWS - 1) * self.STRIDE + self.FILT_SHAPE[0] - NUM_INPUT_ROWS, 0)
-			elif self.PAD_TYPE == 'valid':
-				# TensoFlow definition of this is "no padding". The input is just processed as-is.
-				pad_rows_needed = pad_cols_needed = 0
-			elif self.PAD_TYPE == 'include':
-				# Here we will implement the padding method to avoid input data being excluded/ missed by the convolution.
-				# - This happens when, (I_dim - F_dim) % stride != 0
-				pad_rows_needed = ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE)
-				pad_cols_needed = ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE)
-
-			self._COL_LEFT_PAD = pad_cols_needed // 2	# // Floor division
-			self._COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2)
-			self._ROW_UP_PAD = pad_rows_needed // 2	# // Floor division
-			self._ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2)
-		else:
-			self._COL_LEFT_PAD = self._COL_RIGHT_PAD = self._ROW_UP_PAD = self._ROW_DOWN_PAD = self.PADDING
+		self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding(
+			self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE
+		)
 
-		col_out = int((NUM_INPUT_COLS + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
-		row_out = int((NUM_INPUT_ROWS + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
+		col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
+		row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out)
 		
diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index 1ae04d8..85b9474 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -1,6 +1,7 @@
 import numpy as np
 from .layer import Layer
 import math
+from cnn import utils
 
 class Pool(Layer):
 	def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padding: int=0,pad_type: str=None,input_shape=None,vectorised=True,track_history=True):
@@ -43,40 +44,12 @@ def prepare_layer(self) -> np.ndarray:
 
 		assert len(self.INPUT_SHAPE) == 3, 'Invalid INPUT_SHAPE'
 
-		NUM_INPUT_ROWS = self.INPUT_SHAPE[-2]
-		NUM_INPUT_COLS = self.INPUT_SHAPE[-1]
-
-		# Need to account for padding.
-		if self.PAD_TYPE != None:
-			if self.PAD_TYPE == 'same':
-				nopad_out_cols = math.ceil(float(NUM_INPUT_COLS) / float(self.STRIDE))
-				pad_cols_needed = max((nopad_out_cols - 1) * self.STRIDE + self.FILT_SHAPE[1] - NUM_INPUT_COLS, 0)
-				nopad_out_rows = math.ceil(float(NUM_INPUT_ROWS) / float(self.STRIDE))
-				pad_rows_needed = max((nopad_out_rows - 1) * self.STRIDE + self.FILT_SHAPE[0] - NUM_INPUT_ROWS, 0)
-			elif self.PAD_TYPE == 'valid':
-				# TensoFlow definition of this is "no padding". The input is just processed as-is.
-				pad_rows_needed = pad_cols_needed = 0
-			elif self.PAD_TYPE == 'include':
-				# Here we will implement the padding method to avoid input data being excluded/ missed by the convolution.
-				# - This happens when, (I_dim - F_dim) % stride != 0
-				if (NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE != 0:
-					pad_rows_needed = self.FILT_SHAPE[0] - ((NUM_INPUT_ROWS - self.FILT_SHAPE[0]) % self.STRIDE)
-				else:
-					pad_rows_needed = 0
-				if (NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE != 0:
-					pad_cols_needed = self.FILT_SHAPE[1] - ((NUM_INPUT_COLS - self.FILT_SHAPE[1]) % self.STRIDE)
-				else:
-					pad_cols_needed = 0
-
-			self.COL_LEFT_PAD = pad_cols_needed // 2	# // Floor division
-			self.COL_RIGHT_PAD = math.ceil(pad_cols_needed / 2)
-			self.ROW_UP_PAD = pad_rows_needed // 2	# // Floor division
-			self.ROW_DOWN_PAD = math.ceil(pad_rows_needed / 2)
-		else:
-			self.COL_LEFT_PAD = self.COL_RIGHT_PAD = self.ROW_UP_PAD = self.ROW_DOWN_PAD = self.PADDING
+		self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding(
+			self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE
+		)
 
-		col_out = int((NUM_INPUT_COLS + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
-		row_out = int((NUM_INPUT_ROWS + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
+		col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
+		row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out)
 		if self.PAD_TYPE == 'same':
diff --git a/cnn/utils/array.py b/cnn/utils/array.py
index 1059afc..b031db1 100644
--- a/cnn/utils/array.py
+++ b/cnn/utils/array.py
@@ -1,5 +1,6 @@
+import math
 import numpy as np
-
+from typing import AnyStr, Tuple
 
 def array_init(shape: tuple,method=None,seed=None) -> np.ndarray:
 	''' Random initialisation of weights array.
@@ -46,3 +47,35 @@ def dilate(array: np.ndarray,channel_width: int) -> np.ndarray:
 			np.insert( dilated_array, dilation_idx_row * n, 0, axis=2 ),
 			dilation_idx_col * n, 0, axis=3)
 	return dilated_array
+
+def determine_padding(pad_type: AnyStr,pad_size: int,shape_array_1: Tuple[int],shape_array_2: Tuple[int],stride: int) -> Tuple[int]:
+	""" Determine the padding required at each edge of array_1 (the larger array, which array_2 slides over).
+
+	- pad_type: None (apply pad_size to every edge), 'same', 'valid' or 'include'.
+	- pad_size: explicit padding for every edge; only used when pad_type is None.
+	- shape_array_1 / shape_array_2: shapes whose LAST TWO entries are (rows, cols); leading axes such as channels are ignored.
+	- stride: step size of array_2 over array_1.
+
+	Returns (col_left_pad, col_right_pad, row_up_pad, row_down_pad). Raises ValueError for an unrecognised pad_type. """
+	if pad_type is None:
+		return pad_size, pad_size, pad_size, pad_size
+	# Index from the end so a (channels,rows,cols) layer INPUT_SHAPE and a (rows,cols) filter shape are both read correctly.
+	rows_1, cols_1 = shape_array_1[-2], shape_array_1[-1]
+	rows_2, cols_2 = shape_array_2[-2], shape_array_2[-1]
+	if pad_type == 'same':
+		# Pad so that the output size becomes ceil(input / stride).
+		pad_rows_needed = max((math.ceil(rows_1 / stride) - 1) * stride + rows_2 - rows_1, 0)
+		pad_cols_needed = max((math.ceil(cols_1 / stride) - 1) * stride + cols_2 - cols_1, 0)
+	elif pad_type == 'valid':
+		# TensorFlow definition of this is "no padding". The input is just processed as-is.
+		pad_rows_needed = pad_cols_needed = 0
+	elif pad_type == 'include':
+		# Pad whenever (I_dim - F_dim) % stride != 0, i.e. when trailing input values would otherwise be missed.
+		row_excess = (rows_1 - rows_2) % stride
+		col_excess = (cols_1 - cols_2) % stride
+		pad_rows_needed = rows_2 - row_excess if row_excess else 0
+		pad_cols_needed = cols_2 - col_excess if col_excess else 0
+	else:
+		raise ValueError(f'Unsupported pad_type: {pad_type!r}')
+	# An odd total puts the extra unit on the right / bottom edge.
+	return pad_cols_needed // 2, math.ceil(pad_cols_needed / 2), pad_rows_needed // 2, math.ceil(pad_rows_needed / 2)

From 8212dce8a4a308a4a9c2c2abfd85a181c7ea29a2 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Wed, 29 Dec 2021 21:04:58 +0000
Subject: [PATCH 10/24] Fix Pool layer bug: output shape calculation

---
 cnn/layers/pool.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index 85b9474..1d6347d 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -47,9 +47,8 @@ def prepare_layer(self) -> np.ndarray:
 		self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding(
 			self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE
 		)
-
-		col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
-		row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
+		col_out = int((self.INPUT_SHAPE[2] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
+		row_out = int((self.INPUT_SHAPE[1] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.INPUT_SHAPE[0],row_out,col_out)
 		if self.PAD_TYPE == 'same':

From 0d965f97e060832bff454f0f0de06d32ea43d6bc Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Wed, 29 Dec 2021 21:05:53 +0000
Subject: [PATCH 11/24] Pool Layer test case 1

---
 tests/test_pool_layer/test_case_1.py | 133 +++++++++++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 tests/test_pool_layer/test_case_1.py

diff --git a/tests/test_pool_layer/test_case_1.py b/tests/test_pool_layer/test_case_1.py
new file mode 100644
index 0000000..8fc4e65
--- /dev/null
+++ b/tests/test_pool_layer/test_case_1.py
@@ -0,0 +1,133 @@
+"""
+TC1:
+- filt shape (3,3)
+- stride 2
+- pool type max
+- input shape (2,5,5)
+"""
+import pytest
+from cnn.layers import Pool
+import numpy as np
+
+@pytest.fixture
+def input_shape():
+	return (2,5,5)
+
+@pytest.fixture
+def pool_layer():
+	layer = Pool(
+		filt_shape=(3,3),
+		stride=2,
+		pool_type='max',
+		input_shape=(2,5,5),
+		vectorised=False,
+		track_history=False
+	)
+	layer.prepare_layer()
+	return layer
+
+@pytest.fixture
+def forwards_input(input_shape):
+	batch_size = 2
+	return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape))
+
+@pytest.fixture
+def forwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[12, 14],
+					[22, 24]
+				],
+				[
+					[37, 39],
+					[47, 49]
+				]
+			],
+			[
+				[
+					[62, 64],
+					[72, 74]
+				],
+				[
+					[87, 89],
+					[97, 99]
+				]
+			]
+		]
+	)
+
+@pytest.fixture
+def backwards_input(forwards_expected_result):
+	return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape))
+
+@pytest.fixture
+def backwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[0,0,0,0,0],
+					[0,0,0,0,0],
+					[0,0,0,0,1],
+					[0,0,0,0,0],
+					[0,0,2,0,3]
+				],
+				[
+					[0,0,0,0,0],
+					[0,0,0,0,0],
+					[0,0,4,0,5],
+					[0,0,0,0,0],
+					[0,0,6,0,7]
+				]
+			],
+			[
+				[
+					[0,0,0,0,0],
+					[0,0,0,0,0],
+					[0,0,8,0,9],
+					[0,0,0,0,0],
+					[0,0,10,0,11]
+				],
+				[
+					[0,0,0,0,0],
+					[0,0,0,0,0],
+					[0,0,12,0,13],
+					[0,0,0,0,0],
+					[0,0,14,0,15]
+				]
+			]
+		]
+	)
+
+def test_forwards(pool_layer,forwards_input,forwards_expected_result):
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result):
+	pool_layer.VECTORISED = True
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.input = forwards_input
+	pool_layer.padded_input = forwards_input
+	pool_layer.output = forwards_expected_result
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)
+
+def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.VECTORISED = True
+	pool_layer._forwards(forwards_input)
+	pool_layer.output = forwards_expected_result
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)

From eb3adb6754cb6a6c6fc722c30cd8de3f5d84eac2 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Thu, 30 Dec 2021 17:53:08 +0000
Subject: [PATCH 12/24] Fix Pool layer backprop bug with 'mean' method

---
 cnn/layers/pool.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index 1d6347d..87bddef 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -120,22 +120,30 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:
 		assert cost_gradient.shape == self.output.shape
 		if self.TRACK_HISTORY: self._track_metrics(cost_gradient=cost_gradient)
 		# Initiate to input shape.
-		dC_dIpad = np.zeros_like(self.padded_input)
+		dC_dIpad = np.zeros(self.padded_input.shape,dtype=np.float64)
 
 		batch_size, channels, padded_rows, padded_cols = dC_dIpad.shape
 
 		if self.VECTORISED:
+			# Distribution array represents a boolean array indicating which data points the cost gradient should flow back through. 
 			if self.POOL_TYPE == 'max':
 				distribution_arr = (np.max(self.Xsliced,axis=2,keepdims=True) == self.Xsliced).astype(int)
 			elif self.POOL_TYPE == 'min':
 				distribution_arr = (np.min(self.Xsliced,axis=2,keepdims=True) == self.Xsliced).astype(int)
 			elif self.POOL_TYPE == 'mean':
-				distribution_arr = np.ones_like(self.Xsliced)
+				distribution_arr = np.ones(self.Xsliced.shape)
+
+			# The cost gradient array is 'flattened' so that each column corresponds to the sub array from the forwards propagation
 			cg_flat = cost_gradient.reshape((*self.Xsliced.shape[:2],1,self.Xsliced.shape[-1])) * distribution_arr
+
+			# Here the cost gradient values are combined to form the cost gradient values corresponding to each of the values in 
+			# the padded input.
 			col_index = 0
 			for vstart in range(0,self.padded_input.shape[-2] - self.FILT_SHAPE[0] + 1, self.STRIDE):
 				for hstart in range(0, self.padded_input.shape[-1] - self.FILT_SHAPE[1] + 1, self.STRIDE):
-					dC_dIpad[:,:,vstart:vstart+self.FILT_SHAPE[0],hstart:hstart+self.FILT_SHAPE[1]] += np.transpose(cg_flat[:,:,:,col_index].reshape((*self.padded_input.shape[:2],*self.FILT_SHAPE[::-1])),axes=(0,1,3,2))
+					dC_dIpad[:,:,vstart:vstart+self.FILT_SHAPE[0],hstart:hstart+self.FILT_SHAPE[1]] += np.transpose(
+						cg_flat[:,:,:,col_index].reshape((*self.padded_input.shape[:2],*self.FILT_SHAPE[::-1])),
+						axes=(0,1,3,2))
 					col_index += 1
 		else:
 			# Step over the array similarly to the forwards pass and compute the expanded cost gradients.
@@ -150,24 +158,22 @@ def _backwards(self,cost_gradient: np.ndarray) -> np.ndarray:
 							if self.POOL_TYPE == 'max':
 								# Set value of node that corresponds with the max value node of the input to the cost gradient value at (cost_y,cost_x)
 								max_node_y, max_node_x = np.array( np.unravel_index( np.argmax( sub_arr ), sub_arr.shape ) ) + np.array([curr_y, curr_x])	# addition of curr_y & curr_x is to get position in padded_input array (not just local sub_arr).
-
 								dC_dIpad[i, channel_index, max_node_y, max_node_x] += cost_val
 							elif self.POOL_TYPE == 'min':
 								# Set value of node that corresponds with the min value node of the input to the cost gradient value at (cost_y,cost_x)
 								min_node_y, min_node_x = np.array( np.unravel_index( np.argmin( sub_arr ), sub_arr.shape ) ) + np.array([curr_y, curr_x])	# addition of curr_y & curr_x is to get position in padded_input array (not just local sub_arr).
-
 								dC_dIpad[i, channel_index, min_node_y, min_node_x] += cost_val
 							elif self.POOL_TYPE == 'mean':
-								sub_arr_props = sub_arr / sub_arr.sum()
-
-								dC_dIpad[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] += sub_arr_props * cost_val
+								# Set all of the values associated with each sub-array from forwards pass as the corresponding cost gradient value;
+								# summing values where sub-arrays overlap.
+								dC_dIpad[i, channel_index, curr_y : curr_y + self.FILT_SHAPE[0], curr_x : curr_x + self.FILT_SHAPE[1] ] += cost_val
 
 						curr_x += self.STRIDE
 						cost_x += 1
 					curr_y += self.STRIDE
 					cost_y += 1
 
-		# Remove padding that was added to the input array.
+		# Remove padding that was added to the input array to obtain the cost gradient array for the layer input.
 		dC_dI = dC_dIpad[ :, : , self.ROW_UP_PAD : dC_dIpad.shape[-2] - self.ROW_DOWN_PAD , self.COL_LEFT_PAD : dC_dIpad.shape[-1] - self.COL_RIGHT_PAD ]
 		assert dC_dI.shape == self.input.shape, f'dC/dI shape [{dC_dI.shape}] does not match layer input shape [{self.input.shape}].'
 		return dC_dI

From 726d74f45f5fa77e9861d81bd5fbf112cb9ef604 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Thu, 30 Dec 2021 18:03:45 +0000
Subject: [PATCH 13/24] Fix Conv2D layer bug in calculating output shape

---
 cnn/layers/conv.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index 2f5e414..e0f48f4 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -57,12 +57,12 @@ def prepare_layer(self) -> None:
 		)
 
 		# Need to account for padding.
-		self.COL_LEFT_PAD, self.COL_RIGHT_PAD, self.ROW_UP_PAD, self.ROW_DOWN_PAD = utils.array.determine_padding(
+		self._COL_LEFT_PAD, self._COL_RIGHT_PAD, self._ROW_UP_PAD, self._ROW_DOWN_PAD = utils.array.determine_padding(
 			self.PAD_TYPE, self.PADDING, self.INPUT_SHAPE, self.FILT_SHAPE, self.STRIDE
 		)
 
-		col_out = int((self.INPUT_SHAPE[1] + (self.COL_LEFT_PAD + self.COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
-		row_out = int((self.INPUT_SHAPE[0] + (self.ROW_DOWN_PAD + self.ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
+		col_out = int((self.INPUT_SHAPE[2] + (self._COL_LEFT_PAD + self._COL_RIGHT_PAD) - self.FILT_SHAPE[1]) / self.STRIDE) + 1
+		row_out = int((self.INPUT_SHAPE[1] + (self._ROW_DOWN_PAD + self._ROW_UP_PAD) - self.FILT_SHAPE[0]) / self.STRIDE) + 1
 
 		self.OUTPUT_SHAPE = (self.NUM_FILTERS,row_out,col_out)
 		

From ed32f9e2b6dd3636e6b94b61748fde83473d12df Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Thu, 30 Dec 2021 18:04:53 +0000
Subject: [PATCH 14/24] Add TC2 and 3 for Pool layer

---
 tests/test_pool_layer/test_case_2.py | 133 ++++++++++++++++++++++++++
 tests/test_pool_layer/test_case_3.py | 136 +++++++++++++++++++++++++++
 2 files changed, 269 insertions(+)
 create mode 100644 tests/test_pool_layer/test_case_2.py
 create mode 100644 tests/test_pool_layer/test_case_3.py

diff --git a/tests/test_pool_layer/test_case_2.py b/tests/test_pool_layer/test_case_2.py
new file mode 100644
index 0000000..c5ff640
--- /dev/null
+++ b/tests/test_pool_layer/test_case_2.py
@@ -0,0 +1,133 @@
+"""
+TC2:
+- filt shape (3,3)
+- stride 2
+- pool type min
+- input shape (2,5,5)
+"""
+import pytest
+from cnn.layers import Pool
+import numpy as np
+
+@pytest.fixture
+def input_shape():
+	return (2,5,5)
+
+@pytest.fixture
+def pool_layer():
+	layer = Pool(
+		filt_shape=(3,3),
+		stride=2,
+		pool_type='min',
+		input_shape=(2,5,5),
+		vectorised=False,
+		track_history=False
+	)
+	layer.prepare_layer()
+	return layer
+
+@pytest.fixture
+def forwards_input(input_shape):
+	batch_size = 2
+	return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape))
+
+@pytest.fixture
+def forwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[0, 2],
+					[10, 12]
+				],
+				[
+					[25, 27],
+					[35, 37]
+				]
+			],
+			[
+				[
+					[50, 52],
+					[60, 62]
+				],
+				[
+					[75, 77],
+					[85, 87]
+				]
+			]
+		]
+	)
+
+@pytest.fixture
+def backwards_input(forwards_expected_result):
+	return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape))
+
+@pytest.fixture
+def backwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[0,0,1,0,0],
+					[0,0,0,0,0],
+					[2,0,3,0,0],
+					[0,0,0,0,0],
+					[0,0,0,0,0]
+				],
+				[
+					[4,0,5,0,0],
+					[0,0,0,0,0],
+					[6,0,7,0,0],
+					[0,0,0,0,0],
+					[0,0,0,0,0]
+				]
+			],
+			[
+				[
+					[8,0,9,0,0],
+					[0,0,0,0,0],
+					[10,0,11,0,0],
+					[0,0,0,0,0],
+					[0,0,0,0,0]
+				],
+				[
+					[12,0,13,0,0],
+					[0,0,0,0,0],
+					[14,0,15,0,0],
+					[0,0,0,0,0],
+					[0,0,0,0,0]
+				]
+			]
+		]
+	)
+
+def test_forwards(pool_layer,forwards_input,forwards_expected_result):
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result):
+	pool_layer.VECTORISED = True
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.input = forwards_input
+	pool_layer.padded_input = forwards_input
+	pool_layer.output = forwards_expected_result
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)
+
+def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.VECTORISED = True
+	pool_layer._forwards(forwards_input)
+	pool_layer.output = forwards_expected_result
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)
diff --git a/tests/test_pool_layer/test_case_3.py b/tests/test_pool_layer/test_case_3.py
new file mode 100644
index 0000000..ce217d7
--- /dev/null
+++ b/tests/test_pool_layer/test_case_3.py
@@ -0,0 +1,136 @@
+"""
+TC3:
+- filt shape (3,3)
+- stride 2
+- pool type mean
+- input shape (2,5,5)
+"""
+import pytest
+from cnn.layers import Pool
+import numpy as np
+
+@pytest.fixture
+def input_shape():
+	return (2,5,5)
+
+@pytest.fixture
+def pool_layer():
+	layer = Pool(
+		filt_shape=(3,3),
+		stride=2,
+		pool_type='mean',
+		input_shape=(2,5,5),
+		vectorised=False,
+		track_history=False
+	)
+	layer.prepare_layer()
+	return layer
+
+@pytest.fixture
+def forwards_input(input_shape):
+	batch_size = 2
+	return np.arange(batch_size*np.prod(input_shape)).reshape((batch_size,*input_shape))
+
+@pytest.fixture
+def forwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[6, 8],
+					[16, 18]
+				],
+				[
+					[31, 33],
+					[41, 43]
+				]
+			],
+			[
+				[
+					[56, 58],
+					[66, 68]
+				],
+				[
+					[81, 83],
+					[91, 93]
+				]
+			]
+		]
+	)
+
+@pytest.fixture
+def backwards_input(forwards_expected_result):
+	return np.arange(np.prod(forwards_expected_result.shape)).reshape((forwards_expected_result.shape))
+
+@pytest.fixture
+def backwards_expected_result():
+	return np.array(
+		[
+			[
+				[
+					[0,0,1,1,1],
+					[0,0,1,1,1],
+					[2,2,6,4,4],
+					[2,2,5,3,3],
+					[2,2,5,3,3]
+				],
+				[
+					[4,4,9,5,5],
+					[4,4,9,5,5],
+					[10,10,22,12,12],
+					[6,6,13,7,7],
+					[6,6,13,7,7]
+				]
+			],
+			[
+				[
+					[8,8,17,9,9],
+					[8,8,17,9,9],
+					[18,18,38,20,20],
+					[10,10,21,11,11],
+					[10,10,21,11,11]
+				],
+				[
+					[12,12,25,13,13],
+					[12,12,25,13,13],
+					[26,26,54,28,28],
+					[14,14,29,15,15],
+					[14,14,29,15,15]
+				]
+			]
+		]
+	)
+
+def test_forwards(pool_layer,forwards_input,forwards_expected_result):
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_vectorised_forwards(pool_layer,forwards_input,forwards_expected_result):
+	pool_layer.VECTORISED = True
+	assert np.array_equal(
+		pool_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.input = forwards_input
+	pool_layer.padded_input = forwards_input
+	pool_layer.output = forwards_expected_result
+	print(pool_layer._backwards(backwards_input))
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)
+
+def test_vectorised_backwards(pool_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	pool_layer.VECTORISED = True
+	pool_layer._forwards(forwards_input)
+	pool_layer.output = forwards_expected_result
+	print(backwards_expected_result)
+	print(pool_layer._backwards(backwards_input))
+	assert np.array_equal(
+		pool_layer._backwards(backwards_input),
+		backwards_expected_result
+	)

From c3942d5428c2b66ce90205a5ac26a4fdd3cf54a4 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Thu, 30 Dec 2021 18:04:59 +0000
Subject: [PATCH 15/24] Add init files in test modules to resolve filename
 conflicts (global namespace)

---
 tests/test_conv_layer/__init__.py | 0
 tests/test_fc_layer/__init__.py   | 0
 tests/test_pool_layer/__init__.py | 0
 3 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 tests/test_conv_layer/__init__.py
 create mode 100644 tests/test_fc_layer/__init__.py
 create mode 100644 tests/test_pool_layer/__init__.py

diff --git a/tests/test_conv_layer/__init__.py b/tests/test_conv_layer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_fc_layer/__init__.py b/tests/test_fc_layer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_pool_layer/__init__.py b/tests/test_pool_layer/__init__.py
new file mode 100644
index 0000000..e69de29

From 32b61fdd805a4824896f2d7d3a57bcafb7da82db Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 3 Jan 2022 17:25:31 +0000
Subject: [PATCH 16/24] Clean param counting

---
 cnn/layers/activation.py |  2 --
 cnn/layers/flatten.py    |  2 --
 cnn/layers/layer.py      | 37 ++++++++++++++++++-------------------
 cnn/layers/pool.py       |  2 --
 cnn/model.py             | 31 ++-----------------------------
 5 files changed, 20 insertions(+), 54 deletions(-)

diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
index 9ac1af7..a842cba 100644
--- a/cnn/layers/activation.py
+++ b/cnn/layers/activation.py
@@ -12,8 +12,6 @@ def __init__(self,function: str=None,alpha=0.01,input_shape=None):
 
 		self.FUNCTION = None if function is None else function.lower()
 
-		self.NUM_PARAMS = 0
-
 	def prepare_layer(self):
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py
index 6cb20b3..59a33c4 100644
--- a/cnn/layers/flatten.py
+++ b/cnn/layers/flatten.py
@@ -19,8 +19,6 @@ def __init__(self,input_shape=None):
 			assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).'
 		self.INPUT_SHAPE = input_shape
 
-		self.NUM_PARAMS = 0
-
 	def prepare_layer(self) -> None:
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py
index 81d20dc..bd0898e 100644
--- a/cnn/layers/layer.py
+++ b/cnn/layers/layer.py
@@ -1,5 +1,7 @@
 import numpy as np
 import sys
+from cnn.params import CNNParam
+from typing import Tuple, Union
 
 class Layer:
 	'''
@@ -12,7 +14,6 @@ def __init__(self):
 		self.prev_layer = None
 
 		self.output = None
-		self.params = {}
 
 	def _initiate_history(self):
 		out_init_arr = np.zeros(self.model.EPOCHS * self.model.N)
@@ -75,23 +76,21 @@ def define_details(self):
 		
 		return details
 
-	def count_params(self):
-		'''
-			params = {
-				'param_name': {
-					'trainable':True,
-					'values':[....]	<--- np.ndarray
-				}
-			}
-		'''
+	def count_params(self,split_trainable=True) -> Union(Tuple, int):
+		""" Sums sizes of any parameter attributes of the layer object.
+		'parameter' is defined as any attribute that is of type 'CNNParam'.
+
+		Returns: Tuple(trainable, non trainable) [if split_trainable is True]; total params otherwise.
+		"""
 		trainable = 0
 		non_trainable = 0
-
-
-		for param in self.params:
-			if param.trainable:
-				trainable += param.values.size
-			else:
-				non_trainable += param.values.size
-
-		return trainable, non_trainable
\ No newline at end of file
+		for att in self.__dict__.values():
+			if isinstance(att,CNNParam):
+				if att.trainable:
+					trainable += att.size
+				else:
+					non_trainable += att.size
+		if split_trainable:
+			return trainable, non_trainable
+		else:
+			return trainable + non_trainable
diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index 87bddef..d1bbcd1 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -33,8 +33,6 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd
 		self.VECTORISED = vectorised
 		self.TRACK_HISTORY = track_history
 
-		self.NUM_PARAMS = 0
-
 	def prepare_layer(self) -> np.ndarray:
 		""" This needs to be done after the input has been identified - currently happens when train() is called. """
 		if self.prev_layer == None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
diff --git a/cnn/model.py b/cnn/model.py
index 36dc119..17a19f1 100644
--- a/cnn/model.py
+++ b/cnn/model.py
@@ -99,18 +99,12 @@ def prepare_model(self,optimiser: Any='gd',learning_rate=None):
 
 				curr_layer.MODEL_STRUCTURE_INDEX = index
 
-				# print(f'Preparing Layer:: Type = {curr_layer.LAYER_TYPE} | Structure index = {curr_layer.MODEL_STRUCTURE_INDEX}')
 				curr_layer.prepare_layer()
-				# print('--> Num params:',curr_layer.NUM_PARAMS)
-				# print('--> Expected output shape:',curr_layer.OUTPUT_SHAPE)
 				if curr_layer.MODEL_STRUCTURE_INDEX == 0:
 					# First layer; set model input shape.
 					self.INPUT_SHAPE = curr_layer.INPUT_SHAPE
-				# self.details['param_counts'].append(curr_layer.NUM_PARAMS)
-				# self.details['output_shapes'].append(curr_layer.OUTPUT_SHAPE)
 
 		self.is_prepared = True
-		# print(self.details)
 		self.print_summary()
 		print(f'Model Prepared: {self.is_prepared}')
 
@@ -211,12 +205,8 @@ def _iterate_forwards(self) -> None:
 				ind_upper = self.N
 			self.current_batch_size = ind_upper - ind_lower
 
-			# print('Lower index:',ind_lower,'Upper index:',ind_upper)
-			# print(self.Xs)
-			# print(self.BATCH_COUNT, self.Xs.shape)
 			batch_Xs = self.Xs[ ind_lower : ind_upper ].copy()
 			batch_ys = self.ys[ ind_lower : ind_upper ].copy()
-			# print(batch_Xs.shape,batch_ys.shape)
 
 			predictions = self.predict(batch_Xs,training=True)
 
@@ -225,14 +215,7 @@ def _iterate_forwards(self) -> None:
 
 			batch_correct = np.sum((np.argmax(batch_ys.T,axis=0) == np.argmax(predictions,axis=0)))
 			self.epoch_accuracy = (self.epoch_accuracy * ind_lower + batch_correct) / (ind_upper+1)
-			# for ex_ind , X in enumerate(batch_Xs):	# For each example (observation)
-			# 	print(X.shape)
-			# 	prediction = self.predict(X,training=True)
-
-			# 	self.iteration_cost += self.cost(prediction, batch_ys[ex_ind],batch_size=batch_size)
-			# 	self.iteration_cost_gradient += self.cost(prediction, batch_ys[ex_ind],batch_size=batch_size,derivative=True)
-
-			# print(f'-- Epoch: {self.epoch_ind+1}/{self.EPOCHS } | Batch: {batch_ind+1}/{self.BATCH_COUNT} | Cost: {self.iteration_cost}')
+			
 			self._print_train_progress(batch_ind)
 
 			self._iterate_backwards()
@@ -252,8 +235,6 @@ def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray:
 		if training: self.feed_forwards_cycle_index += 1
 		for layer in self.structure:
 			Xs = layer._forwards(Xs)
-			# print('Layer index:',layer.MODEL_STRUCTURE_INDEX)
-			# print('Output:',X)
 		return Xs
 
 	def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> int:
@@ -290,9 +271,7 @@ def cost(self,predictions: np.ndarray,labels: np.ndarray,derivative: bool=False)
 				return -( 2 * error ) / batch_size	# Vector
 		elif self.COST_FN == 'cross_entropy':
 			if not derivative:
-				# print('logprobs:',np.log(predictions))
 				cost = -np.sum(labels * np.log(predictions)) / batch_size
-				# print('Cost:',cost)
 				return cost
 			else:
 				return - np.divide(labels,predictions) / batch_size
@@ -321,13 +300,7 @@ def print_summary(self):
 			index = str(layer.MODEL_STRUCTURE_INDEX)
 			type_ = layer.LAYER_TYPE
 			out_shape = layer.OUTPUT_SHAPE
-			trainable_params = 0
-			non_trainable_params = 0
-			for _,param in layer.params.items():
-				if param['trainable']:
-					trainable_params += param['values'].size
-				else:
-					non_trainable_params += param['values'].size
+			trainable_params, non_trainable_params = layer.count_params(split_trainable=True)
 			total_trainable += trainable_params
 			total_non_trainable += non_trainable_params
 			info_str = ' ' + index + ' '*(field_lengths[0] - len(index)-1) + \

From 53f3beb4bf1cb93f1178878e313e99f4d9bb02e3 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 3 Jan 2022 17:51:34 +0000
Subject: [PATCH 17/24] Clean up layer 'trainable' attribute and establish link
 with params

---
 cnn/layers/activation.py |  3 +--
 cnn/layers/conv.py       |  1 -
 cnn/layers/fc.py         |  1 -
 cnn/layers/flatten.py    |  2 +-
 cnn/layers/layer.py      | 40 +++++++++++++++++++++++++++++++++-------
 cnn/layers/pool.py       |  2 +-
 6 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
index a842cba..08ae202 100644
--- a/cnn/layers/activation.py
+++ b/cnn/layers/activation.py
@@ -6,7 +6,7 @@ def __init__(self,function: str=None,alpha=0.01,input_shape=None):
 		super().__init__()
 
 		self.LAYER_TYPE = self.__class__.__name__ + ' (' + function + ')'
-		self.TRAINABLE = False
+		self.trainable = False
 		self.alpha = alpha
 		self.INPUT_SHAPE = input_shape
 
@@ -18,7 +18,6 @@ def prepare_layer(self):
 		else:
 			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
 		self.OUTPUT_SHAPE = self.INPUT_SHAPE
-		# self.output = np.zeros(shape=self.INPUT_SHAPE )
 
 	def _forwards(self,_input: np.ndarray) -> np.ndarray:
 		if self.prev_layer.LAYER_TYPE == 'FC':
diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index e0f48f4..031ee19 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -20,7 +20,6 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd
 		super().__init__()
 
 		self.LAYER_TYPE = self.__class__.__name__
-		self.TRAINABLE = True
 		if type(filt_shape) == tuple:
 			assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)'
 			self.FILT_SHAPE = filt_shape	# 2D tuple describing num rows and cols
diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py
index 2550b97..3aa3d24 100644
--- a/cnn/layers/fc.py
+++ b/cnn/layers/fc.py
@@ -19,7 +19,6 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met
 		super().__init__()
 
 		self.LAYER_TYPE = self.__class__.__name__
-		self.TRAINABLE = True
 		self.NUM_NODES = num_nodes
 		self.ACTIVATION = None if activation is None else activation.lower()
 		self.RANDOM_SEED = random_seed
diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py
index 59a33c4..fada80b 100644
--- a/cnn/layers/flatten.py
+++ b/cnn/layers/flatten.py
@@ -14,7 +14,7 @@ def __init__(self,input_shape=None):
 		super().__init__()
 
 		self.LAYER_TYPE = self.__class__.__name__
-		self.TRAINABLE = False
+		self.trainable = False
 		if input_shape is not None:
 			assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).'
 		self.INPUT_SHAPE = input_shape
diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py
index bd0898e..fa3bdfe 100644
--- a/cnn/layers/layer.py
+++ b/cnn/layers/layer.py
@@ -76,7 +76,7 @@ def define_details(self):
 		
 		return details
 
-	def count_params(self,split_trainable=True) -> Union(Tuple, int):
+	def count_params(self,split_trainable=True) -> Union[Tuple, int]:
 		""" Sums sizes of any parameter attributes of the layer object.
 		'parameter' is defined as any attribute that is of type 'CNNParam'.
 
@@ -84,13 +84,39 @@ def count_params(self,split_trainable=True) -> Union(Tuple, int):
 		"""
 		trainable = 0
 		non_trainable = 0
-		for att in self.__dict__.values():
-			if isinstance(att,CNNParam):
-				if att.trainable:
-					trainable += att.size
-				else:
-					non_trainable += att.size
+		for param in self.get_params():
+			if param.trainable:
+				trainable += param.size
+			else:
+				non_trainable += param.size
 		if split_trainable:
 			return trainable, non_trainable
 		else:
 			return trainable + non_trainable
+
+	def get_params(self):
+		params = []
+		for att in self.__dict__.values():
+			if isinstance(att,CNNParam):
+				params.append(att)
+		return params
+
+	@property
+	def trainable(self):
+		try:
+			return self._trainable
+		except AttributeError as e:
+			# Defaults to 'True'
+			self.trainable = True
+			return self._trainable 
+
+	@trainable.setter
+	def trainable(self,value):
+		""" When setting the trainability of the layer, this should link with the param trainability;
+		i.e. set the value for each param.
+		This relationship is one-directional.
+		"""
+		assert isinstance(value,bool), f"{self}.trainable must be a boolean value."
+		self._trainable = value
+		for param in self.get_params():
+			param.trainable = value
diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index d1bbcd1..f722690 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -16,7 +16,7 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd
 		super().__init__()
 
 		self.LAYER_TYPE = self.__class__.__name__
-		self.TRAINABLE = False
+		self.trainable = False
 		if type(filt_shape) == tuple:
 			assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)'
 			self.FILT_SHAPE = filt_shape	# 2D tuple describing num rows and cols

From ee766d0524b42ea0ff7203d292270470b648d436 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Mon, 3 Jan 2022 18:33:33 +0000
Subject: [PATCH 18/24] Clean up LAYER_TYPE attribute

---
 cnn/layers/activation.py |  1 -
 cnn/layers/conv.py       |  1 -
 cnn/layers/fc.py         |  1 -
 cnn/layers/flatten.py    |  1 -
 cnn/layers/layer.py      |  1 +
 cnn/layers/pool.py       |  1 -
 cnn/model.py             | 14 +++++++-------
 7 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
index 08ae202..8495643 100644
--- a/cnn/layers/activation.py
+++ b/cnn/layers/activation.py
@@ -5,7 +5,6 @@ class Activation(Layer):
 	def __init__(self,function: str=None,alpha=0.01,input_shape=None):
 		super().__init__()
 
-		self.LAYER_TYPE = self.__class__.__name__ + ' (' + function + ')'
 		self.trainable = False
 		self.alpha = alpha
 		self.INPUT_SHAPE = input_shape
diff --git a/cnn/layers/conv.py b/cnn/layers/conv.py
index 031ee19..d1c8ea7 100644
--- a/cnn/layers/conv.py
+++ b/cnn/layers/conv.py
@@ -19,7 +19,6 @@ def __init__(self,filt_shape: tuple or int,num_filters: int=5,stride: int=1,padd
 		assert num_filters > 0, 'Cannot use less than 1 filter in Conv Layer.'
 		super().__init__()
 
-		self.LAYER_TYPE = self.__class__.__name__
 		if type(filt_shape) == tuple:
 			assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)'
 			self.FILT_SHAPE = filt_shape	# 2D tuple describing num rows and cols
diff --git a/cnn/layers/fc.py b/cnn/layers/fc.py
index 3aa3d24..705052d 100644
--- a/cnn/layers/fc.py
+++ b/cnn/layers/fc.py
@@ -18,7 +18,6 @@ def __init__(self, num_nodes, activation: str=None,random_seed=42,initiation_met
 		"""
 		super().__init__()
 
-		self.LAYER_TYPE = self.__class__.__name__
 		self.NUM_NODES = num_nodes
 		self.ACTIVATION = None if activation is None else activation.lower()
 		self.RANDOM_SEED = random_seed
diff --git a/cnn/layers/flatten.py b/cnn/layers/flatten.py
index fada80b..57273d2 100644
--- a/cnn/layers/flatten.py
+++ b/cnn/layers/flatten.py
@@ -13,7 +13,6 @@ def __init__(self,input_shape=None):
 
 		super().__init__()
 
-		self.LAYER_TYPE = self.__class__.__name__
 		self.trainable = False
 		if input_shape is not None:
 			assert len(input_shape) == 3, f'ERROR: Expected input_shape to be a tuple of length 3; (channels, height, width).'
diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py
index fa3bdfe..47692a0 100644
--- a/cnn/layers/layer.py
+++ b/cnn/layers/layer.py
@@ -8,6 +8,7 @@ class Layer:
 	ABSTRACT LAYER CLASS FOR ALL LAYER TYPES
 	'''
 	def __init__(self):
+		self.LAYER_TYPE = self.__class__.__name__
 		self.model = None
 
 		self.next_layer = None
diff --git a/cnn/layers/pool.py b/cnn/layers/pool.py
index f722690..752e795 100644
--- a/cnn/layers/pool.py
+++ b/cnn/layers/pool.py
@@ -15,7 +15,6 @@ def __init__(self,filt_shape: tuple or int,stride: int,pool_type: str='max',padd
 		'''
 		super().__init__()
 
-		self.LAYER_TYPE = self.__class__.__name__
 		self.trainable = False
 		if type(filt_shape) == tuple:
 			assert len(filt_shape) == 2, 'Expected 2 dimensional tuple in form: (rows,cols)'
diff --git a/cnn/model.py b/cnn/model.py
index 17a19f1..d50efc2 100644
--- a/cnn/model.py
+++ b/cnn/model.py
@@ -34,7 +34,7 @@ def __init__(self,optimiser_method='gd'):
 		self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1)))	# dict for counting number of each layer type
 
 	def add_layer(self,layer: Layer) -> None:
-		if layer.LAYER_TYPE == 'ACTIVATION' and self.structure[-1].LAYER_TYPE == 'ACTIVATION':
+		if layer.LAYER_TYPE == 'Activation' and self.structure[-1].LAYER_TYPE == 'Activation':
 			print('-- WARNING:: Two Activation Layers in subsequent positions in the model.')
 			if layer.FUNCTION == self.structure[-1].FUNCTION:
 				print('--- INFO:: Both Activation Layers are the same, skipping creation of second layer.')
@@ -43,15 +43,15 @@ def add_layer(self,layer: Layer) -> None:
 		layer.model = self
 
 		if len(self.structure) > 0:
-			if layer.__class__.__name__ == 'FC' and self.structure[-1].__class__.__name__ not in ('Flatten','FC','Activation'):
+			if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC','Activation'):
 				# If no Flatten layer added before adding first FC layer, one will be added automatically.
 				self.add_layer(layers.Flatten())
 
 		self.structure.append(layer)
-		self.layer_counts[layer.__class__.__name__] += 1
+		self.layer_counts[layer.LAYER_TYPE] += 1
 		self.layer_counts['total'] += 1
 
-		if layer.__class__.__name__ == 'FC':
+		if layer.LAYER_TYPE == 'FC':
 			# Create the Activation Layer (transparent to user).
 			self.add_layer(
 				layers.Activation(function=layer.ACTIVATION)
@@ -128,7 +128,7 @@ def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32,
 		ys = ys.reshape(-1,1) if ys.ndim == 1 else ys
 		# --------- ASSERTIONS -----------
 		# Check shapes and orientation are as expected
-		assert self.structure[-1].__class__.__name__ in ('FC','Activation'), 'Model must have either FC or ACTIVATION as final layer.'
+		assert self.structure[-1].LAYER_TYPE in ('FC','Activation'), 'Model must have either FC or Activation as final layer.'
 		assert Xs.shape[0] == ys.shape[0], f'Dimension (0) of input data [{Xs.shape}] and labels [{ys.shape}] does not match.'
 		assert Xs.ndim in (2,4), 'Xs must be either 2 dimensions (for NN) or 4 dimensions (for Model).'
 		if Xs.ndim == 4:
@@ -237,7 +237,7 @@ def predict(self,Xs: np.ndarray,training: bool=False) -> np.ndarray:
 			Xs = layer._forwards(Xs)
 		return Xs
 
-	def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> int:
+	def evaluate(self,Xs: np.ndarray,ys: np.ndarray) -> float:
 		predictions = self.predict(Xs,training=False)
 		accuracy = np.sum((np.argmax(ys.T,axis=0) == np.argmax(predictions,axis=0))) / len(Xs)
 		return accuracy
@@ -298,7 +298,7 @@ def print_summary(self):
 		total_non_trainable = 0
 		for layer in self.structure:
 			index = str(layer.MODEL_STRUCTURE_INDEX)
-			type_ = layer.LAYER_TYPE
+			type_ = layer.LAYER_TYPE + ' (' + layer.FUNCTION + ')' if layer.LAYER_TYPE == "Activation" else layer.LAYER_TYPE
 			out_shape = layer.OUTPUT_SHAPE
 			trainable_params, non_trainable_params = layer.count_params(split_trainable=True)
 			total_trainable += trainable_params

From 468a8ffeda1fff974b4d48486957b558171f2b7f Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 18:37:17 +0100
Subject: [PATCH 19/24] feat: add activations module group including ReLU.

---
 cnn/layers/activations/__init__.py            |  2 +
 cnn/layers/activations/base.py                | 19 ++++++
 cnn/layers/activations/relu.py                | 22 +++++++
 tests/test_activations/__init__.py            |  0
 tests/test_activations/test_relu/__init__.py  |  0
 .../test_activations/test_relu/test_case_1.py | 65 +++++++++++++++++++
 6 files changed, 108 insertions(+)
 create mode 100644 cnn/layers/activations/__init__.py
 create mode 100644 cnn/layers/activations/base.py
 create mode 100644 cnn/layers/activations/relu.py
 create mode 100644 tests/test_activations/__init__.py
 create mode 100644 tests/test_activations/test_relu/__init__.py
 create mode 100644 tests/test_activations/test_relu/test_case_1.py

diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py
new file mode 100644
index 0000000..5c01e85
--- /dev/null
+++ b/cnn/layers/activations/__init__.py
@@ -0,0 +1,2 @@
+from .relu import ReLU
+
diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py
new file mode 100644
index 0000000..f6045f1
--- /dev/null
+++ b/cnn/layers/activations/base.py
@@ -0,0 +1,19 @@
+from ..layer import Layer
+
+class BaseActivation(Layer):
+	def __init__(self,function: str=None,alpha=0.01,input_shape=None):
+		super().__init__()
+
+		self.trainable = False
+		self.alpha = alpha
+		self.INPUT_SHAPE = input_shape
+
+		self.FUNCTION = None if function is None else function.lower()
+
+	def prepare_layer(self):
+		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
+			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
+		else:
+			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
+		self.OUTPUT_SHAPE = self.INPUT_SHAPE
+
diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py
new file mode 100644
index 0000000..b52e9e8
--- /dev/null
+++ b/cnn/layers/activations/relu.py
@@ -0,0 +1,22 @@
+import numpy as np
+from .base import BaseActivation
+
+class ReLU(BaseActivation):
+	
+	def _forwards(self,X:np.ndarray):
+		self.input = X.copy()
+		self.output = np.maximum(self.input,0)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		# dAdZ holds one (output rows x previous-layer rows) matrix per column of the output; only its diagonal is filled below.
+		dAdZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0]))	# TODO: Will need verifying for Conv Activation.
+		# NOTE(review): np.diag_indices_from needs each matrix to be square, i.e. this layer and the previous one must have the same row count.
+		# ReLU derivative on the diagonals: 1 where the stored input is > 0, otherwise 0.
+		ix,iy = np.diag_indices_from(dAdZ[0,:,:])
+		dAdZ[:,iy,ix] = (self.input.T > 0).astype(int)
+		# Turn each column of dCdA into a column vector so matmul applies the matching dAdZ matrix to it.
+		dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1))
+		dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded)
+
+		return dC_dZexpanded.reshape(dCdA.shape[1],-1).T
diff --git a/tests/test_activations/__init__.py b/tests/test_activations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_activations/test_relu/__init__.py b/tests/test_activations/test_relu/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/test_activations/test_relu/test_case_1.py b/tests/test_activations/test_relu/test_case_1.py
new file mode 100644
index 0000000..64583f6
--- /dev/null
+++ b/tests/test_activations/test_relu/test_case_1.py
@@ -0,0 +1,65 @@
+"""
+TC1:
+- input (2,5)
+"""
+import numpy as np
+import pytest
+from cnn.layers.activations import ReLU
+
+@pytest.fixture
+def input_shape():
+	return (2,5)
+
+@pytest.fixture
+def relu_layer(input_shape):
+	layer = ReLU(input_shape=input_shape)
+	class DummyPrevLayer:
+		output = np.zeros(input_shape)
+		OUTPUT_SHAPE = input_shape
+	layer.prev_layer = DummyPrevLayer()
+	layer.prepare_layer()
+	return layer
+
+@pytest.fixture
+def forwards_input(input_shape):
+	# Values 0..N-1 shifted by their median; the builtin float replaces the np.float alias, which NumPy 1.24 removed.
+	arr = np.arange(np.prod(input_shape)).reshape(input_shape).astype(float)
+	arr -= np.median(arr)
+	return arr
+
+@pytest.fixture
+def forwards_expected_result():
+	return np.array(
+		[
+			[0, 0, 0, 0, 0],
+			[0.5, 1.5, 2.5, 3.5, 4.5]
+		]
+	)
+
+@pytest.fixture
+def backwards_input(relu_layer):
+	out_shape = relu_layer.OUTPUT_SHAPE
+	return np.arange(np.prod(out_shape)).reshape(out_shape)
+
+@pytest.fixture
+def backwards_expected_result():
+	return np.array(
+		[
+			[0, 0, 0, 0, 0],
+			[5, 6, 7, 8, 9]
+		]
+	)
+
+def test_forwards(relu_layer,forwards_input,forwards_expected_result):
+	assert np.array_equal(
+		relu_layer._forwards(forwards_input),
+		forwards_expected_result
+	)
+
+def test_backwards(relu_layer,backwards_input,forwards_input,forwards_expected_result,backwards_expected_result):
+	relu_layer.input = forwards_input
+	relu_layer.output = forwards_expected_result
+	assert np.array_equal(
+		relu_layer._backwards(backwards_input),
+		backwards_expected_result
+	)

From 17c34fd0597f87bb0dba8cc19551188145881ce4 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 18:45:17 +0100
Subject: [PATCH 20/24] feat: add softmax and sigmoid to activations/ --
 untested

---
 cnn/layers/activations/__init__.py |  3 ++-
 cnn/layers/activations/sigmoid.py  | 23 +++++++++++++++++++++++
 cnn/layers/activations/softmax.py  | 26 ++++++++++++++++++++++++++
 3 files changed, 51 insertions(+), 1 deletion(-)
 create mode 100644 cnn/layers/activations/sigmoid.py
 create mode 100644 cnn/layers/activations/softmax.py

diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py
index 5c01e85..292e1e0 100644
--- a/cnn/layers/activations/__init__.py
+++ b/cnn/layers/activations/__init__.py
@@ -1,2 +1,3 @@
 from .relu import ReLU
-
+from .softmax import Softmax
+from .sigmoid import Sigmoid
diff --git a/cnn/layers/activations/sigmoid.py b/cnn/layers/activations/sigmoid.py
new file mode 100644
index 0000000..78d012f
--- /dev/null
+++ b/cnn/layers/activations/sigmoid.py
@@ -0,0 +1,23 @@
+import numpy as np
+from .base import BaseActivation
+
+class Sigmoid(BaseActivation):
+	
+	def _forwards(self,X:np.ndarray):
+		self.input = X.copy()
+		# The sigmoid function has a smooth gradient and outputs values between zero and one. For very high or low values of the input parameters, the network can be very slow to reach a prediction, called the vanishing gradient problem.
+		self.output = 1 / (1 + np.exp(-X))
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""
+		Return dC/dZ for this layer given dC/dA (same shape as self.output).
+
+		Sigmoid acts element-wise, so dA/dZ is diagonal with entries sig * (1 - sig)
+		and the chain rule reduces to an element-wise product. This is equivalent to
+		multiplying each column by a dense diagonal Jacobian, without allocating that
+		matrix and without needing self.prev_layer.
+		"""
+		dAdZ = self.output * (1 - self.output)	# sig (1 - sig), element-wise.
+
+		return dCdA * dAdZ
diff --git a/cnn/layers/activations/softmax.py b/cnn/layers/activations/softmax.py
new file mode 100644
index 0000000..02e68f9
--- /dev/null
+++ b/cnn/layers/activations/softmax.py
@@ -0,0 +1,26 @@
+import numpy as np
+from .base import BaseActivation
+
+class Softmax(BaseActivation):
+	
+	def _forwards(self,X:np.ndarray):
+		self.input = X.copy()
+		assert self.prev_layer.LAYER_TYPE == 'FC', 'Softmax activation function is not supported for non-FC inputs.'
+		# Softmax is a special activation function used for output neurons. It normalizes outputs for each class between 0 and 1, and returns the probability that the input belongs to a specific class.
+		exp = np.exp(X - np.max(X,axis=0))	# Normalises by max value - provides "numerical stability"
+		self.output = exp / np.sum(exp,axis=0)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		# Vectorised implementation from https://stackoverflow.com/questions/59286911/vectorized-softmax-gradient
+		# NOTE: Transpose is required to create the square matrices of each set of node values.
+		outputT = self.output.T
+		diag_matrices = outputT.reshape(outputT.shape[0],-1,1) * np.diag(np.ones(outputT.shape[1]))	# Diagonal Matrices
+		outer_product = np.matmul(outputT.reshape(outputT.shape[0],-1,1), outputT.reshape(outputT.shape[0],1,-1))	# Outer product
+		Jsm = diag_matrices - outer_product
+		dAdZ = Jsm	# NOTE: Even though this equation uses softmax transpose at start, the output does not require transposing because the softmax derivative is symmetrical along diagonal.
+
+		dC_dAexpanded = dCdA.T.reshape((dCdA.T.shape[0],-1,1))
+		dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded)
+
+		return dC_dZexpanded.reshape(dCdA.shape[1],-1).T

From 34d1e15f9995e2e546e41993cb9f84b6ec82037d Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 18:55:02 +0100
Subject: [PATCH 21/24] feat: add Tanh & LeakyReLU to activations/ -- untested

---
 cnn/layers/activations/__init__.py |  3 ++-
 cnn/layers/activations/relu.py     | 21 +++++++++++++++++++++
 cnn/layers/activations/tanh.py     | 18 ++++++++++++++++++
 3 files changed, 41 insertions(+), 1 deletion(-)
 create mode 100644 cnn/layers/activations/tanh.py

diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py
index 292e1e0..db192a3 100644
--- a/cnn/layers/activations/__init__.py
+++ b/cnn/layers/activations/__init__.py
@@ -1,3 +1,4 @@
-from .relu import ReLU
+from .relu import ReLU, LeakyReLU
 from .softmax import Softmax
 from .sigmoid import Sigmoid
+from .tanh import Tanh
diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py
index b52e9e8..a1c0e89 100644
--- a/cnn/layers/activations/relu.py
+++ b/cnn/layers/activations/relu.py
@@ -20,3 +20,24 @@ def _backwards(self,dCdA:np.ndarray):
 		dC_dZexpanded = np.matmul(dAdZ,dC_dAexpanded)
 
 		return dC_dZexpanded.reshape(dCdA.shape[1],-1).T
+
+class LeakyReLU(BaseActivation):
+	
+	def _forwards(self,X:np.ndarray):
+		self.input = X.copy()
+		# Leaky ReLU: identity for positive values, slope self.alpha for values <= 0.
+		# np.where allocates a new array, so the caller's X is never modified in place and integer input is not truncated.
+		self.output = np.where(X > 0, X, self.alpha * X)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""
+		Return dC/dZ given dC/dA (same shape as self.output).
+
+		The function is element-wise, so the chain rule is an element-wise product with
+		the local slope: 1 where the stored input is > 0, self.alpha elsewhere. Inputs equal
+		to 0 get slope self.alpha, matching the forward pass which scales values <= 0.
+		"""
+		dAdZ = np.where(self.input > 0, 1.0, self.alpha)
+
+		return dCdA * dAdZ
diff --git a/cnn/layers/activations/tanh.py b/cnn/layers/activations/tanh.py
new file mode 100644
index 0000000..6faf71b
--- /dev/null
+++ b/cnn/layers/activations/tanh.py
@@ -0,0 +1,18 @@
+import numpy as np
+from .base import BaseActivation
+
+class Tanh(BaseActivation):
+	
+	def _forwards(self,X:np.ndarray):
+		self.input = X.copy()
+		# The TanH function is zero-centered. np.tanh is used because the explicit exp formula evaluates to inf/inf = nan for large |X|.
+		self.output = np.tanh(X)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		# tanh acts element-wise, so dA/dZ is diagonal with entries 1 - tanh^2 and the
+		# chain rule reduces to an element-wise product with dC/dA. Building np.diag over
+		# the flattened output only lines up with a batched matmul when there is one column.
+		dAdZ = 1 - np.square(self.output)
+
+		return dCdA * dAdZ

From 13ed2e02d9194e8f22da805841191a273f14228b Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 19:02:08 +0100
Subject: [PATCH 22/24] feat: add wrapping forwards/ backwards methods for
 BaseActivation

---
 cnn/layers/activations/base.py | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py
index f6045f1..3b9f4e4 100644
--- a/cnn/layers/activations/base.py
+++ b/cnn/layers/activations/base.py
@@ -17,3 +17,27 @@ def prepare_layer(self):
 			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
 		self.OUTPUT_SHAPE = self.INPUT_SHAPE
 
+	def forwards(self, X):
+		"""Validate X, run the subclass _forwards, record metrics and return self.output."""
+		# prev_layer is None when this is the first layer (prepare_layer allows that), so guard before reading its type.
+		if self.prev_layer is not None and self.prev_layer.LAYER_TYPE == 'FC':
+			assert len(X.shape) == 2 and X.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {X.shape}'
+		self.input = X
+		self._forwards(X)	# Subclass hook; it is expected to set self.output.
+
+		assert self.output.shape == X.shape, f'Output shape, {self.output.shape}, not the same as input shape, {X.shape}.'
+		self._track_metrics(output=self.output)
+		return self.output
+
+	def backwards(self, dCdA):
+		"""Check dCdA against self.output, record it, and return dC/dZ computed by the subclass _backwards."""
+		assert dCdA.shape == self.output.shape, f'dC/dA shape, {dCdA.shape}, not as expected, {self.output.shape}.'
+		self._track_metrics(cost_gradient=dCdA)
+		dCdZ = self._backwards(dCdA)
+		# The gradient handed back must have the same shape as the previous layer's output.
+		assert dCdZ.shape == self.prev_layer.output.shape, f'Back propagating dC_dZ has shape: {dCdZ.shape} when previous layer output has shape {self.prev_layer.output.shape}'
+
+		return dCdZ
+
+
+

From 40b94dfe20d61a7433f33370b90b56864997a62b Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 19:06:40 +0100
Subject: [PATCH 23/24] fix: cleanup unnecessary init args in BaseActivation

---
 cnn/layers/activations/base.py | 5 +----
 cnn/layers/activations/relu.py | 3 +++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py
index 3b9f4e4..92f9452 100644
--- a/cnn/layers/activations/base.py
+++ b/cnn/layers/activations/base.py
@@ -1,15 +1,12 @@
 from ..layer import Layer
 
 class BaseActivation(Layer):
-	def __init__(self,function: str=None,alpha=0.01,input_shape=None):
+	def __init__(self,input_shape=None):
 		super().__init__()
 
 		self.trainable = False
-		self.alpha = alpha
 		self.INPUT_SHAPE = input_shape
 
-		self.FUNCTION = None if function is None else function.lower()
-
 	def prepare_layer(self):
 		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
 			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py
index a1c0e89..43da5d2 100644
--- a/cnn/layers/activations/relu.py
+++ b/cnn/layers/activations/relu.py
@@ -22,6 +22,9 @@ def _backwards(self,dCdA:np.ndarray):
 		return dC_dZexpanded.reshape(dCdA.shape[1],-1).T
 
 class LeakyReLU(BaseActivation):
+	def __init__(self, alpha=0.01, input_shape=None):
+		super().__init__(input_shape=input_shape)
+		self.alpha = alpha
 	
 	def _forwards(self,X:np.ndarray):
 		self.input = X.copy()

From 5e15790121813df2acec317c90eb263b8042a1b3 Mon Sep 17 00:00:00 2001
From: JamesQuirk <james1201019@gmail.com>
Date: Sat, 21 May 2022 20:41:17 +0100
Subject: [PATCH 24/24] feat: cleanup model.py; remove bad assertion; remove
 activation.py

---
 cnn/layers/__init__.py             |   7 +-
 cnn/layers/activation.py           | 120 -----------------------------
 cnn/layers/activations/__init__.py |  14 ++++
 cnn/layers/activations/base.py     |   2 +
 cnn/layers/activations/relu.py     |   3 +
 cnn/layers/activations/sigmoid.py  |   1 +
 cnn/layers/activations/softmax.py  |   1 +
 cnn/layers/activations/tanh.py     |   1 +
 cnn/layers/layer.py                |  10 +--
 cnn/model.py                       |  47 ++++-------
 cnn/optimisers/__init__.py         |  11 +--
 11 files changed, 45 insertions(+), 172 deletions(-)
 delete mode 100644 cnn/layers/activation.py

diff --git a/cnn/layers/__init__.py b/cnn/layers/__init__.py
index c3029be..d918d16 100644
--- a/cnn/layers/__init__.py
+++ b/cnn/layers/__init__.py
@@ -1,11 +1,6 @@
-from .activation import Activation
+from . import activations
 from .conv import Conv2D
 from .fc import FC
 from .flatten import Flatten
 from .pool import Pool
 
-
-# Expose list of all optimiser class names.
-import inspect
-import sys
-layers = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)]
\ No newline at end of file
diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
deleted file mode 100644
index 8495643..0000000
--- a/cnn/layers/activation.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import numpy as np
-from .layer import Layer
-
-class Activation(Layer):
-	def __init__(self,function: str=None,alpha=0.01,input_shape=None):
-		super().__init__()
-
-		self.trainable = False
-		self.alpha = alpha
-		self.INPUT_SHAPE = input_shape
-
-		self.FUNCTION = None if function is None else function.lower()
-
-	def prepare_layer(self):
-		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
-			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
-		else:
-			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
-		self.OUTPUT_SHAPE = self.INPUT_SHAPE
-
-	def _forwards(self,_input: np.ndarray) -> np.ndarray:
-		if self.prev_layer.LAYER_TYPE == 'FC':
-			assert len(_input.shape) == 2 and _input.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {(_input.shape[0],1)}'
-		self.input = _input
-		
-		if self.FUNCTION is None:
-			self.output = _input
-		elif self.FUNCTION == 'relu':	# NOTE: This would work for Conv activation.
-			# The ReLu function is highly computationally efficient but is not able to process inputs that approach zero or negative.
-			self.output = np.maximum(_input,0)
-		elif self.FUNCTION == 'softmax':
-			assert self.prev_layer.LAYER_TYPE == 'FC', 'Softmax activation function is not supported for non-FC inputs.'
-			# Softmax is a special activation function used for output neurons. It normalizes outputs for each class between 0 and 1, and returns the probability that the input belongs to a specific class.
-			exp = np.exp(_input - np.max(_input,axis=0))	# Normalises by max value - provides "numerical stability"
-			self.output = exp / np.sum(exp,axis=0)
-			# print(_input)
-			# print(self.output)
-			# assert round(self.output.sum()) == 1, f'Output array sum {self.output.sum()} is not equal to 1.\nInput Array: {self.input.reshape((1,-1))}\nOuput Array: {self.output.reshape((1,-1))}'
-		elif self.FUNCTION == 'sigmoid':	# NOTE: This would work for Conv activation.
-			# The sigmoid function has a smooth gradient and outputs values between zero and one. For very high or low values of the input parameters, the network can be very slow to reach a prediction, called the vanishing gradient problem.
-			self.output = 1 / (1 + np.exp(-_input))
-		elif self.FUNCTION == 'step': # TODO: Define "step function" activation
-			pass
-		elif self.FUNCTION == 'tanh':
-			# The TanH function is zero-centered making it easier to model inputs that are strongly negative strongly positive or neutral.
-			self.output = ( np.exp(_input) - np.exp(-_input) ) / ( np.exp(_input) + np.exp(-_input) )
-		elif self.FUNCTION == 'swish': # TODO: Define "Swish function" activation
-			# Swish is a new activation function discovered by Google researchers. It performs better than ReLu with a similar level of computational efficiency.
-			pass
-		elif self.FUNCTION == 'leaky relu':
-			# The Leaky ReLu function has a small positive slope in its negative area, enabling it to process zero or negative values.
-			_input[_input <= 0] = self.alpha * _input[_input <= 0]
-			self.output = _input
-		elif self.FUNCTION == 'parametric relu': # TODO: Define "Parametric ReLu"
-			#  The Parametric ReLu function allows the negative slope to be learned, performing backpropagation to learn the most effective slope for zero and negative input values.
-			pass
-		
-		assert self.output.shape == _input.shape, f'Output shape, {self.output.shape}, not the same as input shape, {_input.shape}.'
-		self._track_metrics(output=self.output)
-		# print(f'Layer: {self.MODEL_STRUCTURE_INDEX} output:',self.output)
-		return self.output
-
-	def _backwards(self,dC_dA: np.ndarray) -> np.ndarray:
-		"""Compute derivative of Activation w.r.t. Z
-		NOTE: CURRENTLY NOT SUPPORTED FOR CONV/POOL LAYERS.
-		"""
-		assert dC_dA.shape == self.output.shape, f'dC/dA shape, {dC_dA.shape}, not as expected, {self.output.shape}.'
-		self._track_metrics(cost_gradient=dC_dA)
-		dA_dZ = np.zeros(shape=(self.output.shape[1],self.output.shape[0],self.prev_layer.output.shape[0]))	# TODO: Will need varifying for Conv Activation.
-		if self.FUNCTION is None: # a = z
-			dA_dZ = np.broadcast_to(np.diag(np.ones(dA_dZ.shape[-1])),dA_dZ.shape )
-		elif self.FUNCTION == 'relu':
-			# Insert layer input along dA_dZ diagonals - values > 0 -> 1; values <= 0 -> 0
-			ix,iy = np.diag_indices_from(dA_dZ[0,:,:])
-			dA_dZ[:,iy,ix] = (self.input.T > 0).astype(int)
-		elif self.FUNCTION == 'softmax':
-			# Vectorised implementation from https://stackoverflow.com/questions/59286911/vectorized-softmax-gradient
-			# NOTE: Transpose is required to create the square matrices of each set of node values.
-			outputT = self.output.T
-			diag_matrices = outputT.reshape(outputT.shape[0],-1,1) * np.diag(np.ones(outputT.shape[1]))	# Diagonal Matrices
-			outer_product = np.matmul(outputT.reshape(outputT.shape[0],-1,1), outputT.reshape(outputT.shape[0],1,-1))	# Outer product
-			Jsm = diag_matrices - outer_product
-			dA_dZ = Jsm	# NOTE: Even though this equation uses softmax transpose at start, the output does not require transposing because the softmax derivative is symmetrical along diagonal.
-
-		elif self.FUNCTION == 'sigmoid':
-			# sig (1 - sig) across diagonals
-			ix,iy = np.diag_indices_from(dA_dZ[0,:,:])
-			dA_dZ[:,iy,ix] = (self.output * (1 - self.output)).T	# Element-wise multiplication.
-		elif self.FUNCTION == 'step': # TODO: Define "step function" derivative
-			dA_dZ = None
-		elif self.FUNCTION == 'tanh':
-			dA_dZ = np.diag((1 - np.square( self.output )).flatten())
-		elif self.FUNCTION == 'swish': # TODO: Define "Swish function" derivative
-			dA_dZ = None
-		elif self.FUNCTION == 'leaky relu':
-			ix,iy = np.diag_indices_from(dA_dZ[0,:,:])
-			dA_dZ[:,iy,ix] = ( (self.input > 0).astype(int) + ((self.input < 0).astype(int) * self.alpha ) ).T
-
-			# input_diag = np.diag(self.input.flatten())
-			# input_diag[input_diag > 0] = 1
-			# input_diag[input_diag < 0] = self.alpha
-			# dA_dZ = input_diag
-		elif self.FUNCTION == 'parametric relu': # TODO: Define "Parametric ReLu" derivative
-			dA_dZ = None
-		
-		assert dA_dZ is not None, f'No derivative defined for chosen activation function "{self.FUNCTION}"'
-		assert dA_dZ.shape[1:] == (self.output.shape[0],self.output.shape[0]), 'dA/dZ is expected to be a square matrix (for each example in batch) containing gradient between each activation node and each input node.'
-		# print('Layer: ', self.LAYER_TYPE)
-		# print('Local gradient shape:',dA_dZ.shape)
-		# print('Cost gradient shape:',dC_dA.shape)
-
-		dC_dAexpanded = dC_dA.T.reshape((dC_dA.T.shape[0],-1,1))
-		dC_dZexpanded = np.matmul(dA_dZ,dC_dAexpanded)
-		dC_dZ = dC_dZexpanded.reshape(dC_dA.shape[1],-1).T
-		
-		assert dC_dZ.shape == self.prev_layer.output.shape, f'Back propagating dC_dZ has shape: {dC_dZ.shape} when previous layer output has shape {self.prev_layer.output.shape}'
-		if self.FUNCTION is None:
-			assert np.array_equal(dC_dZ,dC_dA), 'For activation: None; dC/dZ is expected to be the same as dC/dA.'
-		
-		return dC_dZ
diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py
index db192a3..efd531a 100644
--- a/cnn/layers/activations/__init__.py
+++ b/cnn/layers/activations/__init__.py
@@ -2,3 +2,17 @@
 from .softmax import Softmax
 from .sigmoid import Sigmoid
 from .tanh import Tanh
+
+# ------------- BELOW IS DYNAMIC TO AVAILABLE ACTIVATION CLASSES ----------------
+
+# Expose list of all activation class names.
+import inspect
+import sys
+available_activations = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)]
+
+__activation_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)]
+
+def from_name(name):
+	# Case-insensitive lookup by ALIAS or class name (FC lower-cases the name it passes in); returns None when nothing matches or name is not a string.
+	key = name.lower() if isinstance(name, str) else None
+	return next((cls() for cls in __activation_classes if key in (cls.ALIAS, cls.__name__.lower())), None)
diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py
index 92f9452..62357be 100644
--- a/cnn/layers/activations/base.py
+++ b/cnn/layers/activations/base.py
@@ -1,6 +1,8 @@
 from ..layer import Layer
 
 class BaseActivation(Layer):
+	ALIAS = "base"
+	
 	def __init__(self,input_shape=None):
 		super().__init__()
 
diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py
index 43da5d2..51eeb79 100644
--- a/cnn/layers/activations/relu.py
+++ b/cnn/layers/activations/relu.py
@@ -2,6 +2,7 @@
 from .base import BaseActivation
 
 class ReLU(BaseActivation):
+	ALIAS = "relu"
 	
 	def _forwards(self,X:np.ndarray):
 		self.input = X.copy()
@@ -22,6 +23,8 @@ def _backwards(self,dCdA:np.ndarray):
 		return dC_dZexpanded.reshape(dCdA.shape[1],-1).T
 
 class LeakyReLU(BaseActivation):
+	ALIAS = "leaky_relu"
+
 	def __init__(self, alpha=0.01, input_shape=None):
 		super().__init__(input_shape=input_shape)
 		self.alpha = alpha
diff --git a/cnn/layers/activations/sigmoid.py b/cnn/layers/activations/sigmoid.py
index 78d012f..e2f9d57 100644
--- a/cnn/layers/activations/sigmoid.py
+++ b/cnn/layers/activations/sigmoid.py
@@ -2,6 +2,7 @@
 from .base import BaseActivation
 
 class Sigmoid(BaseActivation):
+	ALIAS = "sigmoid"
 	
 	def _forwards(self,X:np.ndarray):
 		self.input = X.copy()
diff --git a/cnn/layers/activations/softmax.py b/cnn/layers/activations/softmax.py
index 02e68f9..6a0adb6 100644
--- a/cnn/layers/activations/softmax.py
+++ b/cnn/layers/activations/softmax.py
@@ -2,6 +2,7 @@
 from .base import BaseActivation
 
 class Softmax(BaseActivation):
+	ALIAS = "softmax"
 	
 	def _forwards(self,X:np.ndarray):
 		self.input = X.copy()
diff --git a/cnn/layers/activations/tanh.py b/cnn/layers/activations/tanh.py
index 6faf71b..d274903 100644
--- a/cnn/layers/activations/tanh.py
+++ b/cnn/layers/activations/tanh.py
@@ -2,6 +2,7 @@
 from .base import BaseActivation
 
 class Tanh(BaseActivation):
+	ALIAS = "tanh"
 	
 	def _forwards(self,X:np.ndarray):
 		self.input = X.copy()
diff --git a/cnn/layers/layer.py b/cnn/layers/layer.py
index 47692a0..4a9541e 100644
--- a/cnn/layers/layer.py
+++ b/cnn/layers/layer.py
@@ -52,17 +52,17 @@ def define_details(self):
 			'LAYER_INDEX':self.MODEL_STRUCTURE_INDEX,
 			'LAYER_TYPE':self.LAYER_TYPE
 		}
-		if self.LAYER_TYPE is 'CONV':
+		if self.LAYER_TYPE is 'Conv2D':
 			details.update({
 				'NUM_FILTERS':self.NUM_FILTERS,
 				'STRIDE':self.STRIDE
 			})
-		elif self.LAYER_TYPE is 'POOL':
+		elif self.LAYER_TYPE is 'Pool':
 			details.update({
 				'STRIDE':self.STRIDE,
 				'POOL_TYPE':self.POOL_TYPE
 			})
-		elif self.LAYER_TYPE is 'FLATTEN':
+		elif self.LAYER_TYPE is 'Flatten':
 			details.update({
 			})
 		elif self.LAYER_TYPE is 'FC':
@@ -70,10 +70,6 @@ def define_details(self):
 				'NUM_NODES':self.NUM_NODES,
 				'ACTIVATION':self.ACTIVATION
 			})
-		elif self.LAYER_TYPE is 'ACTIVATION':
-			details.update({
-				'FUNCTION':self.FUNCTION
-			})
 		
 		return details
 
diff --git a/cnn/model.py b/cnn/model.py
index d50efc2..d8ad27a 100644
--- a/cnn/model.py
+++ b/cnn/model.py
@@ -20,41 +20,35 @@ class Model():
 	This is the top level class.
 	"""
 
-	def __init__(self,optimiser_method='gd'):
+	def __init__(self):
 		'''
 		- optimiser_method (str): Options: ('gd','momentum','rmsprop','adam'). Default is 'gd'.
 		'''
-		assert optimiser_method.lower() in Model.SUPPORTED_OPTIMISERS, f'You must provide an optimiser that is supported. The options are: {Model.SUPPORTED_OPTIMISERS}'
 
 		self.is_prepared = False
 
-		self.OPTIMISER_METHOD = optimiser_method.lower()
-
 		self.structure = []	# defines order of model (list of layer objects) - EXCLUDES INPUT DATA
-		self.layer_counts = dict(zip(['total'] + layers.layers,[0]*(len(layers.layers)+1)))	# dict for counting number of each layer type
 
 	def add_layer(self,layer: Layer) -> None:
-		if layer.LAYER_TYPE == 'Activation' and self.structure[-1].LAYER_TYPE == 'Activation':
+		if layer.LAYER_TYPE in layers.activations.available_activations and self.structure[-1].LAYER_TYPE in layers.activations.available_activations:
 			print('-- WARNING:: Two Activation Layers in subsequent positions in the model.')
-			if layer.FUNCTION == self.structure[-1].FUNCTION:
+			if layer.LAYER_TYPE == self.structure[-1].LAYER_TYPE:
 				print('--- INFO:: Both Activation Layers are the same, skipping creation of second layer.')
 				return
 
 		layer.model = self
 
 		if len(self.structure) > 0:
-			if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC','Activation'):
+			if layer.LAYER_TYPE == 'FC' and self.structure[-1].LAYER_TYPE not in ('Flatten','FC',*layers.activations.available_activations):
 				# If no Flatten layer added before adding first FC layer, one will be added automatically.
 				self.add_layer(layers.Flatten())
 
 		self.structure.append(layer)
-		self.layer_counts[layer.LAYER_TYPE] += 1
-		self.layer_counts['total'] += 1
 
 		if layer.LAYER_TYPE == 'FC':
 			# Create the Activation Layer (transparent to user).
 			self.add_layer(
-				layers.Activation(function=layer.ACTIVATION)
+				layers.activations.from_name(layer.ACTIVATION)
 			)
 
 	def remove_layer(self,index: int) -> None:
@@ -62,31 +56,24 @@ def remove_layer(self,index: int) -> None:
 		if self.is_prepared:
 			print('-- INFO:: Re-compiling model...')
 			self.prepare_model()
-			
-	def get_model_details(self):
-		details = []
-		for layer in self.structure:
-			details.append(layer.define_details())
-
-		return details
 		
-	def prepare_model(self,optimiser: Any='gd',learning_rate=None):
+	def prepare_model(self,optimiser: Any='gd'):
 		""" Called once final layer is added, each layer can now initiate its weights and biases. """
 		print('Preparing model...')
 
 		if type(optimiser) == str:
-			assert optimiser.lower() in optimisers.optimiser_names, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_names}'
-			self.OPTIMISER = optimisers.from_name(optimiser,learning_rate)
+			assert optimiser.lower() in optimisers.optimiser_identifiers, f'Unrecognised optimiser name: {optimiser}; choose from: {optimisers.optimiser_identifiers}'
+			self.OPTIMISER = optimisers.from_name(optimiser)
 		else:
-			assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_names), f'Invalid optimiser: {optimiser}'
+			assert (isinstance(optimiser,optimisers.BaseOptimiser) and optimiser.__class__.__name__ in optimisers.optimiser_identifiers), f'Invalid optimiser: {optimiser}'
 			self.OPTIMISER = optimiser
 
 		self.details = {
 			'param_counts': [],
 			'output_shapes': []
 		}
-		if self.layer_counts['total'] > 1:
-			for index in range(self.layer_counts['total']):
+		if len(self.structure) > 1:
+			for index, curr_layer in enumerate(self.structure):
 				curr_layer = self.structure[index]
 				if index != len(self.structure) - 1:
 					next_layer = self.structure[index + 1]
@@ -100,7 +87,7 @@ def prepare_model(self,optimiser: Any='gd',learning_rate=None):
 				curr_layer.MODEL_STRUCTURE_INDEX = index
 
 				curr_layer.prepare_layer()
-				if curr_layer.MODEL_STRUCTURE_INDEX == 0:
+				if index == 0:
 					# First layer; set model input shape.
 					self.INPUT_SHAPE = curr_layer.INPUT_SHAPE
 
@@ -128,7 +115,6 @@ def train(self,Xs: np.ndarray,ys: np.ndarray,epochs: int,max_batch_size: int=32,
 		ys = ys.reshape(-1,1) if ys.ndim == 1 else ys
 		# --------- ASSERTIONS -----------
 		# Check shapes and orientation are as expected
-		assert self.structure[-1].LAYER_TYPE in ('FC','Activation'), 'Model must have either FC or Activation as final layer.'
 		assert Xs.shape[0] == ys.shape[0], f'Dimension (0) of input data [{Xs.shape}] and labels [{ys.shape}] does not match.'
 		assert Xs.ndim in (2,4), 'Xs must be either 2 dimensions (for NN) or 4 dimensions (for Model).'
 		if Xs.ndim == 4:
@@ -195,8 +181,6 @@ def _print_train_progress(self,batch_index: int) -> None:
 		else:
 			print(print_string,end='\r')
 
-	SUPPORTED_OPTIMISERS = ('gd','momentum','rmsprop','adam')
-
 	def _iterate_forwards(self) -> None:
 		for batch_ind in range(self.BATCH_COUNT):
 			ind_lower = batch_ind * self.MAX_BATCH_SIZE	# Lower bound of index range
@@ -296,14 +280,13 @@ def print_summary(self):
 		# Add layer info...
 		total_trainable = 0
 		total_non_trainable = 0
-		for layer in self.structure:
-			index = str(layer.MODEL_STRUCTURE_INDEX)
-			type_ = layer.LAYER_TYPE + ' (' + layer.FUNCTION + ')' if layer.LAYER_TYPE == "Activation" else layer.LAYER_TYPE
+		for index, layer in enumerate(self.structure):
+			type_ = layer.LAYER_TYPE
 			out_shape = layer.OUTPUT_SHAPE
 			trainable_params, non_trainable_params = layer.count_params(split_trainable=True)
 			total_trainable += trainable_params
 			total_non_trainable += non_trainable_params
-			info_str = ' ' + index + ' '*(field_lengths[0] - len(index)-1) + \
+			info_str = ' ' + str(index) + ' '*(field_lengths[0] - len(str(index))-1) + \
 				' ' + type_ + ' '*(field_lengths[1] - len(type_) -1) + \
 				' ' + str(out_shape) + ' '*(field_lengths[2] - len(str(out_shape))-1) + \
 				' ' + str(trainable_params) + ' '*(field_lengths[3] - len(str(trainable_params))-1) + \
diff --git a/cnn/optimisers/__init__.py b/cnn/optimisers/__init__.py
index ef04035..33ee71a 100644
--- a/cnn/optimisers/__init__.py
+++ b/cnn/optimisers/__init__.py
@@ -10,17 +10,14 @@
 # Expose list of all optimiser class names.
 import inspect
 import sys
-__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda cls: isinstance(cls,BaseOptimiser))]
+__optimiser_classes = [c[1] for c in inspect.getmembers(sys.modules[__name__], lambda cls: inspect.isclass(cls) and issubclass(cls,BaseOptimiser))]
 
 # Following includes both class name and alias property.
-optimiser_names = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes]
+optimiser_identifiers = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)] + [opt.ALIAS for opt in __optimiser_classes]
 
-def from_name(name,learning_rate):
+def from_name(name):
 	for optimiser in __optimiser_classes:
 		if optimiser.ALIAS == name or optimiser.__name__ == name:
-			if learning_rate is None:
-				return optimiser()
-			else:
-				return optimiser(learning_rate=learning_rate)
+			return optimiser()