JamesQuirk · JamesQuirk · Dec 27, 2021 · Dec 27, 2021 · Dec 27, 2021 · Dec 27, 2021
diff --git a/cnn/layers/__init__.py b/cnn/layers/__init__.py
@@ -1,11 +1,6 @@
-from .activation import Activation
+from . import activations
 from .conv import Conv2D
 from .fc import FC
 from .flatten import Flatten
 from .pool import Pool
 
-
-# Expose list of all optimiser class names.
-import inspect
-import sys
-layers = [c[0] for c in inspect.getmembers(sys.modules[__name__], inspect.isclass)]
diff --git a/cnn/layers/activation.py b/cnn/layers/activation.py
diff --git a/cnn/layers/activations/__init__.py b/cnn/layers/activations/__init__.py
@@ -0,0 +1,18 @@
+from .relu import ReLU, LeakyReLU
+from .softmax import Softmax
+from .sigmoid import Sigmoid
+from .tanh import Tanh
+
+# ------------- BELOW IS DYNAMIC TO AVAILABLE ACTIVATION CLASSES ----------------
+
+# Gather every class visible in this package namespace once, then derive both
+# the public list of class names and the private list of class objects from it.
+import inspect
+import sys
+_activation_members = inspect.getmembers(sys.modules[__name__], inspect.isclass)
+
+# Names of all activation classes imported above.
+available_activations = [member_name for member_name, _ in _activation_members]
+
+# The matching class objects, in the same order as the names.
+__activation_classes = [member_class for _, member_class in _activation_members]
+del _activation_members
+
+def from_name(name):
+	"""Instantiate the activation whose ``ALIAS`` or class name equals ``name``.
+
+	The first matching class is constructed with its default arguments.
+	Returns ``None`` when no known activation class matches.
+	"""
+	matching_classes = (
+		candidate for candidate in __activation_classes
+		if candidate.ALIAS == name or candidate.__name__ == name
+	)
+	matched_class = next(matching_classes, None)
+	if matched_class is None:
+		return None
+	return matched_class()
diff --git a/cnn/layers/activations/base.py b/cnn/layers/activations/base.py
@@ -0,0 +1,42 @@
+from ..layer import Layer
+
+class BaseActivation(Layer):
+	"""Shared scaffolding for activation layers.
+
+	Subclasses implement two hooks:
+	  * ``_forwards(X)``     - must store the activated values in ``self.output``.
+	  * ``_backwards(dCdA)`` - must return dC/dZ with the same shape as the layer input.
+
+	This class wraps those hooks with shape checks and metric tracking, and marks
+	the layer as non-trainable.
+	"""
+	ALIAS = "base"
+
+	def __init__(self,input_shape=None):
+		"""Create the layer; ``input_shape`` is only required when this is the first layer."""
+		super().__init__()
+
+		self.trainable = False
+		self.INPUT_SHAPE = input_shape
+
+	def prepare_layer(self):
+		"""Resolve INPUT_SHAPE / OUTPUT_SHAPE; an activation never changes the shape."""
+		if self.prev_layer is None:	# This means this is the first layer in the structure, so 'input' is the only thing before.
+			assert self.INPUT_SHAPE is not None, 'ERROR: Must define input shape for first layer.'
+		else:
+			self.INPUT_SHAPE = self.prev_layer.OUTPUT_SHAPE
+		self.OUTPUT_SHAPE = self.INPUT_SHAPE
+
+	def forwards(self, X):
+		"""Run the activation on ``X`` and return ``self.output`` (same shape as ``X``)."""
+		# prepare_layer() allows prev_layer to be None (first layer), so guard before
+		# dereferencing it; the FC-specific shape check only applies after an FC layer.
+		if self.prev_layer is not None and self.prev_layer.LAYER_TYPE == 'FC':
+			# Report the real shape of X so a rank or batch mismatch is visible in the message.
+			assert len(X.shape) == 2 and X.shape[0] == self.INPUT_SHAPE[0], f'Expected input of shape {self.INPUT_SHAPE} instead got {X.shape}'
+		self.input = X
+
+		# The subclass hook is expected to populate self.output.
+		self._forwards(X)
+
+		assert self.output.shape == X.shape, f'Output shape, {self.output.shape}, not the same as input shape, {X.shape}.'
+		self._track_metrics(output=self.output)
+
+		return self.output
+
+	def backwards(self, dCdA):
+		"""Back-propagate ``dCdA`` (dC/dA, shaped like ``self.output``) and return dC/dZ."""
+		assert dCdA.shape == self.output.shape, f'dC/dA shape, {dCdA.shape}, not as expected, {self.output.shape}.'
+		self._track_metrics(cost_gradient=dCdA)
+
+		dCdZ = self._backwards(dCdA)
+
+		# With no previous layer, the layer's own input defines the shape to propagate back.
+		expected_shape = self.input.shape if self.prev_layer is None else self.prev_layer.output.shape
+		assert dCdZ.shape == expected_shape, f'Back propagating dC_dZ has shape: {dCdZ.shape} when previous layer output has shape {expected_shape}'
+
+		return dCdZ
+
+
+
diff --git a/cnn/layers/activations/relu.py b/cnn/layers/activations/relu.py
@@ -0,0 +1,49 @@
+import numpy as np
+from .base import BaseActivation
+
+class ReLU(BaseActivation):
+	"""Rectified linear unit: ``max(x, 0)`` applied element-wise."""
+	ALIAS = "relu"
+
+	def _forwards(self,X:np.ndarray):
+		"""Store a copy of ``X`` and return ``max(X, 0)`` element-wise."""
+		self.input = X.copy()
+		self.output = np.maximum(self.input,0)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""Return dC/dZ = dC/dA * dA/dZ, where dA/dZ is 1 for inputs > 0 and 0 otherwise.
+
+		ReLU acts independently on every element, so its Jacobian is diagonal and
+		the Jacobian-vector product reduces to an element-wise multiplication.
+		This avoids materialising a dense (batch, n, n) array and works for inputs
+		of any rank, not just 2-D inputs.
+		"""
+		dAdZ = (self.input > 0).astype(float)
+		return dCdA * dAdZ
+
+class LeakyReLU(BaseActivation):
+	"""Leaky ReLU: ``x`` for ``x > 0`` and ``alpha * x`` otherwise, applied element-wise."""
+	ALIAS = "leaky_relu"
+
+	def __init__(self, alpha=0.01, input_shape=None):
+		"""``alpha`` is the slope used for non-positive inputs."""
+		super().__init__(input_shape=input_shape)
+		self.alpha = alpha
+
+	def _forwards(self,X:np.ndarray):
+		"""Store a copy of ``X`` and return the leaky-rectified values as a new array."""
+		self.input = X.copy()
+		# The Leaky ReLu function has a small positive slope in its negative area, enabling it to process zero or negative values.
+		# Build a fresh array: writing into X in place would also modify the caller's
+		# array (the previous layer's stored output).
+		self.output = np.where(self.input > 0, self.input, self.alpha * self.input)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""Return dC/dZ = dC/dA * dA/dZ, with dA/dZ = 1 for inputs > 0 and ``alpha`` otherwise.
+
+		The slope at exactly 0 is ``alpha``, matching the ``<= 0`` branch used by the
+		forward pass. The function is element-wise, so the Jacobian is diagonal and
+		the product reduces to an element-wise multiplication.
+		"""
+		dAdZ = np.where(self.input > 0, 1.0, self.alpha)
+		return dCdA * dAdZ
diff --git a/cnn/layers/activations/sigmoid.py b/cnn/layers/activations/sigmoid.py
@@ -0,0 +1,24 @@
+import numpy as np
+from .base import BaseActivation
+
+class Sigmoid(BaseActivation):
+	"""Logistic sigmoid: ``1 / (1 + exp(-x))`` applied element-wise."""
+	ALIAS = "sigmoid"
+
+	def _forwards(self,X:np.ndarray):
+		"""Store a copy of ``X`` and return the sigmoid of every element."""
+		self.input = X.copy()
+		# The sigmoid function has a smooth gradient and outputs values between zero and one. For very high or low values of the input parameters, the network can be very slow to reach a prediction, called the vanishing gradient problem.
+		# 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))); logaddexp evaluates the log term
+		# without overflowing when x is a large negative number.
+		self.output = np.exp(-np.logaddexp(0, -X))
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""Return dC/dZ = dC/dA * sig * (1 - sig).
+
+		The sigmoid is element-wise, so its Jacobian is diagonal and the
+		Jacobian-vector product reduces to an element-wise multiplication; no dense
+		(batch, n, n) array is needed.
+		"""
+		dAdZ = self.output * (1 - self.output)	# Element-wise multiplication.
+		return dCdA * dAdZ
diff --git a/cnn/layers/activations/softmax.py b/cnn/layers/activations/softmax.py
@@ -0,0 +1,27 @@
+import numpy as np
+from .base import BaseActivation
+
+class Softmax(BaseActivation):
+	"""Softmax activation, normalised along axis 0; only accepted directly after an FC layer."""
+	ALIAS = "softmax"
+
+	def _forwards(self,X:np.ndarray):
+		"""Store a copy of ``X`` and return ``exp(X) / sum(exp(X), axis=0)``."""
+		self.input = X.copy()
+		assert self.prev_layer.LAYER_TYPE == 'FC', 'Softmax activation function is not supported for non-FC inputs.'
+		# Softmax is typically used on output neurons: it maps each column of scores to values between 0 and 1 that sum to 1.
+		# Subtracting the per-column maximum leaves the result unchanged but keeps exp() from overflowing.
+		shifted_scores = X - np.max(X,axis=0)
+		exponentials = np.exp(shifted_scores)
+		self.output = exponentials / np.sum(exponentials,axis=0)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""Return dC/dZ by multiplying each softmax Jacobian with the matching slice of ``dCdA``."""
+		# Vectorised implementation from https://stackoverflow.com/questions/59286911/vectorized-softmax-gradient
+		# NOTE: The transpose puts one set of node values per leading index, so one square Jacobian is built per set.
+		probabilities = self.output.T
+		set_count = probabilities.shape[0]
+		node_count = probabilities.shape[1]
+		as_columns = probabilities.reshape(set_count,-1,1)
+		as_rows = probabilities.reshape(set_count,1,-1)
+
+		# Jacobian of softmax: diag(s) - s s^T. It is symmetric, so no further transpose is needed.
+		diagonal_part = as_columns * np.diag(np.ones(node_count))
+		outer_part = np.matmul(as_columns, as_rows)
+		jacobians = diagonal_part - outer_part
+
+		gradient_columns = dCdA.T.reshape((dCdA.T.shape[0],-1,1))
+		propagated = np.matmul(jacobians,gradient_columns)
+
+		return propagated.reshape(dCdA.shape[1],-1).T
diff --git a/cnn/layers/activations/tanh.py b/cnn/layers/activations/tanh.py
@@ -0,0 +1,19 @@
+import numpy as np
+from .base import BaseActivation
+
+class Tanh(BaseActivation):
+	"""Hyperbolic tangent applied element-wise."""
+	ALIAS = "tanh"
+
+	def _forwards(self,X:np.ndarray):
+		"""Store a copy of ``X`` and return ``tanh(X)``."""
+		self.input = X.copy()
+		# The TanH function is zero-centered making it easier to model inputs that are strongly negative strongly positive or neutral.
+		# np.tanh saturates cleanly at +/-1, whereas the explicit exp ratio evaluates
+		# to inf/inf = NaN once exp(X) overflows.
+		self.output = np.tanh(X)
+		return self.output
+
+	def _backwards(self,dCdA:np.ndarray):
+		"""Return dC/dZ = dC/dA * (1 - tanh^2).
+
+		tanh is element-wise, so its Jacobian is diagonal and the Jacobian-vector
+		product reduces to an element-wise multiplication. This keeps the result
+		the same shape as ``dCdA`` for any batch size.
+		"""
+		dAdZ = 1 - np.square(self.output)
+		return dCdA * dAdZ