# Monthly Archives: February 2021

## Backpropagation Method

The backpropagation method is an extension of the perceptron method for acyclic artificial neural networks. Acyclic artificial neural networks are defined in terms of the following:

functions $f_1, f_2, f_3, \dots, f_N$

weight matrices $W_1, W_2, W_3, \dots, W_N$

bias vectors $b_1, b_2, b_3, \dots, b_N$

such that the result for an input vector i involves:

$$o_0 = i$$

$$o_j = \big(f_j(a_{j,1}),\, f_j(a_{j,2}),\, f_j(a_{j,3}),\, \dots\big) \quad \text{for } j = 1, 2, 3, \dots, N \text{ (i.e. } f_j \text{ applied elementwise to } a_j\text{)}$$

$$a_j = W_j\, o_{j-1} + b_j \quad \text{for } j = 1, 2, 3, \dots, N$$

where $o_N$ is the result.

In the backpropagation method, each weight matrix and bias vector is updated for each input output vector pair $(i, o)$ by subtracting a small fraction of the corresponding partial derivative of the error function $E_o = \lVert o - o_N \rVert^2 / 2$. The small fraction is referred to as the learning rate. Here is a derivation of the formulas to calculate these partial derivatives:

Here is sample Python backpropagation method code:

```python
#!/usr/bin/env python3

"""
Implements the backpropagation method.

Usage:
./backprop <data file>                        \
<data split>                       \
<number of hidden layers>          \
<number of hidden layer functions> \
<number of categories>             \
<learning rate>                    \
<number of epochs>

Data files must be space delimited with one input output pair per line.

Every hidden layer has the same number of functions.

The hidden layer functions are rectified linear unit functions.The outer layer functions are identity functions.

initialization steps:
The input output pairs are shuffled and the inputs mix max normalized.
The weights and biases are set to random values.

Requires NumPy.
"""

import numpy
import sys

def minmax(data):
    """
    Finds the min max normalizations of data.

    Maps the smallest element to 0 and the largest to 1.
    """

    smallest = numpy.min(data)
    largest  = numpy.max(data)

    return (data - smallest) / (largest - smallest)

def init_data(data_file, data_split, n_cat):
    """
    Creates the training and testing data.

    data_file:  path to a space delimited file with one input output pair
                per line and the category index in the last column
    data_split: percentage of the rows used for training
    n_cat:      number of categories

    Returns (training data, testing data); each row holds the min max
    normalized inputs followed by the one hot encoded output.
    """

    # Bug fix: the original used data without ever loading the file.
    # loadtxt splits on whitespace by default, matching the stated
    # space delimited format.
    data = numpy.loadtxt(data_file)
    numpy.random.shuffle(data)
    data[:, :-1] = minmax(data[:, :-1])
    # Replace the category index column with one hot vectors.
    outputs    = numpy.identity(n_cat)[data[:, -1].astype("int")]
    data       = numpy.hstack((data[:, :-1], outputs))
    data_split = int((data_split / 100) * data.shape[0])

    return data[:data_split, :], data[data_split:, :]

def accuracy(data, weights, biases, n_cat):
    """
    Calculates the accuracies of models.

    data:    array with inputs followed by one hot outputs in each row
    weights: list of weight matrices
    biases:  list of bias vectors
    n_cat:   number of categories

    Returns the percentage of rows whose predicted category matches.
    """

    results = model(data[:, :-n_cat], weights, biases)
    # Bug fix: answers was undefined.  The correct category indices are
    # the positions of the ones in the one hot encoded output columns.
    answers = numpy.argmax(data[:, -n_cat:], 1)

    return 100 * (results == answers).astype(int).mean()

def model_(inputs, weights, biases, relu = True):
    """
    model helper function

    Applies one layer: an affine transformation optionally followed by
    rectified linear unit functions.
    """

    # inputs @ weights.T is the transpose of weights @ inputs.T.
    layer = numpy.matmul(inputs, weights.T) + biases

    return numpy.maximum(layer, 0) if relu else layer

def model(inputs, weights, biases):
    """
    Finds the model outputs, i.e. the predicted category indices.
    """

    outputs = model_(inputs, weights[0], biases[0])
    # Hidden layers use rectified linear units; the outer layer does not.
    for w, b in zip(weights[1:-1], biases[1:-1]):
        outputs = model_(outputs, w, b)
    outputs = model_(outputs, weights[-1], biases[-1], False)

    return numpy.argmax(outputs, 1)

def adjust(weights, biases, input_, output, func_inps, func_outs, learn_rate):
    """
    Updates the weights and biases in place for one input output pair
    using gradient descent.

    weights:    list of weight matrices, updated in place
    biases:     list of bias vectors,    updated in place
    input_:     input  vector
    output:     output vector (one hot encoded)
    func_inps:  function inputs  (the a vectors) from the forward pass
    func_outs:  function outputs (the o vectors) from the forward pass
    learn_rate: fraction of each partial derivative subtracted
    """

    # Outer layer: identity functions make dE/da simply o_N - o.
    d_e_f_i = [func_outs[-1] - output]
    d_e_w   = [numpy.outer(d_e_f_i[-1], func_outs[-2])]
    # Walk backwards through the hidden layers.
    for i in reversed(range(len(weights) - 1)):
        # Rectified linear unit derivative: 1 where the input is
        # positive, 0 otherwise.
        func_deriv = numpy.clip(numpy.sign(func_inps[i]), 0, 1)
        vector     = numpy.matmul(weights[i + 1].T, d_e_f_i[-1])
        # The first hidden layer sees the raw input vector.
        func_out   = func_outs[i - 1] if i else input_
        d_e_f_i.append(numpy.multiply(vector, func_deriv))
        d_e_w.append(numpy.outer(d_e_f_i[-1], func_out))
    # Gradients were collected last layer first, so reverse before
    # pairing them with the layers.
    for i, e in enumerate(reversed(list(zip(d_e_w, d_e_f_i)))):
        weights[i] -= learn_rate * e[0]
        biases[i]  -= learn_rate * e[1]

def learn(train_data, n_hls, n_hl_funcs, n_cat, learn_rate, n_epochs):
    """
    Learns the weights and biases from the training data.

    train_data: array with one input output pair per row
    n_hls:      number of hidden layers
    n_hl_funcs: number of functions per hidden layer
    n_cat:      number of categories
    learn_rate: learning rate
    n_epochs:   number of passes over the training data

    Returns (weights, biases).
    """

    # Random initialization scaled down by the square root of the number
    # of rows of each matrix or vector.
    weights = [numpy.random.randn(n_hl_funcs, train_data.shape[1] - n_cat)]
    for i in range(n_hls - 1):
        weights.append(numpy.random.randn(n_hl_funcs, n_hl_funcs))
    weights.append(numpy.random.randn(n_cat, n_hl_funcs))
    weights = [e / numpy.sqrt(e.shape[0]) for e in weights]
    biases  = [numpy.random.randn(n_hl_funcs) for i in range(n_hls)]
    biases.append(numpy.random.randn(n_cat))
    biases  = [e / numpy.sqrt(e.shape[0]) for e in biases]
    for i in range(n_epochs):
        for e in train_data:
            input_    = e[:-n_cat]
            func_inps = []
            func_outs = []
            # Forward pass saving every function input and output for
            # the backward pass.
            for l in range(n_hls + 1):
                input__   = func_outs[l - 1] if l else input_
                func_inp  = numpy.matmul(weights[l], input__)
                func_inp += biases[l]
                relu      = numpy.maximum(func_inp, 0)
                # The outer layer (l == n_hls) uses identity functions.
                func_out  = relu if l != n_hls else func_inp
                func_inps.append(func_inp)
                func_outs.append(func_out)
            # Bug fix: the call to adjust lost its first line; the
            # arguments were left dangling without a callee.
            adjust(weights,
                   biases,
                   e[:-n_cat],
                   e[-n_cat:],
                   func_inps,
                   func_outs,
                   learn_rate)

    return weights, biases

# Command line arguments: 1 data file, 2 data split, 3 number of hidden
# layers, 4 number of hidden layer functions, 5 number of categories,
# 6 learning rate, 7 number of epochs.
n_cat                 = int(sys.argv[5])
train_data, test_data = init_data(sys.argv[1], float(sys.argv[2]), n_cat)
weights, biases       = learn(train_data,
int(sys.argv[3]),
int(sys.argv[4]),
n_cat,
float(sys.argv[6]),
int(sys.argv[7]))
# Report the learned parameters and the accuracies on both data sets.
print(f"weights and biases:     {weights}, {biases}")
accuracy_             = accuracy(train_data, weights, biases, n_cat)
print(f"training data accuracy: {accuracy_:.2f}%")
accuracy_             = accuracy(test_data,  weights, biases, n_cat)
print(f"testing  data accuracy: {accuracy_:.2f}%")
```

Here are sample results for the MNIST_dataset (Modified National Institute Of Standards And Technology dataset) available from many sources such as Kaggle:

```
% ./backprop MNIST_dataset.csv 80 2 64 10 0.001 100
weights and biases:     [array([[ 0.10866304,  0.0041912 , -0.23560872, ...,  0.03364987,
-0.19519161, -0.00068468],
[ 0.12745399,  0.12268858, -0.13698254, ...,  0.19508343,
0.20920324,  0.1970561 ],

...

-0.24605896,  0.02329749, -0.16363297, -0.24085487, -0.14819292,
-0.19237153, -0.21772553, -0.19817858,  0.50966376,  0.14384857,
0.10621777,  0.64537735,  0.77337279,  0.01737619]), array([0.03938714, 0.0574965 , 0.16544762, 0.13164358, 0.04927753,
0.12365563, 0.0401857 , 0.18105514, 0.10016533, 0.11111991])]
training data accuracy: 97.96%
testing  data accuracy: 96.59%
```

Here is a plot of the accuracy versus the number of epochs for a data split of 80 / 20, two hidden layers, 64 functions per hidden layer, 10 categories, and a learning rate of 0.001. Blue denotes the training data accuracy and orange denotes the testing data accuracy:

## Private Blockchains

Dr. Christian Seberino of RestonLogic explains what private blockchains are. He shows the advantages and a disadvantage private blockchains have compared to public blockchains. A use case and possible alternate terminology are also mentioned.