16 Supervised Learning

import numpy as np
import matplotlib.pyplot as plt

# NOTE: Example 1

# Here, we build a fully connected 2-layer artificial neural network. We
# suppose that layer 1 has random activation that is held constant over
# trials. The goal is for the first half of layer 2's units to be driven to 1
# and the second half to 0.
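
# The update rule implemented below is a per-synapse delta rule: with the
# synaptic contribution y = w12[k, j] * x1[k], each weight is nudged as
#     w12[k, j] <- w12[k, j] - alpha * (y - x2_desired[j])
# so each individual contribution is pushed toward the desired output on its
# own, before the logistic squashing is applied.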

n = 10
x1 = np.random.uniform(0, 1, n)
x2 = np.zeros(n)
w12 = np.random.uniform(0, 1, (x1.shape[0], x2.shape[0]))
delta_w12 = np.zeros(w12.shape)

x2_desired = np.zeros(n)
x2_desired[:n // 2] = 1

alpha = 0.1

n_trials = 100

for i in range(n_trials):

    # compute activation in x2 units and the per-synapse error terms
    for j in range(x2.shape[0]):
        for k in range(x1.shape[0]):
            # contribution of synapse k -> j to unit j's net input
            y = w12[k, j] * x1[k]
            x2[j] += y
            # per-synapse error: this synapse's contribution minus the target
            delta_w12[k, j] = y - x2_desired[j]
        # squash the net input with a steep logistic centred at 0.5
        x2[j] = 1 / (1 + np.exp(-(x2[j] - 0.5) * 20))

    # update weights against the per-synapse error
    for j in range(x2.shape[0]):
        for k in range(x1.shape[0]):
            w12[k, j] += -alpha * delta_w12[k, j]

    # plt.plot(x2, label='actual')
    # plt.plot(x2_desired, label='desired')
    # plt.legend()
    # plt.show()

    x2[:] = 0
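
# For comparison, the same per-synapse update can be written in vectorized
# NumPy. This is a minimal sketch of the rule above, not a different
# algorithm; the names W and x2_vec are illustrative only.

W = np.random.uniform(0, 1, (n, n))

for _ in range(n_trials):
    contrib = W * x1[:, None]        # per-synapse contributions w[k, j] * x1[k]
    delta_W = contrib - x2_desired   # per-synapse errors, as in the loop above
    W += -alpha * delta_W

x2_vec = 1 / (1 + np.exp(-((x1 @ W) - 0.5) * 20))  # squashed layer-2 activation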

# NOTE: Example 2

# Here, we build a fully connected 3-layer artificial neural network. We
# suppose that layer 1 has random activation that is held constant over
# trials. The goal is for the first half of layer 3's units to be driven to 1
# and the second half to 0. Layer 2 can be whatever it needs to be to meet
# the goal set for layer 3.
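
# The error terms computed below are per-path gradients: for the path
# l -> k -> j with contribution y = w23[k, j] * w12[l, k] * x1[l], the deltas
# are the derivatives of 0.5 * (y - x3_desired[j])**2 with respect to
# w12[l, k] and w23[k, j].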

n = 10
x1 = np.random.uniform(0, 1, n)
x2 = np.zeros(n)
x3 = np.zeros(n)
w12 = np.random.uniform(0, 1, (x1.shape[0], x2.shape[0]))
w23 = np.random.uniform(0, 1, (x2.shape[0], x3.shape[0]))
delta_w12 = np.zeros(w12.shape)
delta_w23 = np.zeros(w23.shape)

x3_desired = np.zeros(n)
x3_desired[:n // 2] = 1

alpha = 0.2

n_trials = 500

for i in range(n_trials):

    # compute activation in x2 units
    for j in range(x2.shape[0]):
        for k in range(x1.shape[0]):
            x2[j] += w12[k, j] * x1[k]
        x2[j] = 1 / (1 + np.exp(-(x2[j] - 0.5) * 20))

    # compute activation in x3 units
    for j in range(x3.shape[0]):
        for k in range(x2.shape[0]):
            x3[j] += w23[k, j] * x2[k]
        x3[j] = 1 / (1 + np.exp(-(x3[j] - 0.5) * 20))

    # compute error gradients of the squared per-path error
    for j in range(x3.shape[0]):
        for k in range(x2.shape[0]):
            for l in range(x1.shape[0]):
                # contribution of the path l -> k -> j and its error
                y = w23[k, j] * w12[l, k] * x1[l]
                delta = y - x3_desired[j]

                # gradient of 0.5 * delta**2 with respect to each weight on
                # the path (later passes overwrite earlier ones)
                delta_w12[l, k] = w23[k, j] * x1[l] * delta
                delta_w23[k, j] = w12[l, k] * x1[l] * delta

    # update weights against the error gradient
    for j in range(x3.shape[0]):
        for k in range(x2.shape[0]):
            w23[k, j] += -alpha * delta_w23[k, j]

    for k in range(x2.shape[0]):
        for l in range(x1.shape[0]):
            w12[l, k] += -alpha * delta_w12[l, k]

    # plt.plot(x1, label='x1')
    # plt.plot(x2, label='x2')
    # plt.plot(x3, label='x3')
    # plt.plot(x3_desired, label='x3 desired')
    # plt.legend()
    # plt.show()

    x2[:] = 0
    x3[:] = 0
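
# For comparison, here is a minimal sketch of standard vectorized
# backpropagation for the same 3-layer setup. It is not the per-path rule
# used above: the output error is propagated back through the logistic
# nonlinearities and the weights. The names sigmoid, W12, W23, h, y, and eta
# are illustrative only, and eta may need tuning.

def sigmoid(z):
    # same steep logistic as above, centred at 0.5 with gain 20
    return 1 / (1 + np.exp(-(z - 0.5) * 20))

def sigmoid_deriv(a):
    # derivative of the logistic, written in terms of its output a
    return 20 * a * (1 - a)

W12 = np.random.uniform(0, 1, (n, n))
W23 = np.random.uniform(0, 1, (n, n))
eta = 0.01

for _ in range(n_trials):
    h = sigmoid(x1 @ W12)                              # layer 2 activation
    y = sigmoid(h @ W23)                               # layer 3 activation

    grad_out = (y - x3_desired) * sigmoid_deriv(y)     # error at layer 3
    grad_hid = (grad_out @ W23.T) * sigmoid_deriv(h)   # error backpropagated to layer 2

    W23 += -eta * np.outer(h, grad_out)
    W12 += -eta * np.outer(x1, grad_hid)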