如何用 NumPy 实现人工神经网络

传统的人工神经网络（Neural Network）是一个三层模型。这里借助矩阵运算实现一个最最传统的神经网络模型。这里作为演示。

说明

多分类模型，最终的输出为标签的编号。
每行为一个输入，不同于传统写法是每列，所以这里矩阵采用右乘。
用 numpy 进行矩阵运算。
使用 scipy.optimize.minimize 进行优化。
为了显示出效果，这里测试集划分得比较大。
损失函数用交叉熵还是均方差都是可以的。
可以打印学习过程中损失函数的变化。
这里使用准确率作为评价指标。

import numpy as np
from scipy.optimize import minimize
from scipy.special import expit
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class NeuralNetwork:
    def __init__(self, input_layer_size, hidden_layer_size, num_labels,):
        self.input_layer_size = input_layer_size
        self.hidden_layer_size = hidden_layer_size
        self.num_labels = num_labels
        self.theta1 = np.random.randn(input_layer_size + 1, hidden_layer_size) * 0.1
        self.theta2 = np.random.randn(hidden_layer_size + 1, num_labels) * 0.1

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
        

    def forward(self, X):
        m = X.shape[0]
        X = np.hstack((np.ones((m, 1)), X))
        z2 = np.dot(X, self.theta1)
        a2 = self.sigmoid(z2)
        a2 = np.hstack((np.ones((m, 1)), a2))
        z3 = np.dot(a2, self.theta2)
        h = self.sigmoid(z3)
        return h

    def cost_function(self, nn_params, X, y):
        self.theta1 = nn_params[:self.hidden_layer_size * (self.input_layer_size + 1)].reshape(
            self.hidden_layer_size, self.input_layer_size + 1)
        self.theta2 = nn_params[self.hidden_layer_size * (self.input_layer_size + 1):].reshape(
            self.num_labels, self.hidden_layer_size + 1)
        m = X.shape[0]
        h = self.forward(X)
        y_one_hot = np.eye(self.num_labels)[y]
        J = (-1 / m) * np.sum(y_one_hot * np.log(h) + (1 - y_one_hot) * np.log(1 - h))
        # print(J)
        return J

    def train(self, X, y):
        initial_nn_params = np.concatenate((self.theta1.flatten(), self.theta2.flatten()))
        options = {'maxiter': 100}
        result = minimize(self.cost_function, initial_nn_params, args=(X, y), method='CG', options=options)
        nn_params = result.x
        self.theta1 = nn_params[:self.hidden_layer_size * (self.input_layer_size + 1)].reshape(
            self.input_layer_size + 1, self.hidden_layer_size)
        self.theta2 = nn_params[self.hidden_layer_size * (self.input_layer_size + 1):].reshape(
             self.hidden_layer_size + 1, self.num_labels)

    def predict(self, X):
        predictions = self.forward(X)
        return np.argmax(predictions, axis=1)


# 加载 Iris 数据集
iris = load_iris()
X = iris.data
y = iris.target

# 数据标准化（可选，预处理，好像这里没起作用）
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# 神经网络参数设置
input_layer_size = 4
hidden_layer_size = 5
num_labels = 3

# 创建神经网络实例
nn = NeuralNetwork(input_layer_size, hidden_layer_size, num_labels)

# 训练神经网络
nn.train(X_train, y_train)

# 在测试集上进行预测
predicted_labels = nn.predict(X_test)

# 计算准确率
accuracy = np.mean(predicted_labels == y_test)
print(f"测试集准确率: {accuracy * 100:.2f}%")
    

附录

用 pytorch 重写以上代码

说明

对应以上代码，把 numpy + scipy 转换为 pytorch。
深度学习中可以利用更复杂的网络结构和更多的训练数据。
也可以用 pytorch-lighting 这样的库来简化训练代码。
在 pytorch 中不需要显式地写出输出层的 softmax，该层包含在优化器中了。
可以尝试随机种子和固定输入样本后手工进行参数的调整。

import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 加载 Iris 数据集
iris = load_iris()
X = iris.data
y = iris.target

数据标准化
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# 将数据转换为 PyTorch 张量
X_train = torch.FloatTensor(X_train)
y_train = torch.LongTensor(y_train)
X_test = torch.FloatTensor(X_test)
y_test = torch.LongTensor(y_test)


# 定义神经网络模型
class IrisNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(IrisNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out


# 初始化模型、损失函数和优化器
input_size = 4
hidden_size = 5
output_size = 3
model = IrisNet(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# 训练模型
num_epochs = 100
for epoch in range(num_epochs):
    # 前向传播
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # 反向传播和优化
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

# 在测试集上进行评估
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs.data, 1)
    accuracy = (predicted == y_test).sum().item() / y_test.size(0)
    print(f'测试集准确率: {accuracy * 100:.2f}%')
    

结果的可视化

通过对模型的可视化，可以一种直观的评价的方式，可以感受结果的好坏，并且补充了一些细节层面包含的影响。

对于分类模型，可以使用一下图表类型

混淆矩阵，不同样本上的指标
精准召回曲线，在不同阈值下的对应。
散点聚类图