深度学习基础
2022-04-09
参考:https://zh-v2.d2l.ai/chapter_multilayer-perceptrons/mlp-scratch.html
首先给一个可以跑得起来的例子,然后对该例子进行解析:
import os, sys
import torch as tc
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets
# download and load FashionMNIST dataset
def load_data(batch_size=64, root='~/.pytorch/F_MNIST_data'):
    """Download (if needed) FashionMNIST and return train/test DataLoaders.

    Args:
        batch_size: number of samples per mini-batch.
        root: directory where the dataset is cached (new, backward-compatible
            parameter; defaults to the original hard-coded path).

    Returns:
        (train_iter, test_iter): DataLoader pair over the train/test splits.
    """
    # ToTensor converts PIL images to float tensors scaled into [0, 1].
    transform = transforms.ToTensor()
    train_dataset = datasets.FashionMNIST(
        root=root,
        train=True,
        transform=transform,
        download=True,
    )
    test_dataset = datasets.FashionMNIST(
        root=root,
        train=False,
        transform=transform,
        download=True,
    )
    # NOTE(review): shuffling the test split (as the original did) is harmless
    # for accuracy evaluation but unnecessary; kept for identical behavior.
    train_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    test_iter = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)
    return train_iter, test_iter
def relu(X):
    """Element-wise ReLU: max(x, 0), preserving X's shape, dtype and device.

    Uses zeros_like instead of the original tc.tensor(0.0): a bare scalar
    tensor lives on the CPU (breaking CUDA inputs) and is float32 (silently
    upcasting integer inputs).
    """
    return tc.max(X, tc.zeros_like(X))
def evaluate_accuracy(data_iter, net, num_inputs=784):
    """Compute classification accuracy of `net` over `data_iter`.

    Args:
        data_iter: iterable of (X, y) mini-batches.
        net: callable mapping a (batch, num_inputs) tensor to class logits.
        num_inputs: flattened feature size (28*28 for FashionMNIST). New
            backward-compatible parameter: the original read a module-level
            global `num_inputs` that only exists when run as a script.

    Returns:
        Fraction of samples whose argmax prediction equals the label
        (0.0 for an empty iterator instead of ZeroDivisionError).
    """
    acc_sum, n = 0.0, 0
    with tc.no_grad():  # inference only: skip autograd bookkeeping
        for X, y in data_iter:
            X = X.view(-1, num_inputs)  # flatten images to feature vectors
            acc_sum += (net(X).argmax(dim=1) == y).sum().item()
            n += y.shape[0]
    return acc_sum / n if n else 0.0
if __name__ == '__main__':
    # Hyperparameters and data loaders.
    batch_size = 64
    train_loader, test_loader = load_data(batch_size=batch_size)
    # 28*28 gray-scale images flattened to 784 features, 10 classes,
    # one hidden layer of 256 units.
    num_inputs, num_outputs, num_hiddens = 784, 10, 256

    # Hidden layer: H = ReLU(X @ W1 + b1).
    # nn.Parameter already sets requires_grad=True; the original also passed
    # requires_grad=True into randn, which was redundant (and misleading: the
    # `* 0.01` product is a non-leaf tensor anyway).
    W1 = nn.Parameter(tc.randn(num_inputs, num_hiddens) * 0.01)
    b1 = nn.Parameter(tc.zeros(num_hiddens))
    # Output layer: O = H @ W2 + b2 (raw logits).
    W2 = nn.Parameter(tc.randn(num_hiddens, num_outputs) * 0.01)
    b2 = nn.Parameter(tc.zeros(num_outputs))
    params = [W1, b1, W2, b2]

    def net(X):
        """Two-layer MLP returning raw logits (softmax lives in the loss)."""
        h = relu(X @ W1 + b1)
        return h @ W2 + b2

    # reduction='none' keeps per-sample losses; we .sum() them before backward,
    # which with lr=0.001 matches the original training dynamics.
    loss = nn.CrossEntropyLoss(reduction='none')
    nums_epochs, lr = 20, 0.001
    updater = tc.optim.SGD(params, lr=lr)

    for epoch in range(nums_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_loader:
            X = X.view(-1, num_inputs)  # flatten images to (batch, 784)
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            updater.zero_grad()
            l.backward()
            updater.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_loader, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))
最终的效果:
epoch 20, loss 0.2523, train acc 0.909, test acc 0.886
网络结构
参考:https://zh-v2.d2l.ai/chapter_multilayer-perceptrons/mlp.html