%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
print(torch.__version__)
1.11.0+cu113
def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # In this case, drop all elements
    if keep_prob == 0:
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    return mask * X / keep_prob
X = torch.arange(16).view(2, 8)
dropout(X, 0)
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
dropout(X, 0.5)
tensor([[ 0.,  0.,  4.,  6.,  0.,  0., 12., 14.],
        [ 0., 18., 20., 22.,  0.,  0., 28.,  0.]])
dropout(X, 1.0)
tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])
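Note that surviving elements are divided by keep_prob (inverted dropout), so the expected value of each element is unchanged. A quick empirical check (not part of the original code):

X = torch.ones(100000)
print(dropout(X, 0.5).mean())  # close to 1.0: the rescaling compensates for the dropped half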
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256
W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)
params = [W1, b1, W2, b2, W3, b3]
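The drop probabilities and the from-scratch forward function are missing from this section, although the training call below relies on them. A reconstruction consistent with the parameters defined above and with the is_training check in the evaluation code that follows:

drop_prob1, drop_prob2 = 0.2, 0.5

def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:  # apply dropout only during training
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3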
# def evaluate_accuracy(data_iter, net):
#     acc_sum, n = 0.0, 0
#     for X, y in data_iter:
#         if isinstance(net, torch.nn.Module):
#             net.eval()  # evaluation mode; this turns off dropout
#             acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
#             net.train()  # switch back to training mode
#         else:  # a custom model
#             if 'is_training' in net.__code__.co_varnames:  # if it has an is_training argument
#                 # set is_training to False
#                 acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
#             else:
#                 acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
#         n += y.shape[0]
#     return acc_sum / n
num_epochs, lr, batch_size = 5, 100.0, 256  # the learning rate is set very large here, for the same reason as in Section 3.9.6
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
epoch 1, loss 0.0045, train acc 0.561, test acc 0.662
epoch 2, loss 0.0023, train acc 0.783, test acc 0.786
epoch 3, loss 0.0019, train acc 0.823, test acc 0.773
epoch 4, loss 0.0017, train acc 0.838, test acc 0.847
epoch 5, loss 0.0016, train acc 0.848, test acc 0.809
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, 10)
)
for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
epoch 1, loss 0.0048, train acc 0.526, test acc 0.743
epoch 2, loss 0.0023, train acc 0.779, test acc 0.764
epoch 3, loss 0.0020, train acc 0.815, test acc 0.819
epoch 4, loss 0.0018, train acc 0.836, test acc 0.814
epoch 5, loss 0.0016, train acc 0.848, test acc 0.842
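Unlike the hand-written dropout above, nn.Dropout switches itself off outside training: it drops and rescales elements only while the module is in training mode. A small illustration (not in the original):

m = nn.Dropout(0.5)
x = torch.ones(8)
m.train()
print(m(x))  # roughly half the entries zeroed, survivors scaled to 2.0
m.eval()
print(m(x))  # identity: dropout is disabled in evaluation mode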