Dropout in PyTorch

酥酥 · published 2022-04-15


				
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

print(torch.__version__)

1.11.0+cu113
				
def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # In this case, drop all of the elements
    if keep_prob == 0:
        return torch.zeros_like(X)
    # Keep each element with probability keep_prob, then rescale the
    # survivors by 1/keep_prob (inverted dropout)
    mask = (torch.rand(X.shape) < keep_prob).float()

    return mask * X / keep_prob

X = torch.arange(16).view(2, 8)
dropout(X, 0)

tensor([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11., 12., 13., 14., 15.]])
				
dropout(X, 0.5)

tensor([[ 0.,  0.,  4.,  6.,  0.,  0., 12., 14.],
        [ 0., 18., 20., 22.,  0.,  0., 28.,  0.]])
				
dropout(X, 1.0)

tensor([[0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0.]])
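
Note the division by keep_prob in the last line of dropout: this is "inverted dropout", which keeps the expected value of each activation the same as the input, so no extra rescaling is needed at test time. A quick illustrative check (not part of the original post):

X = torch.ones(1000, 1000)
# Surviving entries become 2.0 and dropped entries 0.0, so the
# mean stays close to the input mean of 1.0
print(dropout(X, 0.5).mean())  # ~tensor(1.0000), up to sampling noise
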
				
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)), dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)), dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)), dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)

params = [W1, b1, W2, b2, W3, b3]
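
As a quick sanity check on the model size (an illustrative addition, not from the original post), the three layers hold 784×256 + 256 + 256×256 + 256 + 256×10 + 10 = 269,322 parameters:

print(sum(p.numel() for p in params))  # 269322
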
				
			
				
The evaluation helper below is left commented out; it appears to already be provided by the d2lzh_pytorch package.

# def evaluate_accuracy(data_iter, net):
#     acc_sum, n = 0.0, 0
#     for X, y in data_iter:
#         if isinstance(net, torch.nn.Module):
#             net.eval()  # evaluation mode; this turns off dropout
#             acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
#             net.train()  # switch back to training mode
#         else:  # a custom model
#             if('is_training' in net.__code__.co_varnames):  # if it takes an is_training argument
#                 # set is_training to False
#                 acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
#             else:
#                 acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
#         n += y.shape[0]
#     return acc_sum / n
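
The training call below uses a scratch-built net and dropout probabilities drop_prob1 and drop_prob2 that this post never defines. Here is a minimal sketch following the d2l scratch implementation (the values 0.2 and 0.5 are assumptions taken from the book):

drop_prob1, drop_prob2 = 0.2, 0.5  # assumed values, as in the d2l book

def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:  # only apply dropout while training
        H1 = dropout(H1, drop_prob1)
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)
    return torch.matmul(H2, W3) + b3
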
				
			
				
num_epochs, lr, batch_size = 5, 100.0, 256  # the learning rate is set very large here, for the same reason as in Section 3.9.6
loss = torch.nn.CrossEntropyLoss()
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0045, train acc 0.561, test acc 0.662
epoch 2, loss 0.0023, train acc 0.783, test acc 0.786
epoch 3, loss 0.0019, train acc 0.823, test acc 0.773
epoch 4, loss 0.0017, train acc 0.838, test acc 0.847
epoch 5, loss 0.0016, train acc 0.848, test acc 0.809
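
The concise implementation below relies on nn.Dropout, which handles the train/eval switch by itself: in training mode it zeroes each element with probability p and rescales the rest by 1/(1-p); in eval mode it is the identity. A small illustrative check (not from the original post):

m = nn.Dropout(0.5)
x = torch.ones(8)
m.train()
print(m(x))  # roughly half the entries zeroed, survivors scaled to 2.0
m.eval()
print(m(x))  # identity: dropout is disabled in evaluation mode
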
				
net = nn.Sequential(
        d2l.FlattenLayer(),
        nn.Linear(num_inputs, num_hiddens1),
        nn.ReLU(),
        nn.Dropout(drop_prob1),
        nn.Linear(num_hiddens1, num_hiddens2), 
        nn.ReLU(),
        nn.Dropout(drop_prob2),
        nn.Linear(num_hiddens2, 10)
        )

for param in net.parameters():
    nn.init.normal_(param, mean=0, std=0.01)
				
			
				
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)
				
			
epoch 1, loss 0.0048, train acc 0.526, test acc 0.743
epoch 2, loss 0.0023, train acc 0.779, test acc 0.764
epoch 3, loss 0.0020, train acc 0.815, test acc 0.819
epoch 4, loss 0.0018, train acc 0.836, test acc 0.814
epoch 5, loss 0.0016, train acc 0.848, test acc 0.842