残差网络(ResNet)
import time
import torch
from torch import nn, optim
import torch.nn.functional as F
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# Select the compute device: use CUDA when torch reports it as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the installed torch version and the device that was selected.
print(torch.__version__)
print(device)
1.11.0+cu113 cuda
残差块
class Residual(nn.Module):  # This class is also saved in the d2lzh_pytorch package for later reuse
    """Residual block: two 3x3 convolutions with batch norm plus a skip connection.

    The main branch is conv1 -> bn1 -> ReLU -> conv2 -> bn2. Its result is
    added to the block input (optionally passed through a 1x1 convolution
    first) and a final ReLU is applied to the sum.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by both 3x3 convolutions.
        use_1x1conv: If True, the skip path applies a 1x1 convolution with
            the same ``stride`` so the input is mapped to ``out_channels``
            channels before the addition.
        stride: Stride of the first 3x3 convolution (and of the 1x1
            convolution when it is enabled).

    Note:
        When ``use_1x1conv`` is False the input is added unchanged, so the
        caller must choose ``in_channels``/``out_channels``/``stride`` such
        that the main-branch output and the input can be added.
    """

    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            # Projection on the skip path so channels/stride match the main branch.
            self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        """Return ReLU(main_branch(X) + skip(X))."""
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check: do not rely on the truthiness of an nn.Module.
        if self.conv3 is not None:
            X = self.conv3(X)
        return F.relu(Y + X)
# Demo 1: a block with matching input/output channels and the default stride.
blk = Residual(in_channels=3, out_channels=3)
X = torch.rand(4, 3, 6, 6)
blk(X).shape  # notebook cell value; recorded output is torch.Size([4, 3, 6, 6])

# Demo 2: change the channel count (3 -> 6) and use stride 2; the 1x1
# convolution on the skip path is enabled so the two branches can be added.
blk = Residual(in_channels=3, out_channels=6, use_1x1conv=True, stride=2)
blk(X).shape
torch.Size([4, 3, 6, 6]) torch.Size([4, 6, 3, 3])
ResNet模型
# Network stem: 7x7 stride-2 convolution on a single-channel input, batch
# norm, ReLU, then a 3x3 stride-2 max pool. The residual stages and the
# classifier head are appended to this container afterwards.
net = nn.Sequential(
    nn.Conv2d(in_channels=1, out_channels=64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(num_features=64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
)
def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    """Build one ResNet stage as an ``nn.Sequential`` of ``Residual`` blocks.

    Args:
        in_channels: Channel count of the tensor entering the stage.
        out_channels: Channel count produced by every block in the stage.
        num_residuals: Number of ``Residual`` blocks to stack.
        first_block: True for the first stage. That stage performs no
            downsampling, so its channel counts must be equal.

    Returns:
        ``nn.Sequential`` holding ``num_residuals`` ``Residual`` blocks. Unless
        ``first_block`` is set, the leading block uses stride 2 together with
        a 1x1 convolution on its skip path; all other blocks map
        ``out_channels`` to ``out_channels`` with the default stride.
    """
    if first_block:
        # The first stage keeps the same channel count as its input.
        assert in_channels == out_channels

    def make_unit(index):
        # Only the leading block of a non-first stage changes channels/stride.
        if index == 0 and not first_block:
            return Residual(in_channels, out_channels, use_1x1conv=True, stride=2)
        return Residual(out_channels, out_channels)

    return nn.Sequential(*(make_unit(index) for index in range(num_residuals)))
# Append four residual stages of two blocks each. The first stage keeps 64
# channels; each later stage doubles the channel count (128, 256, 512).
net.add_module(
    "resnet_block1",
    resnet_block(in_channels=64, out_channels=64, num_residuals=2, first_block=True),
)
net.add_module(
    "resnet_block2",
    resnet_block(in_channels=64, out_channels=128, num_residuals=2),
)
net.add_module(
    "resnet_block3",
    resnet_block(in_channels=128, out_channels=256, num_residuals=2),
)
net.add_module(
    "resnet_block4",
    resnet_block(in_channels=256, out_channels=512, num_residuals=2),
)

# Classifier head. GlobalAvgPool2d output: (Batch, 512, 1, 1).
net.add_module("global_avg_pool", d2l.GlobalAvgPool2d())
# NOTE(review): d2l.FlattenLayer is defined outside this file; presumably it
# flattens to (Batch, 512) before the 10-way linear layer - confirm.
net.add_module("fc", nn.Sequential(d2l.FlattenLayer(), nn.Linear(512, 10)))
# Feed a random 1x1x224x224 tensor through the network one top-level child
# at a time and print the output shape after each child.
X = torch.rand(1, 1, 224, 224)
for name, layer in net.named_children():
    X = layer(X)
    print(name, ' output shape:\t', X.shape)
0 output shape: torch.Size([1, 64, 112, 112]) 1 output shape: torch.Size([1, 64, 112, 112]) 2 output shape: torch.Size([1, 64, 112, 112]) 3 output shape: torch.Size([1, 64, 56, 56]) resnet_block1 output shape: torch.Size([1, 64, 56, 56]) resnet_block2 output shape: torch.Size([1, 128, 28, 28]) resnet_block3 output shape: torch.Size([1, 256, 14, 14]) resnet_block4 output shape: torch.Size([1, 512, 7, 7]) global_avg_pool output shape: torch.Size([1, 512, 1, 1]) fc output shape: torch.Size([1, 10])
获取数据和训练模型
# Load the data and train the model.
batch_size = 256
# If an "out of memory" error is reported, reduce batch_size or resize.
# NOTE(review): d2l.load_data_fashion_mnist is defined outside this file;
# presumably it returns train/test iterators over resized images - confirm.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)

lr = 0.001
num_epochs = 5
optimizer = optim.Adam(net.parameters(), lr=lr)
d2l.train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
training on cuda epoch 1, loss 0.0015, train acc 0.853, test acc 0.885, time 31.0 sec epoch 2, loss 0.0010, train acc 0.910, test acc 0.899, time 31.8 sec epoch 3, loss 0.0008, train acc 0.926, test acc 0.911, time 31.6 sec epoch 4, loss 0.0007, train acc 0.936, test acc 0.916, time 31.8 sec epoch 5, loss 0.0006, train acc 0.944, test acc 0.926, time 31.5 sec
Comments NOTHING