1 Tensor运算特点

我们先来看看Tensor的运算特点吧

				
					import torch
from time import time

print(torch.__version__)

1.10.0+cu113

				
					a = torch.ones(1000)
b = torch.ones(1000)

将这两个向量按元素逐一做标量加法:

				
					start = time()
c = torch.zeros(1000)
for i in range(1000):
    c[i] = a[i] + b[i]
print(time() - start)

0.020173072814941406

将这两个向量直接做矢量加法:

				
					start = time()
d = a + b
print(time() - start)

8.20159912109375e-05

结果很明显，后者比前者更省时。因此，我们应该尽可能采用矢量计算，以提升计算效率。

广播机制例子🌰：标量 b 会被自动广播成与 a 形状相同的张量，再按元素相加。

				
					a = torch.ones(3)
b = 10
print(a + b)

tensor([11., 11., 11.])

2 线性回归的从零开始实现

				
					%matplotlib inline
import torch
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random

print(torch.__version__)

1.10.0+cu113

生成数据集

				
					num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.randn(num_examples, num_inputs,
                      dtype=torch.float32)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                       dtype=torch.float32)
print(features[0], labels[0])

tensor([0.1706, 1.0724]) tensor(0.8927)

				
					def use_svg_display():
    """Ask IPython to render matplotlib figures as SVG (vector graphics)."""
    # NOTE(review): newer IPython releases reportedly deprecate
    # `IPython.display.set_matplotlib_formats` in favour of the
    # `matplotlib_inline` package -- confirm against the installed version.
    display.set_matplotlib_formats('svg')

def set_figsize(figsize=(3.5, 2.5)):
    """Switch to SVG output and set matplotlib's default figure size.

    Args:
        figsize: Value stored in ``plt.rcParams['figure.figsize']``;
            presumably (width, height) in inches -- confirm against the
            matplotlib documentation.
    """
    use_svg_display()
    # Set the figure size
    plt.rcParams['figure.figsize'] = figsize

# # 在../d2lzh_pytorch里面添加上面两个函数后就可以这样导入
# import sys
# sys.path.append("..")
# from d2lzh_pytorch import * 

set_figsize()
plt.scatter(features[:, 1].numpy(), labels.numpy(), 1);

				
					# 本函数已保存在d2lzh包中方便以后使用
def data_iter(batch_size, features, labels):
    """Yield shuffled mini-batches of ``(features, labels)``.

    The sample order is reshuffled on every call with ``random.shuffle``, so
    seeding Python's ``random`` module makes the batches reproducible.

    Args:
        batch_size: Number of samples per mini-batch.
        features: Tensor whose first dimension indexes the samples.
        labels: Tensor whose first dimension lines up with ``features``.

    Yields:
        Tuples ``(X, y)`` holding the rows of ``features`` and ``labels`` that
        belong to one mini-batch. The final batch is shorter when the number
        of samples is not a multiple of ``batch_size``.
    """
    num_examples = len(features)
    indices = list(range(num_examples))
    random.shuffle(indices)  # visit the samples in random order
    for start in range(0, num_examples, batch_size):
        # List slicing already clamps at the end of the list, so the last
        # batch is simply shorter; no explicit min() is needed.
        # Build the index on the same device as the data: index_select
        # requires the index tensor to live where the source tensor lives.
        batch = torch.tensor(indices[start:start + batch_size],
                             dtype=torch.long, device=features.device)
        yield (features.index_select(0, batch),
               labels.index_select(0, batch.to(labels.device)))

				
					batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break

tensor([[-1.5561,  1.5366],
        [ 0.7773, -0.4141],
        [-0.7296,  0.2837],
        [-0.1572, -0.1902],
        [-1.1338,  0.0436],
        [ 1.5135, -2.5492],
        [ 0.5583,  0.2310],
        [-1.3505, -1.5909],
        [-0.2826,  1.3457],
        [ 0.2002, -2.1443]]) 
 tensor([-4.1393,  7.1701,  1.7999,  4.5387,  1.7729, 15.8816,  4.5391,  6.9128,
        -0.9564, 11.9046])

初始化模型参数

				
					w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, 1)), dtype=torch.float32)  # weights drawn from a normal distribution (mean 0, std 0.01), shape (num_inputs, 1)
b = torch.zeros(1, dtype=torch.float32)  # bias starts at zero
# Enable gradient tracking on both parameters (in place).
w.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)

tensor([0.], requires_grad=True)

定义模型

				
					def linreg(X, w, b):  # 本函数已保存在d2lzh包中方便以后使用
     return torch.mm(X, w) + b

定义损失函数

				
					def squared_loss(y_hat, y):  # 本函数已保存在pytorch_d2lzh包中方便以后使用
     return (y_hat - y.view(y_hat.size())) ** 2 / 2

定义优化算法

				
					def sgd(params, lr, batch_size):  # 本函数已保存在d2lzh_pytorch包中方便以后使用
     for param in params:
         param.data -= lr * param.grad / batch_size # 注意这里更改param时用的param.data

训练模型

				
					lr = 0.03
num_epochs = 3
net = linreg
loss = squared_loss

for epoch in range(num_epochs):  # 训练模型一共需要num_epochs个迭代周期
    # 在每一个迭代周期中，会使用训练数据集中所有样本一次（假设样本数能够被批量大小整除）。X
    # 和y分别是小批量样本的特征和标签
    for X, y in data_iter(batch_size, features, labels):
        l = squared_loss(net(X, w, b), y).sum()  # l是有关小批量X和y的损失
        l.backward()  # 小批量的损失对模型参数求梯度
        sgd([w, b], lr, batch_size)  # 使用小批量随机梯度下降迭代模型参数
        
        # 不要忘了梯度清零
        w.grad.data.zero_()
        b.grad.data.zero_()
    train_l = loss(net(features, w, b), labels)
    print('epoch %d, loss %f' % (epoch + 1, train_l.mean().item()))

epoch 1, loss 0.031926
epoch 2, loss 0.000115
epoch 3, loss 0.000054

				
					print(true_w, '\n', w)
print(true_b, '\n', b)

[2, -3.4] 
 tensor([[ 2.0000],
        [-3.3996]], requires_grad=True)
4.2 
 tensor([4.1990], requires_grad=True)

3 线性回归的简洁实现

				
					import torch
from torch import nn
import numpy as np
torch.manual_seed(1)

print(torch.__version__)
torch.set_default_tensor_type('torch.FloatTensor')

生成数据集

				
					num_inputs = 2
num_examples = 1000
true_w = [2, -3.4]
true_b = 4.2
features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
labels = true_w[0] * features[:, 0] + true_w[1] * features[:, 1] + true_b
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)

读取数据

				
					import torch.utils.data as Data

batch_size = 10

# Pair up the training features and labels
dataset = Data.TensorDataset(features, labels)

# Wrap the dataset in a DataLoader.
# NOTE: this rebinds the name `data_iter`, which the from-scratch section of
# this file uses for a generator function.
data_iter = Data.DataLoader(
    dataset=dataset,      # torch TensorDataset format
    batch_size=batch_size,      # mini batch size
    shuffle=True,               # whether to shuffle the data (shuffling is recommended)
    num_workers=2,              # number of worker subprocesses used to load the data
)

				
					for X, y in data_iter:
       print(X, '\n', y)
    break

tensor([[-0.0163, -1.0072],
        [-0.3554, -0.1807],
        [-1.2406, -2.3683],
        [ 1.3847,  1.9209],
        [-0.7570, -0.3135],
        [ 0.3181, -0.8122],
        [-0.3864,  0.0382],
        [ 1.0939, -0.1225],
        [ 0.7272,  0.4801],
        [ 0.6706, -0.7972]]) 
 tensor([7.6005, 4.1017, 9.7864, 0.4568, 3.7355, 7.5675, 3.2881, 6.7967, 4.0404,
        8.2513])

定义模型

				
					class LinearNet(nn.Module):
    def __init__(self, n_feature):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(n_feature, 1)

    def forward(self, x):
        y = self.linear(x)
        return y
    
net = LinearNet(num_inputs)
print(net) # 使用print可以打印出网络的结构

LinearNet(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

				
					# Variant 1: pass the layers directly to nn.Sequential
net = nn.Sequential(
    nn.Linear(num_inputs, 1)
    # further layers can be passed here
    )

# Variant 2: start empty and register named layers one by one
net = nn.Sequential()
net.add_module('linear', nn.Linear(num_inputs, 1))
# net.add_module ......

# Variant 3: pass an OrderedDict of (name, layer) pairs
from collections import OrderedDict
net = nn.Sequential(OrderedDict([
          ('linear', nn.Linear(num_inputs, 1))
          # ......
        ]))

# Each variant rebinds `net`, so what gets printed is the variant-3 model.
print(net)
print(net[0])

Sequential(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)
Linear(in_features=2, out_features=1, bias=True)

				
					for param in net.parameters():
     print(param)

Parameter containing:
tensor([[0.5347, 0.7057]], requires_grad=True)
Parameter containing:
tensor([0.6873], requires_grad=True)

初始化模型参数

				
					from torch.nn import init

init.normal_(net[0].weight, mean=0.0, std=0.01)
init.constant_(net[0].bias, val=0.0)  # 也可以直接修改bias的data: net[0].bias.data.fill_(0)

Parameter containing:
tensor([0.], requires_grad=True)

				
					for param in net.parameters():
     print(param)

Parameter containing:
tensor([[-0.0142, -0.0161]], requires_grad=True)
Parameter containing:
tensor([0.], requires_grad=True)

定义损失函数

				
					loss = nn.MSELoss()

定义优化算法

				
					import torch.optim as optim

optimizer = optim.SGD(net.parameters(), lr=0.03)
print(optimizer)

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.03
    momentum: 0
    nesterov: False
    weight_decay: 0
)

				
					# 为不同子网络设置不同的学习率
# optimizer =optim.SGD([
#                 # 如果对某个参数不指定学习率，就使用最外层的默认学习率
#                 {'params': net.subnet1.parameters()}, # lr=0.03
#                 {'params': net.subnet2.parameters(), 'lr': 0.01}
#             ], lr=0.03)

				
					# # 调整学习率
# for param_group in optimizer.param_groups:
#     param_group['lr'] *= 0.1 # 学习率为之前的0.1倍

训练模型

				
					num_epochs = 3
for epoch in range(1, num_epochs + 1):
    for X, y in data_iter:
        output = net(X)
        # View y as a column before comparing it with the network output.
        l = loss(output, y.view(-1, 1))
        optimizer.zero_grad() # reset the gradients; equivalent to net.zero_grad()
        l.backward()
        optimizer.step()
    # NOTE: `l` is the loss of the last mini-batch of this epoch, not an
    # average over the whole epoch.
    print('epoch %d, loss: %f' % (epoch, l.item()))

epoch 1, loss: 0.000457
epoch 2, loss: 0.000081
epoch 3, loss: 0.000198

				
					dense = net[0]
print(true_w, dense.weight.data)
print(true_b, dense.bias.data)

[2, -3.4] tensor([[ 1.9999, -3.4005]])
4.2 tensor([4.2011])

梦回温柔乡

Pytorch中的线性回归

1 Tensor运算特点

2 线性回归的从零开始实现

生成数据集

初始化模型参数

定义模型

定义损失函数

定义优化算法

训练模型

3 线性回归的简洁实现

生成数据集

读取数据

定义模型

初始化模型参数

定义损失函数

定义优化算法

训练模型

Pytorch之autograd

torch之softmax分类mnist

Comments NOTHING

取消回复