Building a Network with PyTorch

Official docs (Chinese): https://www.pytorchtutorial.com/docs/

Official docs: https://pytorch.org/docs/stable/index.html

1 Neural Networks

Official docs: https://pytorch.org/docs/1.8.1/nn.html

2 Containers

  • Official example (a usage sketch follows after these two examples)
# Example
import torch.nn as nn
import torch.nn.functional as F


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5)
        self.conv2 = nn.Conv2d(20, 20, 5)

    # forward pass
    def forward(self, x):
        x = F.relu(self.conv1(x))     # conv1 convolution, then ReLU non-linearity
        return F.relu(self.conv2(x))  # conv2 convolution, then ReLU non-linearity
  • A quick experiment
import torch
from torch import nn


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()

    def forward(self, input):
        output = input + 1
        return output


my_model = MyModel()
x = torch.tensor(1.0)
output = my_model(x)
print(output)  # tensor(2.)
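
Both modules are used the same way: instantiate the class, then call the instance like a function, which dispatches to forward through nn.Module.__call__. A minimal sketch exercising the official Model above (the 1x1x28x28 input size is an assumption; any single-channel image large enough for two 5x5 convs works):

import torch

model = Model()
x = torch.randn(1, 1, 28, 28)  # hypothetical batch: one 28x28 single-channel image
y = model(x)
print(y.shape)  # torch.Size([1, 20, 20, 20]) -- each 5x5 conv shrinks H and W by 4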

3 stride & padding

torch.nn.functional.conv2d

1. stride

import torch
import torch.nn.functional as F

# input image, 5x5; each number represents a pixel value
input = torch.tensor([
    [1, 2, 0, 3, 1],
    [0, 1, 2, 3, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1]
])

# convolution kernel, 3x3
kernel = torch.tensor([
    [1, 2, 1],
    [0, 1, 0],
    [2, 1, 0],
])

# torch.nn.functional.conv2d expects a 4-D input (N, C, H, W), so reshape
input = torch.reshape(input, (1, 1, input.shape[0], input.shape[1]))
kernel = torch.reshape(kernel, (1, 1, kernel.shape[0], kernel.shape[1]))

'''
torch.nn.functional.conv2d(input, weight, bias=None, stride=1, padding=0, dilation=1, groups=1) → Tensor
input: the input tensor
weight: the convolution kernel
stride: how far the kernel moves at each step
padding: padding applied to the input image
'''
output_stride1 = F.conv2d(input, kernel, stride=1)
print(output_stride1)  # tensor([[[[10, 12, 12], [18, 16, 16], [13, 9, 3]]]])

output_stride2 = F.conv2d(input, kernel, stride=2)
print(output_stride2)  # tensor([[[[10, 12], [13, 3]]]])

2. padding

Padding fills the border of the input image (with zeros by default) before the convolution.

output_padding1 = F.conv2d(input, kernel, padding=1, stride=2)
print(output_padding1)  # 3x3 output: (5 + 2*1 - 3) // 2 + 1 = 3

4 Convolution Layers

1. Convolution

2. Signature and parameters

  • Signature
class torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros')
  • Parameters
# in_channels: number of input channels
- in_channels (int) – Number of channels in the input image

# out_channels: number of output channels
- out_channels (int) – Number of channels produced by the convolution

# kernel_size: size of the convolution kernel
- kernel_size (int or tuple) – Size of the convolving kernel

# stride: step size of the kernel as it slides
- stride (int or tuple, optional) – Stride of the convolution. Default: 1

# padding: amount of padding added around the input
- padding (int or tuple, optional) – Zero-padding added to both sides of the input. Default: 0

# padding_mode: how the padding values are filled
- padding_mode (string, optional) – 'zeros', 'reflect', 'replicate' or 'circular'. Default: 'zeros'

# dilation: spacing between the kernel elements
- dilation (int or tuple, optional) – Spacing between kernel elements. Default: 1

- groups (int, optional) – Number of blocked connections from input channels to output channels. Default: 1

- bias (bool, optional) – If True, adds a learnable bias to the output. Default: True
  • dilation (see the sketch below)
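
dilation spaces the kernel elements apart ("atrous" convolution), enlarging the receptive field without adding parameters. A minimal sketch of its effect on the output size (the 7x7 input is an arbitrary assumption):

import torch
from torch import nn

x = torch.randn(1, 1, 7, 7)
# effective kernel size = dilation * (kernel_size - 1) + 1
print(nn.Conv2d(1, 1, kernel_size=3, dilation=1)(x).shape)  # torch.Size([1, 1, 5, 5])
print(nn.Conv2d(1, 1, kernel_size=3, dilation=2)(x).shape)  # torch.Size([1, 1, 3, 3])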

3. How it works

  • in_channels & out_channels (see the sketch below)
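
The intuition: out_channels is the number of kernels the layer learns, and each kernel spans all in_channels. A minimal sketch inspecting the layer's weights (the 3-in/6-out sizes are arbitrary assumptions):

import torch
from torch import nn

conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3)
print(conv.weight.shape)  # torch.Size([6, 3, 3, 3]) -- 6 kernels, each 3x3 across 3 input channels
print(conv.bias.shape)    # torch.Size([6]) -- one bias per output channel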

4. Example code

import torch
import torchvision
from torch.nn import Conv2d
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=False)

dataloader = DataLoader(dataset, batch_size=64)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


my_net = Net()
print(my_net)
# printed network structure
Net(
  (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
)
from torch.utils.tensorboard import SummaryWriter

# display the results in TensorBoard
writer = SummaryWriter("./logs_conv")
step = 0
my_net = Net()
for data in dataloader:
    imgs, targets = data
    output = my_net(imgs)
    writer.add_images("input", imgs, step)
    # writer.add_images("output", output, step)  # errors: output is torch.Size([64, 6, 30, 30]), and add_images cannot display 6 channels
    output = torch.reshape(output, (-1, 3, 30, 30))  # split the 6 channels into two 3-channel images
    writer.add_images("output", output, step)
    step += 1
writer.close()

If you want the convolution to produce more channels while keeping the spatial size unchanged, you need padding. The output size follows the formula H_out = floor((H_in + 2*padding - dilation*(kernel_size - 1) - 1) / stride + 1), and likewise for W_out.
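
A minimal sketch checking the "same size" case (for kernel_size=5, stride=1, dilation=1 the formula gives padding = (5 - 1) / 2 = 2; the tensor sizes are assumptions):

import torch
from torch import nn

conv = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, padding=2)
x = torch.randn(64, 3, 32, 32)
print(conv(x).shape)  # torch.Size([64, 6, 32, 32]) -- more channels, same 32x32 size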

5 Pooling Layers

1. Pooling

2. Signature and parameters

torch.nn.MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)

# the window over which the max is taken
- kernel_size – the size of the window to take a max over

# horizontal/vertical stride; defaults to kernel_size
- stride – the stride of the window. Default value is kernel_size

# pads the border of the image
- padding – implicit zero padding to be added on both sides

# spacing ("holes") between window elements
- dilation – a parameter that controls the stride of elements in the window

- return_indices – if True, will return the max indices along with the outputs. Useful for torch.nn.MaxUnpool2d later

# floor rounds down, ceil rounds up; e.g. with ceil_mode=True, windows that run past the border are kept
- ceil_mode – when True, will use ceil instead of floor to compute the output shape

  • Note that both the input and the output have shape
(N, C, H, W)  # (batch size N, channels C, height H, width W)
  • Note that max pooling cannot operate on long tensors, so convert the input to float:
input = torch.tensor([
    [1, 2, 0, 2, 1],
    [0, 1, 3, 1, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1],
], dtype=torch.float)  # convert to float

3. Example code

import torch
from torch import nn
from torch.nn import MaxPool2d

input = torch.tensor([
    [1, 2, 0, 2, 1],
    [0, 1, 3, 1, 1],
    [1, 2, 1, 0, 0],
    [5, 2, 3, 1, 1],
    [2, 1, 0, 1, 1],
], dtype=torch.float)

# 1 batch, 1 channel, 5x5; -1 lets reshape infer that dimension
input = torch.reshape(input, (-1, 1, 5, 5))


# the network
class MaxPoolNet(nn.Module):
    def __init__(self):
        super(MaxPoolNet, self).__init__()
        self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=True)

    def forward(self, input):
        output = self.maxpool1(input)
        return output


my_net = MaxPoolNet()
output = my_net(input)
print(output)

>> tensor([[[[3., 2.],
             [5., 1.]]]])

6 Non-linear Activations (Activation Functions)

1. Non-linear Activations

e.g. ReLU, Sigmoid, …

2. Signature and parameters

# inplace: whether to overwrite the input tensor instead of allocating a new one
inplace – can optionally do the operation in-place. Default: False

input = -1

ReLU(input, inplace=True)
>> input = 0

output = ReLU(input, inplace=False)
>> input = -1
>> output = 0
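
A minimal runnable sketch of the difference (the tensor values are arbitrary):

import torch
from torch import nn

x = torch.tensor([-1.0, 2.0])
output = nn.ReLU(inplace=False)(x)
print(x)       # tensor([-1., 2.]) -- the input is untouched
print(output)  # tensor([0., 2.])

y = torch.tensor([-1.0, 2.0])
nn.ReLU(inplace=True)(y)
print(y)       # tensor([0., 2.]) -- the input itself was overwritten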

3. Example code

import torch
from torch import nn
from torch.nn import ReLU

input = torch.tensor([
    [1, -0.5],
    [-1, 3],
])


class NonLinearActivationsRelu(nn.Module):
    def __init__(self):
        super(NonLinearActivationsRelu, self).__init__()
        self.relu1 = ReLU(inplace=False)

    def forward(self, input):
        output = self.relu1(input)
        return output


my_net = NonLinearActivationsRelu()
output = my_net(input)

print(input)
>> tensor([[ 1.0000, -0.5000],
           [-1.0000,  3.0000]])

print(output)
>> tensor([[1., 0.],
           [0., 3.]])

7 Normalization Layers

1. Normalization

2. Signature and parameters

torch.nn.BatchNorm2d(num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)

- num_features – C from an expected input of size (N,C,H,W)

- eps – a value added to the denominator for numerical stability. Default: 1e-5

- momentum – the value used for the running_mean and running_var computation. Can be set to None for cumulative moving average (i.e. simple average). Default: 0.1

- affine – a boolean value that when set to True, this module has learnable affine parameters. Default: True

- track_running_stats – a boolean value that when set to True, this module tracks the running mean and variance, and when set to False, this module does not track such statistics, and initializes statistics buffers running_mean and running_var as None. When these buffers are None, this module always uses batch statistics in both training and eval modes. Default: True
- input - (N, C, H, W)
- output - (N, C, H, W)

3. Example code

import torch
from torch import nn

# With Learnable Parameters
m = nn.BatchNorm2d(100)
# Without Learnable Parameters
m = nn.BatchNorm2d(100, affine=False)
input = torch.randn(20, 100, 35, 45)
output = m(input)
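
As a quick sanity check (a sketch continuing from the affine=False module above), the per-channel statistics of the output should be approximately zero mean and unit variance:

print(output.mean(dim=(0, 2, 3)))                 # ~0 for every channel
print(output.var(dim=(0, 2, 3), unbiased=False))  # ~1 for every channel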

8 Linear Layers

1. Linear

2. Signature and parameters

torch.nn.Linear(in_features, out_features, bias=True)

# number of neurons in the input layer
- in_features – size of each input sample

# number of neurons in the output layer
- out_features – size of each output sample

# whether b_i is added after the weighted sum of w_i * x_i
- bias – If set to False, the layer will not learn an additive bias. Default: True

3. Example code

import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("./dataset", train=False, download=False,
                                       transform=torchvision.transforms.ToTensor())

# drop_last=True: the test set has 10000 images, so the final batch would only
# hold 16 of them and could not match in_features = 64 * 3 * 32 * 32 below
dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class LinearLayers(nn.Module):
    def __init__(self):
        super(LinearLayers, self).__init__()
        self.linear1 = Linear(in_features=64 * 3 * 32 * 32, out_features=10)

    def forward(self, input):
        output = self.linear1(input)
        return output


my_net = LinearLayers()

for data in dataloader:
    imgs, targets = data
    # print(imgs.shape)  # torch.Size([64, 3, 32, 32]): batches of 64, 3 channels, 32x32 images
    # output = torch.reshape(imgs, (1, 1, 1, -1))  # flatten all 64 images into one long vector
    # output = my_net(output)  # torch.Size([1, 1, 1, 10])

    output = torch.flatten(imgs)  # torch.Size([196608])
    output = my_net(output)       # torch.Size([10])
    print(output.shape)
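
In practice you usually keep the batch dimension and flatten each sample on its own, so that every image gets its own 10 outputs. A minimal sketch (random data stands in for the CIFAR batch):

import torch
from torch import nn

linear = nn.Linear(in_features=3 * 32 * 32, out_features=10)
imgs = torch.randn(64, 3, 32, 32)          # stand-in for a CIFAR batch
output = torch.flatten(imgs, start_dim=1)  # torch.Size([64, 3072]) -- batch dim preserved
output = linear(output)                    # torch.Size([64, 10]) -- 10 scores per image
print(output.shape)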

9 Models Provided by PyTorch

https://pytorch.org/docs/1.8.1/nn.html
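
If this refers to ready-made architectures rather than individual layers, those live in torchvision.models (an assumption; the link above points at torch.nn). A minimal sketch:

import torchvision

# VGG16 with randomly initialized weights; pretrained=True would download ImageNet weights
vgg16 = torchvision.models.vgg16(pretrained=False)
print(vgg16)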

10 Sequential

1. Simplifying models with Sequential

torch.nn -> Containers -> Sequential

torch.nn.Sequential(*args)
from collections import OrderedDict
from torch import nn

# Example of using Sequential
model = nn.Sequential(
    nn.Conv2d(1, 20, 5),
    nn.ReLU(),
    nn.Conv2d(20, 64, 5),
    nn.ReLU()
)

# Example of using Sequential with OrderedDict
model = nn.Sequential(OrderedDict([
    ('conv1', nn.Conv2d(1, 20, 5)),
    ('relu1', nn.ReLU()),
    ('conv2', nn.Conv2d(20, 64, 5)),
    ('relu2', nn.ReLU())
]))

2. Case study: CIFAR-10 classification

  • CIFAR model structure

[Figure: Structure-of-CIFAR10-quick-model]

  • Note: work out padding and stride from the target output size, e.g. keeping 32x32 with a 5x5 kernel and stride 1 requires padding = (5 - 1) / 2 = 2 (a full model sketch follows at the end of this section)

  • Visualizing the structure in TensorBoard
import torch
from torch.utils.tensorboard import SummaryWriter

# cifar_net is an instance of the CIFAR-10 quick model (defined in the sketch below)
x_test = torch.ones((64, 3, 32, 32))

writer = SummaryWriter("logs_cifar")
writer.add_graph(cifar_net, x_test)
writer.close()
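
For reference, a sketch of the CIFAR-10 quick model pictured above, built with Sequential (the layer sizes are read off the figure, so treat them as assumptions):

import torch
from torch import nn


class CifarNet(nn.Module):
    def __init__(self):
        super(CifarNet, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(3, 32, 5, padding=2),   # 32x32 -> 32x32 (padding=2 keeps the size)
            nn.MaxPool2d(2),                  # 32x32 -> 16x16
            nn.Conv2d(32, 32, 5, padding=2),  # 16x16 -> 16x16
            nn.MaxPool2d(2),                  # 16x16 -> 8x8
            nn.Conv2d(32, 64, 5, padding=2),  # 8x8 -> 8x8
            nn.MaxPool2d(2),                  # 8x8 -> 4x4
            nn.Flatten(),                     # 64 * 4 * 4 = 1024 features
            nn.Linear(1024, 64),
            nn.Linear(64, 10),
        )

    def forward(self, x):
        return self.model(x)


cifar_net = CifarNet()
print(cifar_net(torch.ones((64, 3, 32, 32))).shape)  # torch.Size([64, 10])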