Hg's Blog

学习 分享 进步

0%

神经网络模型

神经网络模型

Lenet

最初应用为识别手写数字

早期成功的神经网络

先使用卷积层学习图片的空间信息,池化层(平均)降低图片敏感度,然后使用全连接层来转换到类别空间

image-20220725190350958

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class LeNet(nn.Module):
    """LeNet-5 style CNN: (N, 1, 32, 32) grayscale images -> 10 class logits.

    Three 5x5 conv layers learn spatial features, average pooling halves
    resolution after the first two, and two fully connected layers map the
    120-dim feature vector to the class space.
    """

    def __init__(self):
        super(LeNet, self).__init__()
        self.relu = nn.ReLU()
        # 2x2 average pooling with stride 2 halves height and width.
        self.pool = nn.AvgPool2d((2, 2), (2, 2))
        self.conv1 = nn.Conv2d(1, 6, (5, 5), (1, 1), (0, 0))
        self.conv2 = nn.Conv2d(6, 16, (5, 5), (1, 1), (0, 0))
        self.conv3 = nn.Conv2d(16, 120, (5, 5), (1, 1), (0, 0))
        self.linear1 = nn.Linear(120, 84)
        self.linear2 = nn.Linear(84, 10)

    def forward(self, x):
        """Return raw class logits of shape (N, 10)."""
        out = self.pool(self.relu(self.conv1(x)))   # 32 -> 28 -> 14
        out = self.pool(self.relu(self.conv2(out))) # 14 -> 10 -> 5
        out = self.relu(self.conv3(out))            # 5 -> 1, 120 channels
        out = out.reshape(out.shape[0], -1)         # flatten to (N, 120)
        out = self.relu(self.linear1(out))
        return self.linear2(out)                    # logits, no activation

AlexNet

Won the 2012 ImageNet competition (ILSVRC-2012)

本质上是更深更大的 LeNet

改进了:

  • 丢弃法
  • ReLU
  • MaxPooling
  • Overlapping Pooling (stride=2, kernel_size=3)

架构:

image-20220727185015997

写代码的时候出现了一个问题:AlexNet 的输入图像的宽高到底是224还是227?因为这影响到卷积层参数的计算。

网络上的相关讨论

这里我写代码的时候暂且按照 227×227×3 的输入作处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class AlexNet(nn.Module):
    """Half-width AlexNet (one GPU column of the original) for (N, 3, 227, 227).

    Five conv layers with overlapping 3x3/stride-2 max pooling after stages
    1, 2 and 5, then a three-layer classifier head with dropout, producing
    1000 class logits.
    """

    def __init__(self):
        super(AlexNet, self).__init__()
        self.relu = nn.ReLU()
        # Overlapping pooling: window 3, stride 2.
        self.pool = nn.MaxPool2d((3, 3), (2, 2))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        self.conv1 = nn.Conv2d(3, 48, (11, 11), (4, 4), (0, 0))
        self.conv2 = nn.Conv2d(48, 128, (5, 5), (1, 1), (2, 2))
        self.conv3 = nn.Conv2d(128, 192, (3, 3), (1, 1), (1, 1))
        self.conv4 = nn.Conv2d(192, 192, (3, 3), (1, 1), (1, 1))
        self.conv5 = nn.Conv2d(192, 128, (3, 3), (1, 1), (1, 1))
        # 227 -> 55 -> 27 -> 13 -> 6, so the feature map is 6x6x128.
        self.linear1 = nn.Linear(6 * 6 * 128, 2048)
        self.linear2 = nn.Linear(2048, 2048)
        self.linear3 = nn.Linear(2048, 1000)

    def forward(self, x):
        """Return raw class logits of shape (N, 1000)."""
        # Convolutional feature extractor.
        out = self.pool(self.relu(self.conv1(x)))
        out = self.pool(self.relu(self.conv2(out)))
        out = self.relu(self.conv3(out))
        out = self.relu(self.conv4(out))
        out = self.pool(self.relu(self.conv5(out)))
        # Fully connected classifier with dropout regularisation.
        out = self.flatten(out)
        out = self.dropout(self.relu(self.linear1(out)))
        out = self.dropout(self.relu(self.linear2(out)))
        return self.linear3(out)                    # logits, no activation

VGG

image-20220731073437523

特点:

  • 小卷积核、多卷积子层
  • 小池化核
  • 通道数多

vgg代码(这里简单实现了一个VGG16)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class VGG16(nn.Module):
    """VGG-16 for (N, 3, 224, 224) inputs -> 1000 class logits.

    Thirteen 3x3/stride-1/pad-1 conv layers in five stages (2-2-3-3-3),
    each stage followed by 2x2 max pooling, then three fully connected
    layers with dropout. 224 / 2^5 = 7, hence the 7*7*512 flatten size.

    Fixes over the draft version:
      * stages 3-5 now have their own distinct conv layers (the draft
        reused conv3_2/conv4_2, silently sharing weights and leaving
        stage 5 without any layers of its own);
      * the final linear layer returns raw logits — the draft applied
        ReLU to the output, which clips all negative logits to zero.
    """

    def __init__(self):
        super(VGG16, self).__init__()
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d((2, 2), (2, 2))
        self.dropout = nn.Dropout(0.5)
        self.flatten = nn.Flatten()
        # Stage 1: 3 -> 64
        self.conv1_1 = nn.Conv2d(3, 64, (3, 3), (1, 1), (1, 1))
        self.conv1_2 = nn.Conv2d(64, 64, (3, 3), (1, 1), (1, 1))
        # Stage 2: 64 -> 128
        self.conv2_1 = nn.Conv2d(64, 128, (3, 3), (1, 1), (1, 1))
        self.conv2_2 = nn.Conv2d(128, 128, (3, 3), (1, 1), (1, 1))
        # Stage 3: 128 -> 256 (three convs)
        self.conv3_1 = nn.Conv2d(128, 256, (3, 3), (1, 1), (1, 1))
        self.conv3_2 = nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1))
        self.conv3_3 = nn.Conv2d(256, 256, (3, 3), (1, 1), (1, 1))
        # Stage 4: 256 -> 512 (three convs)
        self.conv4_1 = nn.Conv2d(256, 512, (3, 3), (1, 1), (1, 1))
        self.conv4_2 = nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1))
        self.conv4_3 = nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1))
        # Stage 5: 512 -> 512 (three convs)
        self.conv5_1 = nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1))
        self.conv5_2 = nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1))
        self.conv5_3 = nn.Conv2d(512, 512, (3, 3), (1, 1), (1, 1))
        self.linear1 = nn.Linear(7 * 7 * 512, 4096)
        self.linear2 = nn.Linear(4096, 4096)
        self.linear3 = nn.Linear(4096, 1000)

    def forward(self, x):
        """Return raw class logits of shape (N, 1000)."""
        x = self.relu(self.conv1_1(x))
        x = self.relu(self.conv1_2(x))
        x = self.pool(x)                    # 224 -> 112
        x = self.relu(self.conv2_1(x))
        x = self.relu(self.conv2_2(x))
        x = self.pool(x)                    # 112 -> 56
        x = self.relu(self.conv3_1(x))
        x = self.relu(self.conv3_2(x))
        x = self.relu(self.conv3_3(x))
        x = self.pool(x)                    # 56 -> 28
        x = self.relu(self.conv4_1(x))
        x = self.relu(self.conv4_2(x))
        x = self.relu(self.conv4_3(x))
        x = self.pool(x)                    # 28 -> 14
        x = self.relu(self.conv5_1(x))
        x = self.relu(self.conv5_2(x))
        x = self.relu(self.conv5_3(x))
        x = self.pool(x)                    # 14 -> 7

        x = self.flatten(x)
        x = self.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.relu(self.linear2(x))
        x = self.dropout(x)
        # No activation on the classifier output: raw logits.
        x = self.linear3(x)
        return x

to be continued…