Video: https://www.bilibili.com/video/BV1hE411t7RN

Dataset

  • Dataset: fetches the data samples and indexes them
  • DataLoader: packs the samples into batches
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image
import os
from torchvision import transforms
from torchvision.utils import make_grid


class MyDataset(Dataset):
    def __init__(self, root_dir, label_dir):
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(self.root_dir, self.label_dir)
        self.img_path = os.listdir(self.path)

    def __getitem__(self, idx):
        img_name = self.img_path[idx]
        img_item_path = os.path.join(self.root_dir, self.label_dir, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        # img.show()
        return img, label

    def __len__(self):
        return len(self.img_path)


root_dir = "../dataset/train"
ants_label_dir = "ants"
ants_dataset = MyDataset(root_dir, ants_label_dir)
print(ants_dataset[1])  # same as ants_dataset.__getitem__(1)
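
A second label folder can be loaded the same way and the two datasets joined before handing them to a DataLoader; a minimal sketch, assuming a hypothetical "bees" directory next to "ants":

bees_label_dir = "bees"                      # hypothetical sibling folder of "ants"
bees_dataset = MyDataset(root_dir, bees_label_dir)
train_dataset = ants_dataset + bees_dataset  # Dataset.__add__ returns a ConcatDataset
print(len(train_dataset))                    # len(ants_dataset) + len(bees_dataset)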

TensorBoard

  • Viewing the logs
    • tensorboard --logdir=logs --port=
    • logs: the folder the SummaryWriter writes its event files to (a sketch of writing a scalar follows)
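
A minimal sketch of writing data that the command above can then display (tag and values chosen purely for illustration):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("logs")           # event files go into the logs folder
for i in range(100):
    writer.add_scalar("y=2x", 2 * i, i)  # tag, scalar value, global step
writer.close()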

Transform

  • Purpose:
    • turns Python/PIL data into the tensor data type
    • tensors are the data type neural networks work with and carry the attributes a network needs
    • from the ToTensor docstring: "Convert a PIL Image or ndarray to tensor and scale the values accordingly."
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms

img_path = "../data/train/ants_image/0013035.jpg"
img = Image.open(img_path)
print(img)
tensor_trans = transforms.ToTensor()
tensor_img = tensor_trans(img)
print(tensor_img)

writer = SummaryWriter(log_dir='./logs')
# log the tensor image to TensorBoard
writer.add_image("Tensor_img", tensor_img)
writer.close()

Common Transforms

Normalize

from PIL import Image
from torchvision import transforms
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='./logs')
img_path = "../data/train/ants_image/0013035.jpg"
img = Image.open(img_path)

trans_tensor = transforms.ToTensor()
img_tensor = trans_tensor(img)
writer.add_image("ToTensor", img_tensor)

# Normalize
print("before Normalize:", img_tensor[0][0][0])  # 0.3137
trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
img_norm = trans_norm(img_tensor)
print("after Normalize:", img_norm[0][0][0])  # 2 * 0.3137 - 1
writer.add_image("Normalize", img_norm)

writer.close()
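
Normalize computes output[c] = (input[c] - mean[c]) / std[c] for each channel c; with mean = std = 0.5 this reduces to 2 * input - 1, so the 0.3137 above becomes 2 * 0.3137 - 1 = -0.3726.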

Resize

# Resize  
print("Resize" * 10)
print(img.size)
trans_resize = transforms.Resize((512, 512))
# img (PIL) --> Resize --> img_resize (PIL)
img_resize = trans_resize(img)
# img_resize (PIL) --> ToTensor --> img_resize (tensor)
img_resize = trans_tensor(img_resize)
writer.add_image("Resize", img_resize)
print(img_resize)

Compose

# Compose
trans_resize_2 = transforms.Resize(512)
trans_compose = transforms.Compose([trans_resize_2, trans_tensor])  # Resize, then ToTensor
img_resize_2 = trans_compose(img)
writer.add_image("Compose", img_resize_2)

# RandomCrop
trans_random = transforms.RandomCrop((256, 128))
trans_compose_2 = transforms.Compose([trans_random, trans_tensor])
for i in range(10):
    img_crop = trans_compose_2(img)
    writer.add_image("RandomCrop", img_crop, i)

Using the datasets in torchvision

import torchvision
from torch.utils.tensorboard import SummaryWriter

dataset_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor()
])

train_set = torchvision.datasets.CIFAR10(root='./dataset',
                                         train=True,
                                         transform=dataset_transform,
                                         download=True)
test_set = torchvision.datasets.CIFAR10(root='./dataset',
                                        train=False,
                                        transform=dataset_transform,
                                        download=True)

print(test_set[0])
print(test_set.classes)  # ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

img, target = test_set[0]
print(img)
print(target)  # 3
print(test_set.classes[target])  # cat
# img.show()  # only works without the ToTensor transform, when img is still a PIL Image

writer = SummaryWriter("4_dataset_transform")
for i in range(10):
    img, target = test_set[i]
    writer.add_image("test_set", img, i)

writer.close()

DataLoader

  • batch_size: how many samples get packed into one batch
import torchvision
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# test split of CIFAR-10
test_data = torchvision.datasets.CIFAR10(root='./dataset', train=False, download=True,
                                         transform=torchvision.transforms.ToTensor())

test_loader = DataLoader(dataset=test_data, batch_size=4, shuffle=True, num_workers=0, drop_last=False)

# first image and target of the test set
img, target = test_data[0]  # torch.Size([3, 32, 32]), 3 channels
print(img.shape)
print(target)

writer = SummaryWriter("dataloader")
for epoch in range(2):
    step = 0
    for data in test_loader:
        imgs, targets = data
        print(imgs.shape)
        print(targets)
        writer.add_images("Epoch: {}".format(epoch), imgs, step)
        step = step + 1
writer.close()
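
With batch_size=4 the 10000-image CIFAR-10 test set splits evenly into 2500 batches, so drop_last has no effect here; with batch_size=64 there would be 156 full batches plus a final batch of 10000 - 156 * 64 = 16 images, which drop_last=True would discard.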

TensorBoard view of the logged batches (screenshot not preserved).

Basic skeleton of a neural network (nn.Module)

nn.Module

import torch
from torch import nn


class MyNet(nn.Module):
    def __init__(self):
        super(MyNet, self).__init__()

    def forward(self, input):
        output = input + 1
        return output


my_net = MyNet()
x = torch.tensor([1, 2, 3])
output = my_net(x)
print(output)  # tensor([2, 3, 4])

nn.Conv2d

Convolution layer

import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.nn import Conv2d

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)
dataloader = DataLoader(dataset, batch_size=64)


class testConv2d(nn.Module):
    def __init__(self):
        super(testConv2d, self).__init__()
        # colour image: 3 input channels, 6 output channels, 3x3 kernel
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=3, stride=1, padding=0)

    def forward(self, x):
        x = self.conv1(x)
        return x


testConv = testConv2d()
print(testConv)
# testConv2d(
#   (conv1): Conv2d(3, 6, kernel_size=(3, 3), stride=(1, 1))
# )

for data in dataloader:
    imgs, targets = data
    output = testConv(imgs)
    print(imgs.shape)    # torch.Size([64, 3, 32, 32])
    print(output.shape)  # torch.Size([64, 6, 30, 30]) with batch_size=64
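
The output size follows the usual Conv2d formula out = floor((in + 2 * padding - kernel_size) / stride) + 1: here (32 + 0 - 3) / 1 + 1 = 30, hence 30x30 feature maps with 6 output channels.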

Pooling layer

  • https://docs.pytorch.ac.cn/docs/stable/nn.html#pooling-layers
  • MaxPool2d (the most commonly used)
    • kernel_size – size of the window to take the max over
    • stride – stride of the window; defaults to kernel_size
    • padding – implicit negative-infinity padding added on both sides
    • dilation – controls the spacing between elements in the window
    • return_indices (bool) – if True, also return the indices of the max values; useful for torch.nn.MaxUnpool2d
    • ceil_mode (bool) – if True, use ceil instead of floor to compute the output shape
    • dilation controls the element spacing (dilated / atrous convolution)
    • ceil_mode keeps partially-covered windows instead of dropping them
    • Usage (see the sketch after this list)
      • self.maxpool1 = MaxPool2d(kernel_size=3, ceil_mode=False)
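
A minimal sketch of the ceil_mode difference on a small input (values chosen purely for illustration):

import torch
from torch.nn import MaxPool2d

input = torch.tensor([[1, 2, 0, 3, 1],
                      [0, 1, 2, 3, 1],
                      [1, 2, 1, 0, 0],
                      [5, 2, 3, 1, 1],
                      [2, 1, 0, 1, 1]], dtype=torch.float32)
input = torch.reshape(input, (1, 1, 5, 5))  # (batch, channels, height, width)

print(MaxPool2d(kernel_size=3, ceil_mode=True)(input).shape)   # torch.Size([1, 1, 2, 2]) -- partial windows kept
print(MaxPool2d(kernel_size=3, ceil_mode=False)(input).shape)  # torch.Size([1, 1, 1, 1]) -- partial windows dropped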

Non-linear activations

  • self.relu1 = ReLU()
  • self.sigmoid1 = Sigmoid()
  • both are applied element-wise (see the sketch below)
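
A minimal sketch applying both activations to a small tensor (values chosen purely for illustration):

import torch
from torch.nn import ReLU, Sigmoid

x = torch.tensor([[1.0, -0.5],
                  [-1.0, 3.0]])
print(ReLU()(x))      # negative entries are clamped to 0
print(Sigmoid()(x))   # every entry is squashed into (0, 1)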

Linear layers and other layers

Normalization

Linear layer (fully-connected layer)

  • flatten a [64, 3, 32, 32] batch of images
  • giving a vector of 196608 values
  • the linear layer maps it to 10 outputs
import torch
import torchvision
from torch import nn
from torch.nn import Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=64, drop_last=True)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.linear1 = Linear(196608, 10)

    def forward(self, input):
        output = self.linear1(input)
        return output


tudui = Tudui()

for data in dataloader:
    imgs, targets = data
    print(imgs.shape)    # torch.Size([64, 3, 32, 32])
    output = torch.flatten(imgs)
    print(output.shape)  # torch.Size([196608]), 64*3*32*32 = 196608
    output = tudui(output)
    print(output.shape)  # torch.Size([10])
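
torch.flatten(imgs) collapses the batch dimension as well; a minimal variant that keeps it, reusing imgs and the Linear import from the block above (the smaller in_features is an illustrative change):

flat = torch.flatten(imgs, start_dim=1)  # torch.Size([64, 3072]), since 3*32*32 = 3072
linear = Linear(3072, 10)
print(linear(flat).shape)                # torch.Size([64, 10])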

Other layers


Building the model in practice, and Sequential

  • Sequential keeps the code compact

Model diagram (the CIFAR-10 model: three Conv2d + MaxPool2d blocks, then Flatten and two Linear layers)

Code

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torch
from torch import nn
from torch.nn import Conv2d, MaxPool2d, Flatten, Linear, Sequential
from torch.utils.tensorboard import SummaryWriter


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


tudui = Tudui()
print(tudui)
input = torch.ones((64, 3, 32, 32))  # (batch, channels, height, width)
output = tudui(input)
print(output.shape)  # torch.Size([64, 10])

writer = SummaryWriter("../logs_seq")
writer.add_graph(tudui, input)
writer.close()
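
Why Linear(1024, 64): each MaxPool2d(2) halves the spatial size, so 32 -> 16 -> 8 -> 4, and Flatten on the final 64-channel 4x4 feature map gives 64 * 4 * 4 = 1024 features per image.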


Loss functions and backpropagation

Cross-entropy

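A minimal toy example of nn.CrossEntropyLoss (scores and target chosen purely for illustration); for one sample it computes -x[class] + log(sum(exp(x))):

import torch
from torch import nn

x = torch.tensor([[0.1, 0.2, 0.3]])  # scores for 3 classes, batch of 1
y = torch.tensor([1])                # the true class index
loss_cross = nn.CrossEntropyLoss()
print(loss_cross(x, y))              # -0.2 + log(exp(0.1) + exp(0.2) + exp(0.3)) ≈ 1.1019

The full example on CIFAR-10: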

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=1)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
tudui = Tudui()
for data in dataloader:
    imgs, targets = data
    outputs = tudui(imgs)
    result_loss = loss(outputs, targets)
    result_loss.backward()  # computes the gradients for every parameter
    print(result_loss.item())
    print("ok")

Optimizer

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torch
import torchvision
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader

dataset = torchvision.datasets.CIFAR10("../dataset", train=False, transform=torchvision.transforms.ToTensor(),
                                       download=True)

dataloader = DataLoader(dataset, batch_size=1)


class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


loss = nn.CrossEntropyLoss()
tudui = Tudui()
# pass the model parameters and the learning rate (lr)
optim = torch.optim.SGD(tudui.parameters(), lr=0.01)  # stochastic gradient descent
for epoch in range(20):  # 20 epochs
    running_loss = 0.0
    for data in dataloader:
        imgs, targets = data              # inputs x and ground-truth y
        outputs = tudui(imgs)             # predictions
        result_loss = loss(outputs, targets)
        optim.zero_grad()                 # zero the gradients -- do not forget this
        result_loss.backward()            # backpropagation
        optim.step()                      # update every parameter
        running_loss = running_loss + result_loss.item()  # .item() avoids keeping the graph alive
    print(running_loss)
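
StepLR is imported above but never used; a minimal sketch of attaching a learning-rate scheduler to the same optimizer (step_size and gamma are illustrative, and optim, dataloader, loss and tudui come from the block above):

scheduler = StepLR(optim, step_size=5, gamma=0.1)  # multiply the lr by 0.1 every 5 epochs
for epoch in range(20):
    for data in dataloader:
        imgs, targets = data
        result_loss = loss(tudui(imgs), targets)
        optim.zero_grad()
        result_loss.backward()
        optim.step()
    scheduler.step()  # advance the schedule once per epoch
    print(epoch, scheduler.get_last_lr())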

Use the debugger to inspect the gradients

Using and modifying existing models

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torchvision
from torch import nn

# ImageNet can no longer be downloaded automatically through torchvision, so this call would raise an error
# train_data = torchvision.datasets.ImageNet("../data_image_net", split='train', download=True,
#                                            transform=torchvision.transforms.ToTensor())

vgg16_false = torchvision.models.vgg16(pretrained=False)
vgg16_true = torchvision.models.vgg16(pretrained=True)

print(vgg16_true)

train_data = torchvision.datasets.CIFAR10('./dataset', train=True, transform=torchvision.transforms.ToTensor(),
                                          download=True)

# append an extra linear layer mapping the 1000 ImageNet classes to the 10 CIFAR-10 classes
vgg16_true.classifier.add_module('add_linear', nn.Linear(1000, 10))
print(vgg16_true)

print(vgg16_false)
# or replace the last classifier layer instead of appending one
vgg16_false.classifier[6] = nn.Linear(4096, 10)
print(vgg16_false)
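
In recent torchvision releases the pretrained flag is deprecated in favour of a weights argument; a minimal equivalent, assuming torchvision >= 0.13:

vgg16_true = torchvision.models.vgg16(weights=torchvision.models.VGG16_Weights.DEFAULT)
vgg16_false = torchvision.models.vgg16(weights=None)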

Saving and loading models

Saving

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torch
import torchvision
from torch import nn

vgg16 = torchvision.models.vgg16(pretrained=False)
# Method 1: save the model structure together with its parameters
torch.save(vgg16, "vgg16_method1.pth")

# Method 2: save only the parameters (officially recommended)
torch.save(vgg16.state_dict(), "vgg16_method2.pth")


# Pitfall: saving a custom model with method 1
class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)

    def forward(self, x):
        x = self.conv1(x)
        return x


tudui = Tudui()
torch.save(tudui, "tudui_method1.pth")

Loading

# -*- coding: utf-8 -*-
# Author: 小土堆
# WeChat official account: 土堆碎念
import torch
from model_save import *  # imports the model definitions (avoids the pitfall below)
import torchvision
from torch import nn

# Method 1: load a model that was saved with structure + parameters
model = torch.load("vgg16_method1.pth")
# print(model)

# Method 2: load the saved parameters into a freshly created model
vgg16 = torchvision.models.vgg16(pretrained=False)
vgg16.load_state_dict(torch.load("vgg16_method2.pth"))
# model = torch.load("vgg16_method2.pth")
# print(vgg16)


# Pitfall 1: the class definition must be available in this file (or imported)
class Tudui(nn.Module):
    def __init__(self):
        super(Tudui, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3)

    def forward(self, x):
        x = self.conv1(x)
        return x


model = torch.load('tudui_method1.pth')
print(model)

Model training routine

  • using the CIFAR-10 dataset as the example

Interpreting the outputs

  • each row of outputs holds the model's score for every class for one sample
  • argmax picks the index of the largest score in that row (used as the prediction)
    • argument 1: along each row (across classes)
    • argument 0: along each column
    • this gives preds, the predicted classes
  • target holds the ground-truth labels
  • compare preds with target (see the sketch after this list)
    • equal entries contribute 1 to the sum
    • unequal entries contribute 0
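
A minimal sketch of the argmax/comparison logic (scores chosen purely for illustration):

import torch

outputs = torch.tensor([[0.1, 0.2],
                        [0.3, 0.4]])
preds = outputs.argmax(1)        # tensor([1, 1]) -- index of the largest score in each row
targets = torch.tensor([0, 1])
print((preds == targets).sum())  # tensor(1) -- number of correct predictions

The full training script: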
import torch
import torchvision
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from model import MyModel

# datasets
train_data = torchvision.datasets.CIFAR10(root="./dataset",
                                          train=True,
                                          transform=torchvision.transforms.ToTensor(),
                                          download=False)

test_data = torchvision.datasets.CIFAR10(root="./dataset",
                                         train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=False)

train_data_size = len(train_data)
test_data_size = len(test_data)
print("Training set size: {}".format(train_data_size))
print("Test set size: {}".format(test_data_size))

# load the datasets with DataLoader
train_dataloader = DataLoader(train_data, batch_size=64)
test_dataloader = DataLoader(test_data, batch_size=64)

# build the network
mymodel = MyModel()
print(mymodel)

# loss function
loss_fn = nn.CrossEntropyLoss()

# optimizer
learning_rate = 1e-2  # 0.01
optimizer = torch.optim.SGD(mymodel.parameters(), lr=learning_rate, momentum=0.9)

# training bookkeeping
total_train_step = 0  # number of training steps
total_test_step = 0   # number of test evaluations
epoch = 5             # number of epochs

# TensorBoard
writer = SummaryWriter(log_dir='../mymodel_logs')

for i in range(epoch):
    print("----- epoch {} training starts -----".format(i))

    # training phase
    mymodel.train()  # switch to training mode (optional unless layers like Dropout/BatchNorm are used)
    for data in train_dataloader:
        imgs, targets = data
        outputs = mymodel(imgs)
        loss = loss_fn(outputs, targets)

        optimizer.zero_grad()  # zero the gradients
        loss.backward()        # backpropagation
        optimizer.step()       # update the parameters
        total_train_step += 1
        if total_train_step % 100 == 0:  # avoid flooding the console
            print("step: {}, loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar('train_loss', loss.item(), total_train_step)  # log the training loss

    # evaluation phase
    mymodel.eval()  # switch to evaluation mode (optional unless layers like Dropout/BatchNorm are used)
    total_test_loss = 0  # accumulated loss over the test set
    total_accuracy = 0   # number of correct predictions
    with torch.no_grad():  # no gradients needed for evaluation
        for data in test_dataloader:
            imgs, targets = data
            outputs = mymodel(imgs)
            loss = loss_fn(outputs, targets)
            # print("test step: {}, loss: {}".format(total_test_step, loss.item()))
            total_test_loss += loss.item()
            accuracy = (outputs.argmax(1) == targets).sum()
            total_accuracy += accuracy

    print("total test loss: {}".format(total_test_loss))
    print("test accuracy: {}".format(total_accuracy / test_data_size))
    writer.add_scalar('test_loss', total_test_loss, total_test_step)                     # log the test loss
    writer.add_scalar("test_accuracy", total_accuracy / test_data_size, total_test_step)  # log the test accuracy
    total_test_step += 1

    torch.save(mymodel, "mymodel_{}.pth".format(i))  # save the whole model after each epoch
    print("model saved")

writer.close()  # close the TensorBoard writer

Model validation (testing)

  • use the trained model
  • feed it new inputs
import torchvision.transforms
from PIL import Image
import torch
from torch import nn
from torch.nn import Sequential, Conv2d, MaxPool2d, Flatten, Linear


image_path = "../imgs/dog.png"
image = Image.open(image_path)

print(image)
# <PIL.PngImagePlugin.PngImageFile image mode=RGB size=456x336 at 0x10314CFD0>

image = image.convert('RGB')  # PNGs may have 4 channels (RGBA)
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor(),
])

image = transform(image)
print(image.shape)  # torch.Size([3, 32, 32])


class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.model1 = Sequential(
            Conv2d(3, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 32, 5, padding=2),
            MaxPool2d(2),
            Conv2d(32, 64, 5, padding=2),
            MaxPool2d(2),
            Flatten(),
            Linear(1024, 64),
            Linear(64, 10)
        )

    def forward(self, x):
        x = self.model1(x)
        return x


model = MyModel()
# assumes the checkpoint stores a state_dict; map_location='cpu' may be needed if it was saved on a GPU/MPS device
model.load_state_dict(torch.load('../model_mps/mymodel_9.pth'))
print(model)


# After unsqueeze(0) / reshape the shape becomes 1x3x32x32;
# the extra dimension is the batch size, i.e. a batch containing a single image.
# image = image.unsqueeze(0)
image = torch.reshape(image, (1, 3, 32, 32))
print("torch.reshape", image.shape)
with torch.no_grad():
    output = model(image)
print(output)
# tensor([[-0.0300, 0.1362, 0.1201, -0.1339, 0.0189, -0.0951, 0.0431, 0.0677, -0.0523, 0.1262]])
print(output.argmax(1))  # tensor([5]) -> class index 5

# CIFAR-10 class labels
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
predicted_class = classes[output.argmax(1).item()]
print(f"Prediction: {predicted_class}")  # Prediction: dog