Raw input :原始数据 Feature Extraction Module : 特征提取/抽取模块,就是当前的模型要完成的功能 Feature vactor: 特征向量,可用于分类的特征向量 Trainable Classifier Module : 可训练的分类器,就是现在的分类层 机器学习数据特征提取,靠人工 深度学习数据特征提取,Feature Extraction Module,由神经网络(即模型)提取 - 但这并不是说,深度学习就不能人工提取特征了 - 对于一些交易场景,有其独特的业务含义, - 人工特征的提取是有助于神经网络推理的 - 但机器学习是一定需要人工提取特征的 然后通过全连接映射到业务的类别上 - 2分类映射到2维向量上 - n分类映射到n维向量上 深度学习输入数据,比如一张图像,然后映射到某个类别上 深度学习 原始数据 -- 模型 -- 分类 -- 标签类别 机器学习 原始数据 -- 特征工程 -- 模型 -- 标签类别
|
|
深度学习灵魂三问:
输入是什么?
输出是什么?
试图实现什么操作?要赋予网络什么样的功能?
|
|
图像分类模型 LeNet AlexNet VggNet InceptionNet ResNet |
|
深度学习的本质 矩阵相乘,线性变换 要清楚不同维度代表的含义,即矩阵shape的业务含义 |
concat 拼接 -- InceptionNet add 相加 -- ResNet |
输入输出 - 输入是原始数据,一张图片,[B,1,32,32] - 输出是10个类别的向量 特征提取 - 卷积改变特征数,逐渐变大, 1 -- 6 -- 6 -- 16 -- 16 - 特征图下采样,图形逐渐变小 ,32-- 28 -- 14 -- 14 -- 5 分类 - 按批次展平,一张图片的数据展平为一个向量,全连接映射到类别 下采样 每四个格取一个 - 每隔一个取一个 :方式有些古板 - 随机取一个 - 平均值 :信息丢失最少 - 最小值 - 最大值 这也是池化层的操作, - 池化层无参数,只是一种计算方式 - 池化不改变特征层/数,只是一种下采样方式,收缩的是特征图的大小
|
|
LeNet论文复现
import torch
from torch import nn
from torch.nn import functional as F
class LeNet(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet, self).__init__()
# 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=0)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu1 = nn.ReLU()
# 2
self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu2 = nn.ReLU()
# 展平层
self.flatten = nn.Flatten()
# 分类层,这一步输入的特征数是上一步向量的特征维数,是固定的
self.fc1 = nn.Linear(in_features=576, out_features=256)
self.fc2 = nn.Linear(in_features=256, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=10)
def forward(self,x):
# [B, 1, 32, 32] --> [B, 6, 30, 30]
x = self.conv1(x)
# [B, 6, 30, 30] --> [B, 6, 15, 15]
x = self.pool1(x)
x = self.relu1(x)
# [B, 6, 15, 15] --> [B, 16, 13, 13]
x = self.conv2(x)
# [B, 16, 13, 13] --> [B, 16, 6, 6]
x = self.pool2(x)
x = self.relu2(x)
# [B, 16, 6, 6] --> [B, 576]
x = self.flatten(x)
# [B, 576] --> [B, 256]
x = self.fc1(x)
# [B, 256] --> [B, 128]
x = self.fc2(x)
# [B, 128] --> [B, 10]
x = self.fc3(x)
return x
batch_size = 32 #批次 features = 1 #黑白图片 img_h = 32 #高32个像素,就是32个特征 img_w = 32 out_feature = 10 #10分类问题,输出10个特征 data = torch.randn(batch_size,features,img_h,img_w) lenet = LeNet() lenet(data).shape #torch.Size([32, 10])
|
|
数据集 使用封装好的 手写数字数据集
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets
from tpf.params import ImgPath
from tpf import pkl_load,pkl_save
# 数据加载测试
class MyDataset(Dataset):
"""手写数字数据集
"""
def __init__(self, fil):
"""超参数初始化
"""
self.img_list = pkl_load(file_path=fil)
def __getitem__(self, idx):
"""读取图像及标签
"""
return self.img_list[idx][0], self.img_list[idx][1]
def __len__(self):
"""数据集大小
"""
return len(self.img_list)
train_dataset = MyDataset(fil=ImgPath.mnist_data_train)
print(len(train_dataset)) # 60000
test_dataset = MyDataset(fil=ImgPath.mnist_data_test)
print(len(test_dataset)) # 10000
# 训练集加载器
train_dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True)
# 测试集加载器
test_dataloader = DataLoader(dataset=test_dataset, batch_size=512, shuffle=False)
# 1个批次512个样本,一个像素用一个数值表示,shape为[32,32]
for X,y in train_dataloader:
print(X.shape,y.shape) #torch.Size([512, 1, 32, 32]) torch.Size([512])
break
# 是否有GPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class LeNet(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet, self).__init__()
# 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=0)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu1 = nn.ReLU()
# 2
self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu2 = nn.ReLU()
# 展平层
self.flatten = nn.Flatten()
# 分类层,这一步输入的特征数是上一步向量的特征维数,是固定的
self.fc1 = nn.Linear(in_features=576, out_features=256)
self.fc2 = nn.Linear(in_features=256, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=10)
def forward(self,x):
# [B, 1, 32, 32] --> [B, 6, 30, 30]
x = self.conv1(x)
# [B, 6, 30, 30] --> [B, 6, 15, 15]
x = self.pool1(x)
x = self.relu1(x)
# [B, 6, 15, 15] --> [B, 16, 13, 13]
x = self.conv2(x)
# [B, 16, 13, 13] --> [B, 16, 6, 6]
x = self.pool2(x)
x = self.relu2(x)
# [B, 16, 6, 6] --> [B, 576]
x = self.flatten(x)
# [B, 576] --> [B, 256]
x = self.fc1(x)
# [B, 256] --> [B, 128]
x = self.fc2(x)
# [B, 128] --> [B, 10]
x = self.fc3(x)
return x
model = LeNet()
loss_fn = nn.CrossEntropyLoss()
optim = torch.optim.Adam(params=model.parameters(),lr=1e-3)
epoch = 1
for i in range(epoch):
for X,y in train_dataloader:
#模型正向传播
y_pred = model(X)
#损失函数梯度清空后,反向求导
optim.zero_grad()
loss = loss_fn(y_pred,y)
optim.step()
break
y_pred
tensor([[-0.0286, -0.0236, 0.1188, ..., 0.0632, 0.1109, 0.0093],
[-0.0314, -0.0266, 0.1238, ..., 0.0644, 0.1135, 0.0147],
[-0.0187, -0.0252, 0.1177, ..., 0.0884, 0.1284, 0.0191],
...,
[-0.0311, -0.0226, 0.1131, ..., 0.0634, 0.1191, 0.0106],
[-0.0297, -0.0279, 0.1152, ..., 0.0972, 0.1198, 0.0182],
[-0.0207, -0.0258, 0.1170, ..., 0.0877, 0.1053, 0.0122]],
grad_fn=AddmmBackward0)
def get_loss(dataloader, model, loss_fn, device=device):
"""全体数据集/整个分布 模型输出与标签之间的 平均 偏差/损失
"""
# 把模型设置为评估模式
model.eval()
losses = []
with torch.no_grad():
for X,y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
y_pred = model(X)
loss = loss_fn(y_pred, y)
losses.append(loss.item())
return round(np.array(losses).mean(), ndigits=3)
def get_acc(dataloader, model=model, device=device):
"""分类问题精度计算
- 精度是针对整个数据集的
"""
# 把模型设置为评估模式
model.eval()
accs = []
with torch.no_grad():
for X, y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
y_pred = model(X)
y_pred = y_pred.argmax(dim=-1)
acc = (y_pred == y).float().mean()
accs.append(acc.item())
return round(np.array(accs).mean(), ndigits=3)
# 模型参数搬家
model.to(device=device)
def train(dataloader=train_dataloader, model=model, loss_fn=loss_fn,
optimizer=optim, epochs=2):
print(f"自然准确率:Train_Acc:{get_acc(dataloader=train_dataloader)}, Test_Acc: {get_acc(dataloader=test_dataloader)}")
# 循环训练epocs轮次
for epoch in range(1, epochs+1):
# 把模型设置为训练模型
model.train()
# 每轮都按批量训练
for X, y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
# 正向传播
y_pred = model(X)
# 计算损失
loss = loss_fn(y_pred, y)
# 梯度清空
optimizer.zero_grad()
# 反向传播
loss.backward()
# 参数优化
optimizer.step()
print(f"Epoch: {epoch}, Train_Acc:{get_acc(dataloader=train_dataloader)}, Test_Acc: {get_acc(dataloader=test_dataloader)}")
train(epochs=2)
自然准确率:Train_Acc:0.098, Test_Acc: 0.095 Epoch: 1, Train_Acc:0.932, Test_Acc: 0.938 Epoch: 2, Train_Acc:0.966, Test_Acc: 0.969 |
|
通常仅保存参数
model.state_dict()
OrderedDict([('conv1.weight',
tensor([[[[ 0.1514, -0.0253, 0.3065],
[ 0.1495, 0.3303, -0.2056],
[ 0.3438, -0.0484, 0.3366]]],
。。。。。
[ 1.0156e-01, -1.4222e-01, -3.5859e-02]]]])),
('conv2.bias',
tensor([-0.0321, -0.1062, 0.0668, 0.0728, 0.0881, -0.1327, 0.0257, -0.0702,
0.0444, 0.0858, 0.1039, 0.0888, -0.0225, 0.0063, 0.1431, -0.1209])),
('fc1.weight',
tensor([[ 0.0078, 0.0408, 0.0449, ..., 0.0143, 0.0387, 0.0377],
...,
[-0.0238, -0.0331, 0.0028, ..., -0.0578, -0.0317, -0.0269]])),
('fc1.bias',
tensor([-3.6302e-02, -2.9473e-02, -4.6407e-02, 2.9217e-02, -3.2748e-02,
。。。。。。
4.5190e-02])),
('fc2.weight',
tensor([[ 0.0387, 0.0094, -0.0014, ..., -0.0519, -0.0089, -0.0399],
...,
[ 0.0345, -0.0245, -0.0230, ..., -0.0107, -0.0671, 0.0299]])),
('fc2.bias',
tensor([-0.0255, 0.0335, 0.0006, -0.0283, 0.0003, 0.0603, -0.0160, -0.0432,
。。。。。
0.0479, -0.0003, -0.0592, -0.0500, -0.0101, 0.0607, 0.0329, 0.0077])),
('fc3.weight',
tensor([[ 0.0855, 0.0188, 0.0714, ..., -0.0760, 0.0899, 0.0265],
...,
[-0.0751, -0.0957, 0.1219, ..., 0.0270, -0.1015, -0.0457]])),
('fc3.bias',
tensor([-0.0875, -0.0108, 0.0775, -0.0821, -0.0394, -0.0221, -0.0566, 0.0552,
0.0577, -0.0431]))])
torch.save(obj=model.state_dict(), f="lenet.pt") 模型参数加载
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
# 是否有GPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class LeNet(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet, self).__init__()
# 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=0)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu1 = nn.ReLU()
# 2
self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu2 = nn.ReLU()
# 展平层
self.flatten = nn.Flatten()
# 分类层,这一步输入的特征数是上一步向量的特征维数,是固定的
self.fc1 = nn.Linear(in_features=576, out_features=256)
self.fc2 = nn.Linear(in_features=256, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=10)
def forward(self,x):
# [B, 1, 32, 32] -- [B, 6, 30, 30]
x = self.conv1(x)
# [B, 6, 30, 30] -- [B, 6, 15, 15]
x = self.pool1(x)
x = self.relu1(x)
# [B, 6, 15, 15] -- [B, 16, 13, 13]
x = self.conv2(x)
# [B, 16, 13, 13] -- [B, 16, 6, 6]
x = self.pool2(x)
x = self.relu2(x)
# [B, 16, 6, 6] -- [B, 576]
x = self.flatten(x)
# [B, 576] -- [B, 256]
x = self.fc1(x)
# [B, 256] -- [B, 128]
x = self.fc2(x)
# [B, 128] -- [B, 10]
x = self.fc3(x)
return x
model = LeNet()
# 加载权重
model.load_state_dict(state_dict=torch.load(f="lenet.pt"), )
model.state_dict()
OrderedDict([('conv1.weight',
tensor([[[[ 0.1514, -0.0253, 0.3065],
[ 0.1495, 0.3303, -0.2056],
[ 0.3438, -0.0484, 0.3366]]],
...
...
...
)))
|
|
直接读取图片
# PyTorch 三剑客
import torch
from torch import nn
from torch.nn import functional as F
# 数据打包工具
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# 图像处理工具
from matplotlib import pyplot as plt
from PIL import Image
import cv2
from torchvision import transforms
# 通用科学计算神器
import numpy as np
# 解决 OMP问题
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
# 定义预处理
trans = transforms.Compose([
transforms.Grayscale(),
transforms.Resize(size=(32,32)),
transforms.ToTensor()
])
class MyDataset(Dataset):
"""
自定义数据集
"""
def __init__(self, anno_info=None, trans=trans):
"""
初始化:负责接收超参
"""
self.anno_info = anno_info
self.trans = trans
self.images = self._read()
def _read(self):
with open(file=self.anno_info, mode="r", encoding="utf8") as f:
images = [line.strip().split(",") for line in f.readlines()]
return images
def __getitem__(self, idx):
"""
实现下标读取(索引)
"""
img_path, label = self.images[idx]
img = Image.open(fp=img_path)
return self.trans(img), torch.tensor(data=int(label)).long()
def __len__(self):
"""
获取数据量
"""
return len(self.images)
# 构建训练集的数据加载器
train_dataset = MyDataset(anno_info="train.csv",)
train_dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True)
# 构建测试集的数据加载器
test_dataset = MyDataset(anno_info="test.csv")
test_dataloader = DataLoader(dataset=test_dataset, batch_size=512, shuffle=False)
|
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
# 图像处理工具
from matplotlib import pyplot as plt
from PIL import Image
import cv2
from torchvision import transforms
# 是否有GPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class LeNet(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet, self).__init__()
# 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=0)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu1 = nn.ReLU()
# 2
self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu2 = nn.ReLU()
# 展平层
self.flatten = nn.Flatten()
# 分类层,这一步输入的特征数是上一步向量的特征维数,是固定的
self.fc1 = nn.Linear(in_features=576, out_features=256)
self.fc2 = nn.Linear(in_features=256, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=10)
def forward(self,x):
# [B, 1, 32, 32] --> [B, 6, 30, 30]
x = self.conv1(x)
# [B, 6, 30, 30] --> [B, 6, 15, 15]
x = self.pool1(x)
x = self.relu1(x)
# [B, 6, 15, 15] --> [B, 16, 13, 13]
x = self.conv2(x)
# [B, 16, 13, 13] --> [B, 16, 6, 6]
x = self.pool2(x)
x = self.relu2(x)
# [B, 16, 6, 6] --> [B, 576]
x = self.flatten(x)
# [B, 576] --> [B, 256]
x = self.fc1(x)
# [B, 256] --> [B, 128]
x = self.fc2(x)
# [B, 128] --> [B, 10]
x = self.fc3(x)
return x
model = LeNet()
# 加载权重
model.load_state_dict(state_dict=torch.load(f="lenet.pt"), )
# 定义预处理
trans = transforms.Compose([
transforms.Grayscale(),
transforms.Resize(size=(32,32)),
transforms.ToTensor()
])
def predict(img_path="./0.jpeg", model=model, trans=trans):
"""
预测图像
"""
# 1,模型设置为推断模式
model.eval()
# 2, 读取图像
img = Image.open(fp=img_path)
img = torch.unsqueeze(input=trans(img), dim=0).to(device=device)
with torch.no_grad():
y_pred = model(img)
y_pred = y_pred.argmax(dim=-1)
return y_pred.item()
# 推断函数
print(predict(img_path="./0.jpeg"))
print(predict(img_path="./1.jpeg"))
print(predict(img_path="./2.jpeg"))
print(predict(img_path="./3.jpeg"))
print(predict(img_path="./4.jpeg"))
|
import torch
from torch import nn
from torch.nn import functional as F
class LeNet2(nn.Module):
def __init__(self,n_classes=10):
"""LeNet优化版
- 输入:32*32固定大小图片,格式[B,C,32,32]
- 输出:批次的类别,格式[B, n_classes]
"""
super().__init__()
self.n_classes = n_classes
# 第 1 层, [B, 1, 32, 32] -- [B, 16, 16, 16]
self.layer1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=6),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
)
# 第 2 层, [B, 6, 16, 16] -- [B, 16, 8, 8], 层数多少会影响模型收敛速度
self.layer2 = nn.Sequential(
nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=16),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
)
# 分类
self.classifier = nn.Sequential(
nn.Linear(in_features=16*8*8, out_features=1024),
nn.Dropout(p=0.3),
nn.Linear(in_features=1024, out_features=256),
nn.Dropout(p=0.2),
nn.Linear(in_features=256, out_features=self.n_classes)
)
def forward(self,x):
# [B, 1, 32, 32] -- [B, 6, 16, 16]
h = self.layer1(x)
# [B, 6, 16, 16] -- [B, 16, 8, 8]
h = self.layer2(h)
# 维度转换 [B, 16, 8, 8] -- [B, 16*8*8=1024]
h = h.view(h.size(0), -1)
# [B, 1024] -- [B, 10]
o = self.classifier(h)
return o
def test_LeNet2():
model = LeNet2()
x = torch.randn(size=(32,1,32,32))
y = model(x)
print(y.shape) # torch.Size([32, 10])
|
由于数据集非常足且非常好,没有噪声,脏数据,100%纯 因此此网络去除BN后完全不影响最终精度, 只是第一次收敛比有BN的第一次收敛速度点了一点点 这里提到的数据集的量与纯度也非常重要 |
|
依赖包安装 pip install opencv-python-headless -i https://pypi.tuna.tsinghua.edu.cn/simple pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple 单张图片读取
这是一张28×28尺寸的图像,有三色,但三色都是一样的
这意味着,训练时取三色中的一色即可
resize尺寸处理 灰度处理 归一化+转Tensor,这里的归一是除以 255,并没有拉到标准正态分布
#拉到指定的正态分布
transforms.Normalize(mean=[0.5], std=[0.5])(img)
|
|
依赖包安装 pip install opencv-python-headless -i https://pypi.tuna.tsinghua.edu.cn/simple pip install opencv-python -i https://pypi.tuna.tsinghua.edu.cn/simple
# PyTorch 三剑客
import torch
from torch import nn
from torch.nn import functional as F
# 数据打包工具
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# 图像处理工具
from matplotlib import pyplot as plt
from PIL import Image
import cv2
from torchvision import transforms
# 通用科学计算神器
import numpy as np
# 解决 OMP问题
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
# 测试是否有GPU
device = "cuda:0" if torch.cuda.is_available() else "cpu"
import os
def get_image_paths(root_dir):
image_paths = []
for root, dirs, files in os.walk(root_dir):
for file in files:
if file.endswith(".jpeg") or file.endswith(".jpg") or file.endswith(".png"):
image_path = os.path.join(root, file)
label = os.path.basename(root)
image_paths.append([image_path, label])
return image_paths
# 指定数据集目录
mnist_dir = "/wks/datasets/MNIST"
# 获取测试集和训练集的图片路径
test_list = get_image_paths(os.path.join(mnist_dir, "test"))
train_list = get_image_paths(os.path.join(mnist_dir, "train"))
print(len(test_list))
print(len(train_list))
# 打印结果
print("测试集图片路径:")
for path, label in test_list:
print(f"图片路径: {path}, 标签: {label}")
break
print("\n训练集图片路径:")
for path, label in train_list:
print(f"图片路径: {path}, 标签: {label}")
break
10000 60000 测试集图片路径: 图片路径: /wks/datasets/MNIST/test/3/715.jpeg, 标签: 3 训练集图片路径: 图片路径: /wks/datasets/MNIST/train/3/1152.jpeg, 标签: 3
class MyDataset(Dataset):
"""
手写数字数据集
"""
def __init__(self, data_list=None):
"""自定义数据集
"""
# 定义预处理
self.trans = transforms.Compose([
transforms.Grayscale(),
transforms.Resize(size=(32,32)),
transforms.ToTensor()
])
self.images = data_list
def __getitem__(self, idx):
"""
实现下标读取(索引)
"""
img_path, label = self.images[idx]
img = Image.open(fp=img_path)
return self.trans(img), torch.tensor(data=int(label)).long()
def __len__(self):
"""
获取数据量
"""
return len(self.images)
# 构建训练集的数据加载器 train_dataset = MyDataset(data_list=train_list) train_dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True) # 构建测试集的数据加载器 test_dataset = MyDataset(data_list=test_list) test_dataloader = DataLoader(dataset=test_dataset, batch_size=512, shuffle=False)
test_dataset[0]
(tensor([[[0.0118, 0.0039, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0157, 0.0157, 0.0078, ..., 0.0000, 0.0000, 0.0000],
[0.0039, 0.0078, 0.0157, ..., 0.0000, 0.0000, 0.0000],
...,
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000]]]),
tensor(3))
|
class LeNet(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet, self).__init__()
# 1
self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=0)
self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu1 = nn.ReLU()
# 2
self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=0)
self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.relu2 = nn.ReLU()
# 展平层
self.flatten = nn.Flatten()
# 分类层
self.fc1 = nn.Linear(in_features=576, out_features=256)
self.fc2 = nn.Linear(in_features=256, out_features=128)
self.fc3 = nn.Linear(in_features=128, out_features=10)
def forward(self,x):
# [B, 1, 32, 32] -- [B, 6, 30, 30]
x = self.conv1(x)
# [B, 6, 30, 30] -- [B, 6, 15, 15]
x = self.pool1(x)
x = self.relu1(x)
# [B, 6, 15, 15] -- [B, 16, 13, 13]
x = self.conv2(x)
# [B, 16, 13, 13] -- [B, 16, 6, 6]
x = self.pool2(x)
x = self.relu2(x)
# [B, 16, 6, 6] -- [B, 576]
x = self.flatten(x)
# [B, 576] -- [B, 256]
x = self.fc1(x)
# [B, 256] -- [B, 128]
x = self.fc2(x)
# [B, 128] -- [B, 10]
x = self.fc3(x)
return x
class LeNet1(nn.Module):
"""
定义一个LeNet
"""
def __init__(self):
super(LeNet1, self).__init__()
# 第 1 个阶段
self.stage1 = nn.Sequential(
nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=6),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
)
# 第 2 个阶段
self.stage2 = nn.Sequential(
nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1, padding=1),
nn.BatchNorm2d(num_features=16),
nn.ReLU(),
nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
)
# 分类器
self.classifier = nn.Sequential(
nn.Linear(in_features=1024, out_features=512),
nn.Dropout(0.2),
nn.Linear(in_features=512, out_features=128),
nn.Dropout(0.2),
nn.Linear(in_features=128, out_features=10)
)
def forward(self,x):
# [B, 1, 32, 32] -- [B, 6, 16, 16]
h = self.stage1(x)
# [B, 6, 16, 16] -- [B, 16, 8, 8]
h = self.stage2(h)
# 维度转换 [B, 16, 8, 8] -- [B, 1024]
h = h.view(h.size(0), -1)
# [B, 1024] -- [B, 10]
o = self.classifier(h)
return o
|
# 构建模型 lenet = LeNet1() # 参数搬家 lenet.to(device=device)
LeNet1(
(stage1): Sequential(
(0): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(stage2): Sequential(
(0): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU()
(3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(classifier): Sequential(
(0): Linear(in_features=1024, out_features=512, bias=True)
(1): Dropout(p=0.2, inplace=False)
(2): Linear(in_features=512, out_features=128, bias=True)
(3): Dropout(p=0.2, inplace=False)
(4): Linear(in_features=128, out_features=10, bias=True)
)
)
# 定义损失函数 loss_fn = nn.CrossEntropyLoss() # 定义优化器 optimizer = torch.optim.Adam(params=lenet.parameters(), lr=1e-3) 过程监控
def get_loss(dataloader, model=lenet, loss_fn=loss_fn):
"""
计算平均损失
"""
# 把模型设置为评估模式
model.eval()
losses = []
with torch.no_grad():
for X,y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
y_pred = model(X)
loss = loss_fn(y_pred, y)
losses.append(loss.item())
return round(np.array(losses).mean(), ndigits=3)
def get_acc(dataloader, model=lenet):
"""
计算平均准确率
"""
# 把模型设置为评估模式
model.eval()
accs = []
with torch.no_grad():
for X, y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
y_pred = model(X)
y_pred = y_pred.argmax(dim=-1)
acc = (y_pred == y).float().mean()
accs.append(acc.item())
return round(np.array(accs).mean(), ndigits=3)
训练
def train(dataloader=train_dataloader, model=lenet, loss_fn=loss_fn, optimizer=optimizer, epochs=2):
print(f"自然准确率:Train_Acc:{get_acc(dataloader=train_dataloader)}, Test_Acc: {get_acc(dataloader=test_dataloader)}")
# 循环训练epocs轮次
for epoch in range(1, epochs+1):
# 把模型设置为训练模型
model.train()
# 每轮都按批量训练
for X, y in dataloader:
X = X.to(device=device)
y = y.to(device=device)
# 正向传播
y_pred = model(X)
# 计算损失
loss = loss_fn(y_pred, y)
# 梯度清空
optimizer.zero_grad()
# 反向传播
loss.backward()
# 参数优化
optimizer.step()
print(f"Epoch: {epoch}, Train_Acc:{get_acc(dataloader=train_dataloader)}, Test_Acc: {get_acc(dataloader=test_dataloader)}")
train() 自然准确率:Train_Acc:0.104, Test_Acc: 0.104 Epoch: 1, Train_Acc:0.974, Test_Acc: 0.976 Epoch: 2, Train_Acc:0.976, Test_Acc: 0.975 |
|
保存 torch.save(obj=lenet.state_dict(), f="lenet_params.pkl") 加载权重 # 定义模型 my_lenet = LeNet1() # 加载权重 my_lenet.load_state_dict(state_dict=torch.load(f="lenet_params.pkl",weights_only=True), ) |
def predict(img_path, model=lenet):
"""
预测图像
"""
trans=transforms.Compose([
transforms.Grayscale(),
transforms.Resize(size=(32,32)),
transforms.ToTensor()
])
model.to(device=device)
# 模型设置为推断模式
model.eval()
# 读取图像
img = Image.open(fp=img_path)
img = torch.unsqueeze(input=trans(img), dim=0).to(device=device)
with torch.no_grad():
y_pred = model(img)
y_pred = y_pred.argmax(dim=-1)
return y_pred.item()
predict(img_path="/wks/datasets/MNIST/test/7/1001.jpeg",model=my_lenet) 7 |
|
|
## LeNet-5 网络结构
LeNet-5 由 Yann LeCun 于 1998 年提出,是最早的卷积神经网络之一。
**网络结构**:
```
输入(32×32×1) → C1:卷积(6@5×5) → S2:平均池化(2×2) →
C3:卷积(16@5×5) → S4:平均池化(2×2) →
C5:卷积(120@5×5) → F6:全连接(84) → 输出:全连接(10, softmax)
```
**各层详情**:
| 层 | 类型 | 参数 | 输出尺寸 | 参数量 |
|---|------|------|----------|--------|
| C1 | 卷积 | 6个5×5卷积核 | 28×28×6 | 156 |
| S2 | 平均池化 | 2×2窗口, stride=2 | 14×14×6 | 0 |
| C3 | 卷积 | 16个5×5卷积核 | 10×10×16 | 2,416 |
| S4 | 平均池化 | 2×2窗口, stride=2 | 5×5×16 | 0 |
| C5 | 卷积 | 120个5×5卷积核 | 1×1×120 | 48,120 |
| F6 | 全连接 | 84个神经元 | 84 | 10,164 |
| 输出 | 全连接 | 10个神经元 | 10 | 850 |
**总参数量**:约 60,000
|
|
## 参数量计算
**C1 层**:
$$Params = 5 \\times 5 \\times 1 \\times 6 + 6(bias) = 156$$
**C3 层**:
$$Params = 5 \\times 5 \\times 6 \\times 16 + 16(bias) = 2,416$$
**C5 层**:
$$Params = 5 \\times 5 \\times 16 \\times 120 + 120(bias) = 48,120$$
**F6 层**:
$$Params = 120 \\times 84 + 84(bias) = 10,164$$
**输出层**:
$$Params = 84 \\times 10 + 10(bias) = 850$$
**总参数量**:
$$Total = 156 + 2,416 + 48,120 + 10,164 + 850 = 61,706$$
|
|
## 完整代码实现
```python
import tensorflow as tf
from tensorflow.keras.datasets import mnist
# 1. 数据加载
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
# 2. 数据处理: N H W C
train_images = tf.reshape(train_images,
(train_images.shape[0], 28, 28, 1))
test_images = tf.reshape(test_images,
(test_images.shape[0], 28, 28, 1))
# 3. 模型构建 (LeNet-5)
net = tf.keras.models.Sequential([
# C1: 6个5×5卷积核
tf.keras.layers.Conv2D(
filters=6, kernel_size=5, activation='sigmoid',
input_shape=(28, 28, 1)),
# S2: 平均池化
tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
# C3: 16个5×5卷积核
tf.keras.layers.Conv2D(
filters=16, kernel_size=5, activation='sigmoid'),
# S4: 平均池化
tf.keras.layers.AveragePooling2D(pool_size=2, strides=2),
# C5: 120个5×5卷积核
tf.keras.layers.Conv2D(
filters=120, kernel_size=5, activation='sigmoid'),
# 展平
tf.keras.layers.Flatten(),
# F6: 84个神经元
tf.keras.layers.Dense(84, activation='sigmoid'),
# 输出: 10个神经元
tf.keras.layers.Dense(10, activation='softmax')
])
# 4. 编译
net.compile(
optimizer=tf.keras.optimizers.SGD(learning_rate=0.9),
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
# 5. 训练
net.fit(train_images, train_labels, epochs=5, validation_split=0.1)
# 6. 评估
score = net.evaluate(test_images, test_labels)
print('Test accuracy:', score[1]) # ~98%
```
|
|
## 现代改进版 LeNet
使用现代技术(ReLU、BatchNorm、Adam)改进的 LeNet:
```python
modern_net = tf.keras.models.Sequential([
tf.keras.layers.Conv2D(32, 3, padding='same',
input_shape=(28, 28, 1)),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.ReLU(),
tf.keras.layers.MaxPool2D(2),
tf.keras.layers.Conv2D(64, 3, padding='same'),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.ReLU(),
tf.keras.layers.MaxPool2D(2),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128),
tf.keras.layers.BatchNormalization(),
tf.keras.layers.ReLU(),
tf.keras.layers.Dropout(0.5),
tf.keras.layers.Dense(10, activation='softmax')
])
modern_net.compile(
optimizer='adam',
loss='sparse_categorical_crossentropy',
metrics=['accuracy']
)
```
**预期准确率**:99%+(远高于原始 LeNet 的 97.8%)
**关键改进**:
- ReLU 替代 sigmoid
- BatchNorm 加速收敛
- Adam 优化器
- Dropout 防止过拟合
- MaxPool 替代 AveragePool
|