import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import torch
from torch import nn
# Load the breast-cancer dataset and hold out 20% of it for testing.
# The earlier pandas DataFrame copies of X and y were overwritten right away
# by the tensors below, so they are no longer built.
data = load_breast_cancer()
# Features as a float32 tensor.
X = torch.tensor(data.data).float()
# Labels as a float32 column tensor of shape [n_samples, 1].
y = torch.tensor(data.target).reshape(-1, 1).float()
# Fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
"""
构建数据集
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F
class MyDataSet(Dataset):
    """Map-style dataset pairing each feature row with its integer class label.

    Args:
        X: Feature rows; anything ``torch.as_tensor`` accepts (tensor,
            ndarray, nested list). Stored as float32.
        y: One label per row of ``X``. It is flattened to 1-D, so both
            ``[n]`` and ``[n, 1]`` layouts work. Stored as int64, the dtype
            ``nn.CrossEntropyLoss`` expects for class-index targets.
    """

    def __init__(self, X, y):
        # Convert once here instead of on every __getitem__ call: wrapping an
        # existing tensor in torch.tensor() copies it again for each sample
        # and emits a copy-construct UserWarning.
        self.X = torch.as_tensor(X).float()
        self.y = torch.as_tensor(y).reshape(-1).long()

    def __len__(self):
        """Return the number of samples (rows of ``X``)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx`` as float32 / int64 tensors.

        The returned tensors are views into the stored data; clone them
        before modifying in place.
        """
        return self.X[idx], self.y[idx]
# Wrap both splits as datasets first, then build a loader for each.
train_dataset = MyDataSet(X_train, y_train)
test_dataset = MyDataSet(X_test, y_test)
# Training batches are reshuffled on every pass; evaluation order stays fixed.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False)

# Default number of passes over the training set.
epochs = 200
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Progress-monitoring helper
def get_acc(dataloader=train_dataloader, model=None):
    """Return the model's classification accuracy over all samples of ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches; ``y`` holds the class
            index of each sample.
        model: Module mapping a batch ``X`` to per-class scores; the
            predicted class is the argmax over dimension 1.

    Returns:
        The fraction of correct predictions as a Python float, or ``nan``
        when the loader yields no samples.

    Note:
        The model is moved to the module-level ``device`` and left in eval
        mode when this function returns.
    """
    model.to(device=device)
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            hits = model(X).argmax(dim=1) == y
            # Count per sample instead of averaging per-batch accuracies:
            # batches differ in size (the last one is usually smaller), so a
            # mean of batch means over-weights the samples in small batches.
            n_correct += hits.sum().item()
            n_seen += hits.numel()
    if n_seen == 0:
        return float("nan")
    return n_correct / n_seen
# Training procedure
def train(model,
          epochs=epochs,
          train_dataloader=train_dataloader,
          test_dataloader=test_dataloader,
          *,
          lr=1e-3,
          log_every=1):
    """Train ``model`` with SGD and cross-entropy loss, printing accuracy as it goes.

    Args:
        model: Module mapping a feature batch to per-class scores.
        epochs: Number of passes over ``train_dataloader``.
        train_dataloader: Yields ``(X, y)`` training batches.
        test_dataloader: Yields ``(X, y)`` batches used only for reporting.
        lr: SGD learning rate (keyword-only; the default keeps the previous
            fixed value).
        log_every: Print progress every ``log_every`` epochs and after the
            final one (keyword-only). Each report runs a full evaluation
            over both loaders, so a larger value speeds up long runs; the
            default of 1 reports after every epoch as before.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError(f"log_every must be >= 1, got {log_every}")
    model.to(device=device)
    # Optimizer
    optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
    # Loss function
    loss_fn = nn.CrossEntropyLoss()
    for epoch in range(1, epochs + 1):
        should_log = epoch % log_every == 0 or epoch == epochs
        if should_log:
            print(f"正在进行第 {epoch} 轮训练:")
        # get_acc() leaves the model in eval mode, so switch back every epoch.
        model.train()
        for X, y in train_dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()
            # Forward pass and loss.
            loss = loss_fn(model(X), y)
            # Backward pass.
            loss.backward()
            # Parameter update.
            optimizer.step()
        if should_log:
            print(f"train_acc: {get_acc(dataloader=train_dataloader,model=model)}, test_acc: {get_acc(dataloader=test_dataloader,model=model)}")
|
from T import train
from T import X_test
import torch
from torch import nn
from torch.nn import functional as F
class TextCNNML(nn.Module):
    """Multi-scale TextCNN-style classifier over a 1-D feature sequence.

    Three parallel Conv1d branches (kernel sizes 2, 3 and 4) slide over the
    input. Each branch is max-pooled over its whole length, which leaves one
    256-dimensional vector per branch. The three vectors are concatenated,
    passed through dropout and mapped to the output scores by a linear layer.

    Args:
        in_features: Number of input channels seen by the convolutions.
            ``forward`` inserts a single channel axis into a ``[B, L]``
            input, so this is 1 for such inputs.
        out_features: Number of output scores per sample.
        L: Length of the input sequence (number of feature columns).

    Raises:
        ValueError: If ``L`` is smaller than 4, the widest kernel.
    """

    def __init__(self, in_features, out_features, L):
        super().__init__()
        if L < 4:
            raise ValueError(f"L must be at least 4 (the widest kernel), got {L}")
        # Kernel size 2: [N, C, L] -> [N, 256, L-1] -> pooled to [N, 256, 1]
        self.gram_2 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=2),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 1),
        )
        # Kernel size 3: [N, C, L] -> [N, 256, L-2] -> pooled to [N, 256, 1]
        self.gram_3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=3),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 2),
        )
        # Kernel size 4: [N, C, L] -> [N, 256, L-3] -> pooled to [N, 256, 1]
        self.gram_4 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=256, kernel_size=4),
            nn.BatchNorm1d(num_features=256),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=L - 3),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc1 = nn.Linear(in_features=256 * 3, out_features=out_features)

    def forward(self, X):
        """Map a ``[B, L]`` batch to ``[B, out_features]`` scores."""
        # [B, L] -> [B, 1, L]: add the channel axis Conv1d expects.
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)  # [B, 256, 1]
        x2 = self.gram_3(x)  # [B, 256, 1]
        x3 = self.gram_4(x)  # [B, 256, 1]
        x = torch.concat(tensors=(x1, x2, x3), dim=1)  # [B, 768, 1]
        # Drop only the trailing length-1 axis. A bare squeeze() would also
        # remove the batch axis when B == 1 and return a 1-D result.
        x = x.squeeze(dim=-1)
        x = self.dropout1(x)
        return self.fc1(x)
# Build the model for single-channel input and two output classes; L is the number of feature columns in X_test.
model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
#train_acc: 0.9159881174564362, test_acc: 0.9473684430122375 train(model) .... .... .... 正在进行第 195 轮训练: train_acc: 0.8776958584785461, test_acc: 0.9210526347160339 正在进行第 196 轮训练: train_acc: 0.9023162424564362, test_acc: 0.9473684430122375 正在进行第 197 轮训练: train_acc: 0.9089458584785461, test_acc: 0.9385964870452881 正在进行第 198 轮训练: train_acc: 0.9042693674564362, test_acc: 0.9385964870452881 正在进行第 199 轮训练: train_acc: 0.9077629745006561, test_acc: 0.9473684430122375 正在进行第 200 轮训练: train_acc: 0.9159881174564362, test_acc: 0.9473684430122375 |
|
|
|
|
|
|
from T import train from T import X_test
import torch
from torch import nn
from torch.nn import functional as F
class TextCNNML(nn.Module):
    """Multi-scale TextCNN-style classifier followed by two convolution stages.

    Three parallel Conv1d branches (kernel sizes 2, 3 and 4, each followed by
    BatchNorm, ReLU and a halving max-pool) are concatenated along the
    sequence axis. Two further conv/pool stages, dropout, a flatten and a
    linear layer produce the output scores.

    Args:
        in_features: Number of input channels seen by the convolutions.
            ``forward`` inserts a single channel axis into a ``[B, L]``
            input, so this is 1 for such inputs.
        out_features: Number of output scores per sample.
        L: Length of the input sequence (number of feature columns). The
            width of the final linear layer is derived from it.

    Raises:
        ValueError: If ``L`` is too short for the conv/pool stack.
    """

    def __init__(self, in_features, out_features, L):
        super().__init__()
        hidden_size = 256
        # An unpadded Conv1d with kernel k yields L - k + 1 positions and
        # MaxPool1d(2) halves that, rounding down.
        branch_lens = [(L - k + 1) // 2 for k in (2, 3, 4)]
        # conv1 / conv2 keep the length (k=3, padding=1) and each is followed
        # by another halving pool. For L = 30 this gives 41 -> 20 -> 10,
        # the width that used to be hard-coded.
        flat_len = sum(branch_lens) // 2 // 2
        if min(branch_lens) < 1 or flat_len < 1:
            raise ValueError(f"L={L} is too short for this architecture")

        self.gram_2 = self._conv_pool(in_features, hidden_size, kernel_size=2)
        self.gram_3 = self._conv_pool(in_features, hidden_size, kernel_size=3)
        self.gram_4 = self._conv_pool(in_features, hidden_size, kernel_size=4)
        self.conv1 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * flat_len, out_features=out_features)

    @staticmethod
    def _conv_pool(in_channels, out_channels, kernel_size, padding=0):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2) stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

    def forward(self, X):
        """Map a ``[B, L]`` batch to ``[B, out_features]`` scores."""
        # [B, L] -> [B, 1, L]: add the channel axis Conv1d expects.
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Join the branches along the sequence axis (e.g. [B, 256, 41] for L = 30).
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        return self.fc1(x)
# Build the model for single-channel input and two output classes; L is the number of feature columns in X_test.
model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
# Notebook-style smoke check: run the held-out features through the model and show the first three output rows.
model(X_test)[:3]
tensor([[-0.2633, 0.0193],
[-0.3391, 0.9590],
[-0.4151, 0.3640]], grad_fn=SliceBackward0)
train(model) 收敛速度极快 train_acc: 0.37241417169570923, test_acc: 0.37719297409057617 正在进行第 2 轮训练: train_acc: 0.5244278162717819, test_acc: 0.5 正在进行第 3 轮训练: train_acc: 0.6502530723810196, test_acc: 0.7105263471603394 正在进行第 4 轮训练: train_acc: 0.8698833584785461, test_acc: 0.8947368264198303 正在进行第 5 轮训练: train_acc: 0.8687004745006561, test_acc: 0.8947368264198303 正在进行第 6 轮训练: train_acc: 0.907405361533165, test_acc: 0.9385964870452881 。。。。 之后稳定在94%多一些 。。。。 train_acc: 0.92536860704422, test_acc: 0.9473684430122375 正在进行第 194 轮训练: train_acc: 0.924983486533165, test_acc: 0.9298245906829834 正在进行第 195 轮训练: train_acc: 0.9198943674564362, test_acc: 0.9473684430122375 正在进行第 196 轮训练: train_acc: 0.9112841039896011, test_acc: 0.9649122953414917 正在进行第 197 轮训练: train_acc: 0.92264524102211, test_acc: 0.9473684430122375 正在进行第 198 轮训练: train_acc: 0.9144201129674911, test_acc: 0.9473684430122375 正在进行第 199 轮训练: train_acc: 0.921077236533165, test_acc: 0.9473684430122375 正在进行第 200 轮训练: train_acc: 0.9144201129674911, test_acc: 0.9473684430122375 。。。。 400轮达到98% 。。。。 正在进行第 379 轮训练: train_acc: 0.9206646084785461, test_acc: 0.9824561476707458 正在进行第 380 轮训练: train_acc: 0.9238006174564362, test_acc: 0.9824561476707458 正在进行第 381 轮训练: train_acc: 0.9238006174564362, test_acc: 0.9824561476707458 正在进行第 382 轮训练: train_acc: 0.9136223495006561, test_acc: 0.9736841917037964 正在进行第 383 轮训练: train_acc: 0.9202794879674911, test_acc: 0.9824561476707458 。。。。 再之后开始波动... 
。。。。 train_acc: 0.93045774102211, test_acc: 0.9824561476707458 正在进行第 989 轮训练: train_acc: 0.9175285995006561, test_acc: 0.9912281036376953 正在进行第 990 轮训练: train_acc: 0.928889736533165, test_acc: 0.9736841917037964 正在进行第 991 轮训练: train_acc: 0.94022336602211, test_acc: 0.9824561476707458 正在进行第 992 轮训练: train_acc: 0.9343364834785461, test_acc: 0.9824561476707458 正在进行第 993 轮训练: train_acc: 0.9030864834785461, test_acc: 0.9385964870452881 正在进行第 994 轮训练: train_acc: 0.92146235704422, test_acc: 0.9473684430122375 正在进行第 995 轮训练: train_acc: 0.9343364834785461, test_acc: 0.9824561476707458
|
|
|
|
|
|
|
|
from T import train
from T import X_test
import torch
from torch import nn
from torch.nn import functional as F
class SmallBlock1d(nn.Module):
    """Residual 1-D convolution block that widens to 4x ``hidden_size`` and back.

    The body is Conv1d -> BatchNorm1d -> Conv1d -> Conv1d -> ReLU. Every
    convolution uses kernel 3 with padding 1, so the sequence length is
    unchanged. The block output is the body output plus the shortcut.

    Args:
        in_channel: Number of channels of the block input.
        hidden_size: Number of channels of the block output.
    """

    def __init__(self, in_channel, hidden_size):
        super().__init__()
        wide = hidden_size * 4
        self.conv1 = nn.Sequential(
            # Honour in_channel instead of a fixed 256.
            nn.Conv1d(in_channels=in_channel, out_channels=wide, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(num_features=wide),
            nn.Conv1d(in_channels=wide, out_channels=wide, kernel_size=3, stride=1, padding=1),
            nn.Conv1d(in_channels=wide, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
        )
        # The residual sum needs matching channel counts. When they already
        # match, the shortcut is a parameter-free pass-through; otherwise a
        # 1x1 convolution projects the input to hidden_size channels.
        if in_channel == hidden_size:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Conv1d(in_channels=in_channel, out_channels=hidden_size, kernel_size=1)

    def forward(self, x):
        """Return ``shortcut(x) + body(x)`` for ``x`` of shape ``[N, in_channel, L]``."""
        # No clone of x is needed: none of the layers modifies its input in place.
        return self.shortcut(x) + self.conv1(x)
class TextCNNML(nn.Module):
    """Multi-scale TextCNN-style classifier with a deep stack of residual blocks.

    Three parallel Conv1d branches (kernel sizes 2, 3 and 4, each followed by
    BatchNorm, ReLU and a halving max-pool) are concatenated along the
    sequence axis. The result passes through ``n_blocks`` ``SmallBlock1d``
    residual blocks, two conv/pool stages, dropout, a flatten and a linear
    layer.

    Args:
        in_features: Number of input channels seen by the convolutions.
            ``forward`` inserts a single channel axis into a ``[B, L]``
            input, so this is 1 for such inputs.
        out_features: Number of output scores per sample.
        L: Length of the input sequence (number of feature columns). The
            width of the final linear layer is derived from it.
        n_blocks: Number of residual blocks; the default of 60 matches the
            previously hand-written stack.

    Raises:
        ValueError: If ``L`` is too short for the conv/pool stack.
    """

    def __init__(self, in_features, out_features, L, n_blocks=60):
        super().__init__()
        hidden_size = 256
        # An unpadded Conv1d with kernel k yields L - k + 1 positions and
        # MaxPool1d(2) halves that, rounding down.
        branch_lens = [(L - k + 1) // 2 for k in (2, 3, 4)]
        # The residual blocks and conv1 / conv2 keep the length (k=3,
        # padding=1); conv1 and conv2 are each followed by a halving pool.
        # For L = 30 this gives 41 -> 20 -> 10, the width that used to be
        # hard-coded.
        flat_len = sum(branch_lens) // 2 // 2
        if min(branch_lens) < 1 or flat_len < 1:
            raise ValueError(f"L={L} is too short for this architecture")

        self.gram_2 = self._conv_pool(in_features, hidden_size, kernel_size=2)
        self.gram_3 = self._conv_pool(in_features, hidden_size, kernel_size=3)
        self.gram_4 = self._conv_pool(in_features, hidden_size, kernel_size=4)

        # Register the blocks as attributes s1 .. s<n_blocks>, the names the
        # hand-written version used, so parameter names in existing
        # checkpoints stay valid.
        self.n_blocks = n_blocks
        for i in range(1, n_blocks + 1):
            setattr(self, f"s{i}", SmallBlock1d(in_channel=hidden_size, hidden_size=hidden_size))

        self.conv1 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * flat_len, out_features=out_features)

    @staticmethod
    def _conv_pool(in_channels, out_channels, kernel_size, padding=0):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2) stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

    def forward(self, X):
        """Map a ``[B, L]`` batch to ``[B, out_features]`` scores."""
        # [B, L] -> [B, 1, L]: add the channel axis Conv1d expects.
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Join the branches along the sequence axis (e.g. [B, 256, 41] for L = 30).
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        # Apply the residual blocks in order s1, s2, ..., s<n_blocks>.
        for i in range(1, self.n_blocks + 1):
            x = getattr(self, f"s{i}")(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        return self.fc1(x)
# Build the model for single-channel input and two output classes; L is the number of feature columns in X_test.
model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
# Notebook-style smoke check: run the held-out features through the model and show the first three output rows.
model(X_test)[:3]
tensor([[-0.3637, -0.4065],
[ 0.2695, 1.0281],
[ 0.3592, 0.0745]], grad_fn=SliceBackward0)
|
train(model)
之前有明显的欠拟合现象,
将卷积的部分改为hidden_size -- hidden_size*4 -- hidden_size之后,
可能是由于网络结构变复杂了,欠拟合的现象有所改善
self.conv1 = nn.Sequential(
nn.Conv1d(in_channels=256, out_channels=hidden_size*4, kernel_size=3,stride=1,padding=1),
nn.BatchNorm1d(num_features=hidden_size*4),
nn.Conv1d(in_channels=hidden_size*4, out_channels=hidden_size*4, kernel_size=3,stride=1,padding=1),
nn.Conv1d(in_channels=hidden_size*4, out_channels=hidden_size, kernel_size=3,stride=1,padding=1),
nn.ReLU(),
)
正在进行第 236 轮训练:
train_acc: 0.939838245511055, test_acc: 0.9736841917037964
正在进行第 237 轮训练:
train_acc: 0.94803586602211, test_acc: 0.9649122953414917
正在进行第 238 轮训练:
train_acc: 0.9413787424564362, test_acc: 0.9561403393745422
正在进行第 239 轮训练:
train_acc: 0.94099360704422, test_acc: 0.9736841917037964
......
稳定性也稍微好了一点
......
正在进行第 266 轮训练:
train_acc: 0.9308153539896011, test_acc: 0.9736841917037964
正在进行第 267 轮训练:
train_acc: 0.94608274102211, test_acc: 0.9649122953414917
正在进行第 268 轮训练:
train_acc: 0.951556995511055, test_acc: 0.9649122953414917
测试集精度最终稳定在98%左右,但训练集精度只有约92%,仍有些欠拟合
训练集上中间还是出现过95%的精度的,可能是网络复杂后,需要训练的轮次变得多了
- 估计得几千轮...
正在进行第 492 轮训练:
train_acc: 0.928889736533165, test_acc: 0.9824561476707458
正在进行第 493 轮训练:
train_acc: 0.92341548204422, test_acc: 0.9824561476707458
正在进行第 494 轮训练:
train_acc: 0.9183263629674911, test_acc: 0.9824561476707458
正在进行第 495 轮训练:
train_acc: 0.9194817245006561, test_acc: 0.9824561476707458
正在进行第 496 轮训练:
train_acc: 0.9167583584785461, test_acc: 0.9824561476707458
正在进行第 497 轮训练:
train_acc: 0.923030361533165, test_acc: 0.9385964870452881
正在进行第 498 轮训练:
train_acc: 0.91950923204422, test_acc: 0.9824561476707458
正在进行第 499 轮训练:
train_acc: 0.9222326129674911, test_acc: 0.9824561476707458
正在进行第 500 轮训练:
train_acc: 0.9198943674564362, test_acc: 0.9824561476707458
|
from T import train from T import X_test
import torch
from torch import nn
from tpf.mlib.seq import ShortBlock1d
class TextCNNML(nn.Module):
    """Multi-scale TextCNN-style classifier with one ``ShortBlock1d`` stage.

    Three parallel Conv1d branches (kernel sizes 2, 3 and 4, each followed by
    BatchNorm, ReLU and a halving max-pool) are concatenated along the
    sequence axis. The result passes through a ``ShortBlock1d``, two
    conv/pool stages, dropout, a flatten and a linear layer.

    Args:
        in_features: Number of input channels seen by the convolutions.
            ``forward`` inserts a single channel axis into a ``[B, L]``
            input, so this is 1 for such inputs.
        out_features: Number of output scores per sample.
        L: Length of the input sequence (number of feature columns). The
            width of the final linear layer is derived from it.

    Raises:
        ValueError: If ``L`` is too short for the conv/pool stack.
    """

    def __init__(self, in_features, out_features, L):
        super().__init__()
        hidden_size = 256
        # An unpadded Conv1d with kernel k yields L - k + 1 positions and
        # MaxPool1d(2) halves that, rounding down.
        branch_lens = [(L - k + 1) // 2 for k in (2, 3, 4)]
        # conv1 / conv2 keep the length (k=3, padding=1) and each is followed
        # by another halving pool. For L = 30 this gives 41 -> 20 -> 10,
        # the width that used to be hard-coded.
        # NOTE(review): this assumes ShortBlock1d leaves the sequence length
        # unchanged, which is consistent with the old fixed width of 10 at
        # L = 30 -- TODO confirm against tpf.mlib.seq.
        flat_len = sum(branch_lens) // 2 // 2
        if min(branch_lens) < 1 or flat_len < 1:
            raise ValueError(f"L={L} is too short for this architecture")

        self.gram_2 = self._conv_pool(in_features, hidden_size, kernel_size=2)
        self.gram_3 = self._conv_pool(in_features, hidden_size, kernel_size=3)
        self.gram_4 = self._conv_pool(in_features, hidden_size, kernel_size=4)
        self.short = ShortBlock1d(in_channel=hidden_size, hidden_size=hidden_size)
        self.conv1 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.conv2 = self._conv_pool(hidden_size, hidden_size, kernel_size=3, padding=1)
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=hidden_size * flat_len, out_features=out_features)

    @staticmethod
    def _conv_pool(in_channels, out_channels, kernel_size, padding=0):
        """Build one Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2) stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=padding),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

    def forward(self, X):
        """Map a ``[B, L]`` batch to ``[B, out_features]`` scores."""
        # [B, L] -> [B, 1, L]: add the channel axis Conv1d expects.
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # Join the branches along the sequence axis (e.g. [B, 256, 41] for L = 30).
        x = torch.concat(tensors=(x1, x2, x3), dim=2)
        x = self.short(x)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        return self.fc1(x)
# Build the model for single-channel input and two output classes; L is the number of feature columns in X_test.
model = TextCNNML(in_features=1, out_features=2, L=X_test.shape[1])
# Notebook-style smoke check: run the held-out features through the model and show the first three output rows.
model(X_test)[:3]
tensor([[-0.0809, -0.0281],
[-0.4714, -0.6860],
[-0.3987, -0.4960]], grad_fn=SliceBackward0)
|
|
|
|
|
这里数据为交易数据
k=2,3,4相当于合并相邻的2个特征,3个特征,4个特征
但没有区分出主次
于是加入注意力,注意力机制会对特征进行打分,将特征的重要性区分开来
注意力就是要从整体的角度去划分局部的重要性
- 合而为1是整体
- 分开之后,层次分明
|
import torch
from torch import nn
from torch.nn import functional as F
class TextCNNML(nn.Module):
    """Multi-scale TextCNN-style classifier with an extra attention/conv branch.

    Three parallel Conv1d branches (kernel sizes 2, 3 and 4, padding 1, each
    followed by BatchNorm, ReLU and a halving max-pool) and a fourth branch
    (``attention`` followed by an unpooled Conv1d) are concatenated along the
    sequence axis. Two conv/pool stages, dropout, a flatten and two linear
    layers produce the output scores.

    Args:
        in_features: Number of input channels seen by the convolutions.
            ``forward`` inserts a single channel axis into a ``[B, L]``
            input, so this is 1 for such inputs.
        out_features: Number of output scores per sample.
        seq_len: Length of the input sequence (number of feature columns).
            The width of the first linear layer is derived from it.

    Raises:
        ValueError: If ``seq_len`` is too short for the conv/pool stack.
    """

    def __init__(self, in_features, out_features, seq_len):
        super().__init__()
        hidden_size = 256
        # A Conv1d with kernel k and padding 1 yields seq_len + 2 - k + 1
        # positions; MaxPool1d(2) halves that, rounding down.
        branch_lens = [(seq_len + 2 - k + 1) // 2 for k in (2, 3, 4)]
        # The conv3 branch keeps all seq_len positions (k=3, padding=1, no pool).
        total_len = sum(branch_lens) + seq_len
        # conv1 and conv2 each halve the length again, i.e. floor(total / 4).
        flat_len = total_len // 4
        if min(branch_lens) < 1 or flat_len < 1:
            raise ValueError(f"seq_len={seq_len} is too short for this architecture")

        self.gram_2 = self._conv_pool(in_features, hidden_size, kernel_size=2)
        self.gram_3 = self._conv_pool(in_features, hidden_size, kernel_size=3)
        self.gram_4 = self._conv_pool(in_features, hidden_size, kernel_size=4)
        self.conv1 = self._conv_pool(hidden_size, hidden_size, kernel_size=3)
        self.conv2 = self._conv_pool(hidden_size, hidden_size, kernel_size=3)
        self.conv3 = nn.Sequential(
            nn.Conv1d(in_channels=in_features, out_channels=hidden_size, kernel_size=3, stride=1, padding=1),
        )
        self.dropout1 = nn.Dropout(p=0.2)
        self.ft = nn.Flatten()
        self.fc1 = nn.Linear(in_features=flat_len * hidden_size, out_features=hidden_size)
        self.dropout2 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=out_features)

    @staticmethod
    def _conv_pool(in_channels, out_channels, kernel_size):
        """Build one padded Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d(2) stage."""
        return nn.Sequential(
            nn.Conv1d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=kernel_size, stride=1, padding=1),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2),
        )

    def attention(self, Q, K, V, mask=None, multihead=False):
        """Compute scaled dot-product attention.

        The last axis is the feature axis and the second-to-last is the
        sequence axis, i.e. ``[batch, seq_len, dim]`` or, for multi-head
        input, ``[batch, heads, seq_len, head_dim]``.

        Args:
            Q: Query tensor with 3 or 4 dimensions.
            K: Key tensor with the same layout as ``Q``.
            V: Value tensor with the same layout as ``Q``.
            mask: Optional boolean tensor broadcastable to the score matrix;
                positions where it is True are excluded from the softmax.
            multihead: When True (4-D input only), merge the heads back into
                one feature axis and return ``[batch, seq_len, heads * head_dim]``.

        Returns:
            The attention output, laid out like ``V`` unless ``multihead``
            is set.

        Raises:
            ValueError: If ``Q`` has neither 3 nor 4 dimensions.
        """
        n_dims = Q.dim()
        if n_dims not in (3, 4):
            raise ValueError(f"only 3 or 4 dim,now is {n_dims}")
        seq_len = Q.shape[-2]
        head_dim = Q.shape[-1]
        # Similarity of every query position with every key position,
        # scaled by sqrt(feature dim) to keep the softmax well-conditioned.
        score = torch.matmul(Q, K.transpose(-2, -1)) / head_dim ** 0.5
        if mask is not None:
            # -inf becomes a weight of exactly 0 after the softmax.
            score = score.masked_fill(mask, -float('inf'))
        score = torch.softmax(score, dim=-1)
        out = torch.matmul(score, V)
        if multihead:
            # [b, heads, seq, head_dim] -> [b, seq, heads * head_dim]
            head_n = Q.shape[1]
            out = out.permute(0, 2, 1, 3).reshape(-1, seq_len, head_n * head_dim)
        return out

    def forward(self, X):
        """Map a ``[B, seq_len]`` batch to ``[B, out_features]`` scores."""
        # [B, seq_len] -> [B, 1, seq_len]: add the channel axis Conv1d expects.
        x = X.unsqueeze(dim=1)
        x1 = self.gram_2(x)
        x2 = self.gram_3(x)
        x3 = self.gram_4(x)
        # NOTE(review): x is [B, 1, seq_len], so attention() sees a sequence
        # of length 1. The softmax over a single key is exactly 1 and the
        # call returns x unchanged. Attending across the features would need
        # a [B, seq_len, 1] layout. Left as is because changing it alters
        # the trained model's behaviour.
        x4 = self.attention(x, x, x)
        x4 = self.conv3(x4)
        # Join all four branches along the sequence axis.
        x = torch.concat(tensors=(x1, x2, x3, x4), dim=2)
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.dropout1(x)
        x = self.ft(x)
        x = self.fc1(x)
        x = self.dropout2(x)
        return self.fc2(x)
# Build the model for single-channel input and two output classes; seq_len is the number of feature columns in X_test.
model = TextCNNML(in_features=1, out_features=2, seq_len=X_test.shape[1])
# Notebook-style smoke check: run the held-out features through the model and show the first three output rows.
model(X_test)[:3]
|
|
30000轮训练,每轮的速度很快 train(model) 这是首次深度学习中训练集上精度稳定出现96% 正在进行第 14361 轮训练: train_acc: 0.95975461602211, test_acc: 0.9649122953414917 正在进行第 14371 轮训练: train_acc: 0.969135120511055, test_acc: 0.9736841917037964 正在进行第 14381 轮训练: train_acc: 0.964045986533165, test_acc: 0.9736841917037964 正在进行第 14391 轮训练: train_acc: 0.9554357379674911, test_acc: 0.9649122953414917 正在进行第 14401 轮训练: train_acc: 0.965228870511055, test_acc: 0.9649122953414917 正在进行第 14411 轮训练: train_acc: 0.964045986533165, test_acc: 0.9649122953414917 正在进行第 14421 轮训练: train_acc: 0.962092861533165, test_acc: 0.9561403393745422 偶尔会降一下 正在进行第 14701 轮训练: train_acc: 0.9550506174564362, test_acc: 0.9561403393745422 正在进行第 14711 轮训练: train_acc: 0.9050396084785461, test_acc: 0.9298245906829834 正在进行第 14721 轮训练: train_acc: 0.966796875, test_acc: 0.9736841917037964 正在进行第 14731 轮训练: train_acc: 0.9589568674564362, test_acc: 0.9649122953414917 正在进行第 14741 轮训练: train_acc: 0.962092861533165, test_acc: 0.9649122953414917 正在进行第 14751 轮训练: train_acc: 0.96366086602211, test_acc: 0.9649122953414917 。。。。 。。。。 。。。。 正在进行第 16111 轮训练: train_acc: 0.961322620511055, test_acc: 0.9824561476707458 正在进行第 16121 轮训练: train_acc: 0.97147336602211, test_acc: 0.9736841917037964 正在进行第 16131 轮训练: train_acc: 0.9179412424564362, test_acc: 0.9298245906829834 正在进行第 16141 轮训练: train_acc: 0.960139736533165, test_acc: 0.9824561476707458 正在进行第 16151 轮训练: train_acc: 0.94489985704422, test_acc: 0.9473684430122375 正在进行第 16161 轮训练: train_acc: 0.9511443674564362, test_acc: 0.9561403393745422 偶尔训练集到达97%,98% 正在进行第 20081 轮训练: train_acc: 0.962092861533165, test_acc: 0.9649122953414917 正在进行第 20091 轮训练: train_acc: 0.97265625, test_acc: 0.9736841917037964 正在进行第 20101 轮训练: train_acc: 0.9558208584785461, test_acc: 0.9649122953414917 正在进行第 20111 轮训练: train_acc: 0.964045986533165, test_acc: 0.9649122953414917 。。。。 。。。。 。。。。 正在进行第 26851 轮训练: train_acc: 0.973041370511055, test_acc: 0.9649122953414917 正在进行第 26861 轮训练: train_acc: 0.96443110704422, test_acc: 0.9649122953414917 正在进行第 
26871 轮训练: train_acc: 0.958186611533165, test_acc: 0.9649122953414917 正在进行第 26881 轮训练: train_acc: 0.98046875, test_acc: 0.9736841917037964 正在进行第 26891 轮训练: train_acc: 0.967181995511055, test_acc: 0.9736841917037964 最终到30000轮时,感觉这一次波动还没有缓冲过来, - 猜测增加到50000轮,精度还能升一些 - 欠拟合问题基本消失 - 30000轮训练,消耗时间大概20多分钟 正在进行第 29911 轮训练: train_acc: 0.96952024102211, test_acc: 0.9649122953414917 正在进行第 29921 轮训练: train_acc: 0.97537961602211, test_acc: 0.9561403393745422 正在进行第 29931 轮训练: train_acc: 0.967952236533165, test_acc: 0.9649122953414917 正在进行第 29941 轮训练: train_acc: 0.965999111533165, test_acc: 0.9736841917037964 正在进行第 29951 轮训练: train_acc: 0.97029048204422, test_acc: 0.9561403393745422 正在进行第 29961 轮训练: train_acc: 0.96443110704422, test_acc: 0.9824561476707458 正在进行第 29971 轮训练: train_acc: 0.97147336602211, test_acc: 0.9561403393745422 正在进行第 29981 轮训练: train_acc: 0.954280361533165, test_acc: 0.9473684430122375 正在进行第 29991 轮训练: train_acc: 0.96247798204422, test_acc: 0.9649122953414917 |
|
|
|
|
如何在Python中测量程序运行时间