Understanding LSTM Networks http://colah.github.io/posts/2015-08-Understanding-LSTMs/
|
RNN的思想很好,
- 一件事有过去,现在,未来 三个阶段,RNN卡的是当下
但问题是当下发生的事情太多了,它并不能很好地从全局(过去、现在、未来)的视角去判断一件事:
- 哪个当下对于整体是重要的
- 整体的关键节点是哪些
后来的注意力机制解决了这个问题——它能找出重要信息。那么,能否将 RNN 的结果与注意力整合使用呢?这样效果会不会更好? |
class TextRNN1(nn.Module):
    """Sentiment classifier built on a plain bidirectional ``nn.RNN``.

    Token ids are embedded, run through a one-layer bidirectional RNN, and
    the final hidden states of both directions are summed and projected to
    two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # The RNN input width follows the embedding width instead of a
        # hard-coded 256, so any embedding_dim works.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # Allocate the initial state next to the activations rather than on a
        # module-level `device` global.
        h0 = torch.zeros(2, x.size(0), 512, dtype=x.dtype, device=x.device)
        # nn.RNN is time-major by default: [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn: [2, b, 512]; add the two directions -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
class TextRNN2(nn.Module):
    """Sentiment classifier using a unidirectional ``nn.RNN``.

    The RNN outputs of all time steps are summed and projected to two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN2, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # The original built h0 on the CPU only, which breaks as soon as the
        # model runs on a GPU; create it on the activations' device.
        h0 = torch.zeros(1, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        # out: [seq_len, b, 512]
        out, hn = self.rnn(x, h0)
        # Pool over time by summation -> [b, 512]
        out = torch.sum(input=out, dim=0)
        out = self.fc(out)
        return out
|
class TextRNN3(nn.Module):
    """Sentiment classifier using a 2-layer bidirectional ``nn.LSTM``.

    The final cell states of every layer and direction are summed and
    projected to two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the LSTM input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN3, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=512,
                            num_layers=2,
                            bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        # Initial states: [num_layers * num_directions = 4, b, 512], created
        # on the activations' device instead of a module-level global.
        h0 = torch.zeros(4, x.size(1), 512, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(4, x.size(1), 512, dtype=x.dtype, device=x.device)
        out, (hn, cn) = self.lstm(x, (h0, c0))
        # cn: [4, b, 512]; sum across layers and directions -> [b, 512]
        cn = torch.sum(input=cn, dim=0)
        out = self.fc(cn)
        return out
|
class TextRNN4(nn.Module):
    """Sentiment classifier using a bidirectional ``nn.GRU``.

    The final hidden states of both directions are summed and passed through
    a two-layer MLP head that emits two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the GRU input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512, bidirectional=True)
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # Initial state on the activations' device, not a module-level global.
        h0 = torch.zeros(2, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
        # hn: [2, b, 512]; add the two directions -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc1(hn)
        out = F.relu(out)
        out = self.fc2(out)
        return out
|
import os
from tpf import pkl_save,pkl_load
# Location of the pickled corpus and vocabulary.
BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR,'data_pkl/word.pkl')
(X_train, y_train, X_test, y_test,
 words_set, word2idx, idx2word) = pkl_load(file_path)

# Vocabulary size.
dict_len = len(words_set)
# Every sample is padded / truncated to exactly this many tokens.
seq_len = 512

# Right-pad each sample with "<PAD>", then cut it to seq_len tokens.
X_train1 = [(tokens + ["<PAD>"] * seq_len)[:seq_len] for tokens in X_train]
X_test1 = [(tokens + ["<PAD>"] * seq_len)[:seq_len] for tokens in X_test]

# Index encoding: out-of-vocabulary tokens map to the "<UNK>" index.
X_train2 = [
    [word2idx[tok] if tok in word2idx else word2idx["<UNK>"] for tok in tokens]
    for tokens in X_train1
]
X_test2 = [
    [word2idx[tok] if tok in word2idx else word2idx["<UNK>"] for tok in tokens]
    for tokens in X_test1
]
"""
构建数据集
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F
class MyDataSet(Dataset):
    """Map-style dataset pairing index-encoded samples with their labels.

    Each item is returned as a ``(features, label)`` pair of int64 tensors.
    """

    def __init__(self, X=X_train2, y=y_train):
        self.X, self.y = X, y

    def __len__(self):
        # The number of samples is taken from the feature container.
        return len(self.X)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        features = torch.tensor(data=sample).long()
        label = torch.tensor(data=target).long()
        return features, label
# Wrap both index-encoded splits as datasets, then build their loaders.
train_dataset = MyDataSet(X_train2, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# Evaluation keeps a fixed order and uses a larger batch.
test_dataset = MyDataSet(X_test2, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False)
train_dataset[0][0][:7]
tensor([ 5321, 706, 20311, 14111, 1036, 823, 14111])
class TextRNN1(nn.Module):
    """Sentiment classifier built on a plain bidirectional ``nn.RNN``.

    Token ids are embedded, run through a one-layer bidirectional RNN, and
    the final hidden states of both directions are summed and projected to
    two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # Build h0 on the activations' device; the module no longer needs a
        # `device` global to exist before forward() is called.
        h0 = torch.zeros(2, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn: [2, b, 512]; add the two directions -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
# NOTE(review): the original statement was cut off after `word2idx["`; the
# closing `<PAD>"])` is restored from the complete call that appears later in
# this file -- confirm against the original notebook.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
# Number of training epochs.
epochs = 200
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): `train` below uses `optimizer` and `loss_fn` as default
# arguments, but no definition survived in this cell. These mirror the
# SGD / cross-entropy setup used elsewhere in this file -- confirm.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
# 定义过程监控函数
def get_acc(dataloader=train_dataloader, model=model):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Accuracy is the number of correct predictions divided by the number of
    samples. The previous mean of per-batch accuracies gave a short final
    batch the same weight as a full one.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module mapping ``X`` to per-class scores of shape [batch, classes].

    Returns:
        Accuracy as a float in [0, 1]; ``nan`` if the loader yields no samples.
    """
    model.to(device=device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=1)
            correct += (y_pred == y).sum().item()
            total += y.numel()
    if total == 0:
        # Same "no data" value the empty mean used to produce.
        return float("nan")
    return correct / total
# 定义训练过程
def train(model=model,
          optimizer=None,
          loss_fn=None,
          epochs=epochs,
          train_dataloader=train_dataloader,
          test_dataloader=test_dataloader):
    """Train ``model`` and print train/test accuracy after every epoch.

    Args:
        model: Module to optimise; it is moved to ``device``.
        optimizer: Optimizer over ``model``'s parameters. Defaults to
            ``SGD(lr=1e-3)`` built for the model actually passed in, so a
            default can never be bound to a different model.
        loss_fn: Loss callable ``loss_fn(y_pred, y)``. Defaults to
            ``nn.CrossEntropyLoss()``.
        epochs: Number of passes over ``train_dataloader``.
        train_dataloader: Loader yielding training ``(X, y)`` batches.
        test_dataloader: Loader yielding evaluation ``(X, y)`` batches.
    """
    model.to(device=device)
    if optimizer is None:
        optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
    if loss_fn is None:
        loss_fn = nn.CrossEntropyLoss()
    for epoch in range(1, epochs+1):
        print(f"正在进行第 {epoch} 轮训练:")
        # get_acc() switches to eval mode, so re-enable training each epoch.
        model.train()
        for X, y in train_dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            # forward pass
            y_pred = model(X)
            # clear gradients left over from the previous step
            optimizer.zero_grad()
            # compute the loss
            loss = loss_fn(y_pred, y)
            # back-propagate
            loss.backward()
            # parameter update
            optimizer.step()
        # Evaluate the model being trained, not get_acc's default model.
        train_acc = get_acc(dataloader=train_dataloader, model=model)
        test_acc = get_acc(dataloader=test_dataloader, model=model)
        print(f"train_acc: {train_acc}, test_acc: {test_acc}")
train()
训练到第 200 轮,测试精度才约 76%,并且增长已经非常缓慢,说明模型表达能力有限
正在进行第 194 轮训练:
train_acc: 0.8244243421052632, test_acc: 0.7652698874473571
正在进行第 195 轮训练:
train_acc: 0.8225740131578947, test_acc: 0.7633522748947144
正在进行第 196 轮训练:
train_acc: 0.8270970394736842, test_acc: 0.7621448874473572
正在进行第 197 轮训练:
train_acc: 0.8283305921052632, test_acc: 0.7641335248947143
正在进行第 198 轮训练:
train_acc: 0.8279194078947368, test_acc: 0.7641335248947143
正在进行第 199 轮训练:
train_acc: 0.8279194078947368, test_acc: 0.7644886374473572
正在进行第 200 轮训练:
train_acc: 0.8256578947368421, test_acc: 0.7644886374473572
|
from datasets import train_dataloader
from datasets import test_dataloader
from datasets import words_set,word2idx,idx2word
from T import train
import torch
from torch import nn
class TextRNN1(nn.Module):
    """Sentiment classifier built on a plain bidirectional ``nn.RNN``.

    Token ids are embedded, run through a one-layer bidirectional RNN, and
    the final hidden states of both directions are summed and projected to
    two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512, bidirectional=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # Build h0 on the activations' device rather than relying on a
        # `device` global that is only assigned after this class definition.
        h0 = torch.zeros(2, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn: [2, b, 512]; add the two directions -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
# Use the first GPU when one is visible, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Vocabulary size.
dict_len = len(words_set)
model = TextRNN1(
    num_embeddings=dict_len,
    embedding_dim=256,
    padding_idx=word2idx["<PAD>"],
)
model.to(device=device)
# Loss function.
loss_fn = nn.CrossEntropyLoss()
# Optimizer.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
train(model)
|
# The two `from T import ...` statements were fused onto one line, which is a
# syntax error; they are separated again here.
from T import words_set,word2idx,idx2word,dict_len,seq_len
from T import train
import torch
from torch import nn
device = "cuda:0" if torch.cuda.is_available() else "cpu"
class SingleRNNDefine(nn.Module):
    """Hand-written single-layer, unidirectional RNN.

    Each step computes ``h_t = tanh(W_x x_t + b_x + W_h h_{t-1} + b_h)`` using
    two linear layers.

    Args:
        input_size: Feature width of every input step.
        hidden_size: Width of the hidden state.
    """

    def __init__(self,input_size,hidden_size):
        super().__init__()
        # [batch, input_size] @ [input_size, hidden_size] -> [batch, hidden_size]
        self.cell_linear_x = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.cell_linear_h = nn.Linear(in_features=hidden_size, out_features=hidden_size)

    def forward(self,x,h0):
        """Run the recurrence over a time-major sequence.

        Args:
            x: Input of shape [seq_len, batch, input_size].
            h0: Initial state; only ``h0[0]`` ([batch, hidden_size]) is used.

        Returns:
            ``(output, hn)`` with ``output`` of shape
            [seq_len, batch, hidden_size] and ``hn`` of shape
            [1, batch, hidden_size].
        """
        ht = h0[0]
        steps = []
        # One token per step; the time axis is consumed by the loop.
        for x_t in x.unbind(0):
            ht = torch.tanh(self.cell_linear_x(x_t) + self.cell_linear_h(ht))
            # Keep the tensor itself. The previous `ht.tolist()` followed by
            # `torch.Tensor(...)` copied every step to host memory, dropped
            # the device, and detached `output` from autograd.
            steps.append(ht)
        hn = torch.unsqueeze(input=ht, dim=0)
        if steps:
            output = torch.stack(steps, dim=0)
        else:
            # Empty sequence: return an empty output with consistent shape.
            output = x.new_zeros((0, x.size(1), ht.size(-1)))
        return output,hn
class TextRNN1(nn.Module):
    """Sentiment classifier built on the hand-written ``SingleRNNDefine``.

    Token ids are embedded, run through the custom RNN, and the final hidden
    state is projected to two logits.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 256.
        self.rnn = SingleRNNDefine(input_size=embedding_dim, hidden_size=512)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [b, seq_len] -> [b, seq_len, embedding_dim]
        x = self.embed(x)
        # The custom RNN reads only h0[0], so one slice is enough; build it on
        # the activations' device instead of a module-level global.
        h0 = torch.zeros(1, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [b, seq_len, embedding_dim] -> [seq_len, b, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # hn: [1, b, 512] -> [b, 512]
        hn = torch.sum(input=hn, dim=0)
        out = self.fc(hn)
        return out
# NOTE(review): the original statement was cut off after `word2idx["`; the
# closing `<PAD>"])` is restored from the complete call that appears elsewhere
# in this file -- confirm against the original notebook.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=256, padding_idx=word2idx["<PAD>"])
代码可以运行 a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device) # model(a) 但训练时不仅慢,而且输出结果不会变 train(model) 正在进行第 1 轮训练: train_acc: 0.4967105263157895, test_acc: 0.4947443246841431 正在进行第 2 轮训练: train_acc: 0.4995888157894737, test_acc: 0.4947443246841431 正在进行第 3 轮训练: train_acc: 0.49773848684210525, test_acc: 0.4947443246841431 正在进行第 4 轮训练: train_acc: 0.4985608552631579, test_acc: 0.4947443246841431 正在进行第 5 轮训练: train_acc: 0.4981496710526316, test_acc: 0.4947443246841431 正在进行第 6 轮训练: train_acc: 0.4985608552631579, test_acc: 0.4947443246841431 正在进行第 7 轮训练: train_acc: 0.4981496710526316, test_acc: 0.4947443246841431 正在进行第 8 轮训练: train_acc: 0.4979440789473684, test_acc: 0.4947443246841431 正在进行第 9 轮训练: train_acc: 0.4975328947368421, test_acc: 0.4947443246841431 正在进行第 10 轮训练: train_acc: 0.4967105263157895, test_acc: 0.4947443246841431 正在进行第 11 轮训练: train_acc: 0.49897203947368424, test_acc: 0.4947443246841431 正在进行第 12 轮训练: train_acc: 0.49712171052631576, test_acc: 0.4947443246841431 正在进行第 13 轮训练: train_acc: 0.49835526315789475, test_acc: 0.4947443246841431 正在进行第 14 轮训练: train_acc: 0.4995888157894737, test_acc: 0.4947443246841431 正在进行第 15 轮训练: train_acc: 0.4987664473684211, test_acc: 0.4947443246841431 上次在1维卷积中使用BN就遇到了这样的情况 ... |
import os
from tpf import pkl_save,pkl_load
# Location of the pickled corpus and vocabulary.
BASE_DIR = "/root/datasets/hotel_reader"
file_path = os.path.join(BASE_DIR,'data_pkl/word.pkl')
(X_train, y_train, X_test, y_test,
 words_set, word2idx, idx2word) = pkl_load(file_path)

# Vocabulary size.
dict_len = len(words_set)
# Every sample is padded / truncated to exactly this many tokens.
seq_len = 512

# Right-pad each sample with "<PAD>", then cut it to seq_len tokens.
X_train1 = [(tokens + ["<PAD>"] * seq_len)[:seq_len] for tokens in X_train]
X_test1 = [(tokens + ["<PAD>"] * seq_len)[:seq_len] for tokens in X_test]

# Index encoding: out-of-vocabulary tokens map to the "<UNK>" index.
X_train2 = [
    [word2idx[tok] if tok in word2idx else word2idx["<UNK>"] for tok in tokens]
    for tokens in X_train1
]
X_test2 = [
    [word2idx[tok] if tok in word2idx else word2idx["<UNK>"] for tok in tokens]
    for tokens in X_test1
]
"""
构建数据集
"""
import numpy as np
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch
from torch import nn
from torch.nn import functional as F
class MyDataSet(Dataset):
    """Map-style dataset pairing index-encoded samples with their labels.

    Each item is returned as a ``(features, label)`` pair of int64 tensors.
    """

    def __init__(self, X=X_train2, y=y_train):
        self.X, self.y = X, y

    def __len__(self):
        # The number of samples is taken from the feature container.
        return len(self.X)

    def __getitem__(self, idx):
        sample, target = self.X[idx], self.y[idx]
        features = torch.tensor(data=sample).long()
        label = torch.tensor(data=target).long()
        return features, label
# Wrap both index-encoded splits as datasets, then build their loaders.
train_dataset = MyDataSet(X_train2, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# Evaluation keeps a fixed order and uses a larger batch.
test_dataset = MyDataSet(X_test2, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False)
# Number of training epochs.
epochs = 200
# Use the first GPU when one is visible, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# 定义过程监控函数
def get_acc(dataloader=train_dataloader, model=None):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Accuracy is the number of correct predictions divided by the number of
    samples. The previous mean of per-batch accuracies gave a short final
    batch the same weight as a full one.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        model: Module mapping ``X`` to per-class scores of shape
            [batch, classes]. Required despite the ``None`` default.

    Returns:
        Accuracy as a float in [0, 1]; ``nan`` if the loader yields no samples.

    Raises:
        ValueError: If ``model`` is not supplied.
    """
    if model is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("get_acc() requires a model")
    model.to(device=device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in dataloader:
            X = X.to(device=device)
            y = y.to(device=device)
            y_pred = model(X).argmax(dim=1)
            correct += (y_pred == y).sum().item()
            total += y.numel()
    if total == 0:
        # Same "no data" value the empty mean used to produce.
        return float("nan")
    return correct / total
# 定义训练过程
def train(model,
          epochs=epochs,
          train_dataloader=train_dataloader,
          test_dataloader=test_dataloader):
    """Fit ``model`` with SGD and cross-entropy, reporting accuracy per epoch.

    Args:
        model: Module to optimise; it is moved to ``device``.
        epochs: Number of passes over ``train_dataloader``.
        train_dataloader: Loader yielding training ``(X, y)`` batches.
        test_dataloader: Loader yielding evaluation ``(X, y)`` batches.
    """
    model.to(device=device)
    # Optimizer and loss are created for the model that was passed in.
    optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    for epoch_no in range(1, epochs + 1):
        print(f"正在进行第 {epoch_no} 轮训练:")
        # get_acc() leaves the model in eval mode, so switch back each epoch.
        model.train()
        for batch_x, batch_y in train_dataloader:
            batch_x = batch_x.to(device=device)
            batch_y = batch_y.to(device=device)
            # forward pass and loss
            loss = loss_fn(model(batch_x), batch_y)
            # reset gradients, back-propagate, update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_acc = get_acc(dataloader=train_dataloader, model=model)
        test_acc = get_acc(dataloader=test_dataloader, model=model)
        print(f"train_acc: {train_acc}, test_acc: {test_acc}")
|
|
|
RNN的思想:数据 在时间上的 展开/变化
但问题是没有重点
优化方案1:设置参数学习谁重要
hn = torch.sum(input=out, dim=0)
原来的做法是,直接求和/平均值
现在改为可学习参数
该方案有效但训练波动大
优化方案2:相同的方法,增加并行
单条链上RNN表达能力不足,再来一条相同的
效果上没有什么增益
舍弃
优化方案3:取一个自注意力,与RNN输出信息融合
有效 ,并且收敛速度也快了一些
|
# The two `from T import ...` statements were fused onto one line, which is a
# syntax error; they are separated again here.
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
class TextRNN1(nn.Module):
    """RNN sentiment classifier with a learned weight per time step.

    Instead of summing or averaging the RNN outputs over time, each time step
    gets its own trainable scalar weight (initialised to 1, i.e. a plain sum).

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
        seq_len: Fixed length of every input sequence; sizes the weight vector.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 512.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512)
        # One weight per time step: [seq_len, 1]
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [B, seq_len] -> [B, seq_len, embedding_dim]
        x = self.embed(x)
        # Initial state on the activations' device, not a module-level global.
        h0 = torch.zeros(1, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [B, seq_len, embedding_dim] -> [seq_len, B, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # [seq_len, B, 512] -> [B, 512, seq_len] in one permute
        out = torch.permute(input=out, dims=(1, 2, 0))
        # Weighted sum over time: [B, 512, seq_len] @ [seq_len, 1] -> [B, 512, 1]
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        out = self.fc(out)
        return out
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): the original statement was cut off after `word2idx["`; the
# tail is restored to match this class's constructor (which requires seq_len)
# and the complete call that appears elsewhere in this file -- confirm.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"], seq_len=seq_len)
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device) # model(a) a = torch.randn(64,3,7) w = torch.randn(7,1) (a@w).shape #torch.Size([64, 3, 1]) 可能是由于网络过于简单,导致训练过程波动较大 正在进行第 1 轮训练: train_acc: 0.6476151315789473, test_acc: 0.6496448874473572 正在进行第 2 轮训练: train_acc: 0.7886513157894737, test_acc: 0.7704545497894287 正在进行第 3 轮训练: train_acc: 0.6231496710526315, test_acc: 0.61796875 正在进行第 4 轮训练: train_acc: 0.8011924342105263, test_acc: 0.7922585248947144 正在进行第 5 轮训练: train_acc: 0.7518503289473685, test_acc: 0.7339488625526428 正在进行第 6 轮训练: train_acc: 0.819078947368421, test_acc: 0.803125 正在进行第 7 轮训练: train_acc: 0.75390625, test_acc: 0.7557528495788575 正在进行第 8 轮训练: train_acc: 0.7999588815789473, test_acc: 0.8031960248947143 。。。 。。。 。。。 train_acc: 0.743421052631579, test_acc: 0.7326704621315002 正在进行第 197 轮训练: train_acc: 0.8887746710526315, test_acc: 0.8418323874473572 正在进行第 198 轮训练: train_acc: 0.7909128289473685, test_acc: 0.7748579621315003 正在进行第 199 轮训练: train_acc: 0.8871299342105263, test_acc: 0.8360795497894287 正在进行第 200 轮训练: train_acc: 0.8838404605263158, test_acc: 0.8465909123420715 |
# Four import statements were fused onto one line, which is a syntax error;
# they are separated again here.
import torch
from torch import nn
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
class TextRNN1(nn.Module):
    """RNN sentiment classifier fused with a self-attention style signal.

    A per-token score is derived from the embedding self-similarity matrix
    and added to the time-weighted RNN output before the linear head.

    NOTE: the fusion adds a [B, seq_len] tensor to a [B, 512] tensor, so it
    only lines up when ``seq_len`` equals the hidden size (512).

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
        seq_len: Fixed length of every input sequence; sizes the weight vector.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 512.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512)
        # One weight per time step: [seq_len, 1]
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [B, seq_len] -> [B, seq_len, embedding_dim]
        x = self.embed(x)
        # Token-to-token similarity: [B, seq_len, seq_len]
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x,x1)
        # Mean similarity of each token to all others: [B, seq_len]
        x2 = torch.mean(x2,2)
        # Softmax over tokens, then re-weight the scores with it.
        a = torch.softmax(x2,1)
        x2 = x2*a
        # Initial state on the activations' device, not a module-level global.
        h0 = torch.zeros(1, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [B, seq_len, embedding_dim] -> [seq_len, B, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # [seq_len, B, 512] -> [B, 512, seq_len] in one permute
        out = torch.permute(input=out, dims=(1, 2, 0))
        # Weighted sum over time -> [B, 512, 1] -> [B, 512]
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        # Fuse RNN summary and attention scores (needs seq_len == 512).
        out = out+x2
        out = self.fc(out)
        return out
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): the original statement was cut off after `word2idx["`; the
# tail is restored to match this class's constructor (which requires seq_len)
# and the complete call that appears elsewhere in this file -- confirm.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"], seq_len=seq_len)
# The smoke-test input below lives on `device`, so the model must too.
model.to(device=device)
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
model(a)[:3]
tensor([[ -6.5141, 2.8470],
[-15.3093, 3.4901],
[ -2.6264, 1.9446]], device='cuda:0', grad_fn=SliceBackward0)
融合注意力,高大上了很多,然...实际效果与线性参数学习效果一样... train(model) 正在进行第 1 轮训练: train_acc: 0.6227384868421053, test_acc: 0.6295454621315002 正在进行第 2 轮训练: train_acc: 0.6437088815789473, test_acc: 0.6204545497894287 正在进行第 3 轮训练: train_acc: 0.6494654605263158, test_acc: 0.6527698874473572 正在进行第 4 轮训练: train_acc: 0.6669407894736842, test_acc: 0.6718039870262146 正在进行第 5 轮训练: train_acc: 0.63671875, test_acc: 0.6379971623420715 正在进行第 6 轮训练: train_acc: 0.80078125, test_acc: 0.7698863744735718 正在进行第 7 轮训练: train_acc: 0.6348684210526315, test_acc: 0.6235795497894288 正在进行第 8 轮训练: train_acc: 0.7890625, test_acc: 0.7745028495788574 .... .... .... 正在进行第 193 轮训练: train_acc: 0.8005756578947368, test_acc: 0.7829545497894287 正在进行第 194 轮训练: train_acc: 0.8118832236842105, test_acc: 0.7953835248947143 正在进行第 195 轮训练: train_acc: 0.8807565789473685, test_acc: 0.8306107997894288 正在进行第 196 轮训练: train_acc: 0.88671875, test_acc: 0.8410511374473572 正在进行第 197 轮训练: train_acc: 0.8614309210526315, test_acc: 0.8410511374473572 正在进行第 198 轮训练: train_acc: 0.8754111842105263, test_acc: 0.8321022748947143 正在进行第 199 轮训练: train_acc: 0.8817845394736842, test_acc: 0.8282670497894287 正在进行第 200 轮训练: train_acc: 0.889391447368421, test_acc: 0.8495738744735718 |
# The two `from T import ...` statements were fused onto one line, which is a
# syntax error; they are separated again here.
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
class TextRNN1(nn.Module):
    """Stacked-RNN sentiment classifier with a learned weight per time step.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
        seq_len: Fixed length of every input sequence; sizes the weight vector.
        rnn_num_layers: Number of stacked RNN layers.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len,rnn_num_layers=2):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        # Follow embedding_dim instead of a hard-coded 512.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512,num_layers=rnn_num_layers,bidirectional=False)
        # One weight per time step: [seq_len, 1]
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [B, seq_len] -> [B, seq_len, embedding_dim]
        x = self.embed(x)
        # One initial state per layer, on the activations' device.
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [B, seq_len, embedding_dim] -> [seq_len, B, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # [seq_len, B, 512] -> [B, 512, seq_len] in one permute
        out = torch.permute(input=out, dims=(1, 2, 0))
        # Weighted sum over time -> [B, 512, 1] -> [B, 512]
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        out = self.fc(out)
        return out
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): the original statement was cut off after `word2idx["`; the
# tail is restored to match this class's constructor (which requires seq_len)
# and the complete call that appears elsewhere in this file -- confirm.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"], seq_len=seq_len)
依然表现出不稳定性 train(model) 正在进行第 1 轮训练: train_acc: 0.502672697368421, test_acc: 0.49595171213150024 正在进行第 2 轮训练: train_acc: 0.5092516447368421, test_acc: 0.5020596623420716 正在进行第 3 轮训练: train_acc: 0.6147203947368421, test_acc: 0.6397727251052856 正在进行第 4 轮训练: train_acc: 0.6537828947368421, test_acc: 0.6372159123420715 正在进行第 5 轮训练: train_acc: 0.7039473684210527, test_acc: 0.7002840995788574 正在进行第 6 轮训练: train_acc: 0.7206003289473685, test_acc: 0.7237926125526428 正在进行第 7 轮训练: train_acc: 0.6905838815789473, test_acc: 0.7029119372367859 正在进行第 8 轮训练: train_acc: 0.6079358552631579, test_acc: 0.5798295497894287 正在进行第 9 轮训练: train_acc: 0.653577302631579, test_acc: 0.66171875 ... ... ... 正在进行第 287 轮训练: train_acc: 0.9292763157894737, test_acc: 0.8425426244735718 正在进行第 288 轮训练: train_acc: 0.9319490131578947, test_acc: 0.8455965995788575 正在进行第 289 轮训练: train_acc: 0.9229029605263158, test_acc: 0.8397727370262146 正在进行第 290 轮训练: train_acc: 0.9212582236842105, test_acc: 0.8308948874473572 正在进行第 291 轮训练: train_acc: 0.9222861842105263, test_acc: 0.8284801244735718 正在进行第 292 轮训练: train_acc: 0.9292763157894737, test_acc: 0.8455965995788575 正在进行第 293 轮训练: train_acc: 0.9222861842105263, test_acc: 0.8335227370262146 正在进行第 294 轮训练: train_acc: 0.9286595394736842, test_acc: 0.8350852370262146 正在进行第 295 轮训练: train_acc: 0.9356496710526315, test_acc: 0.8432528495788574 正在进行第 296 轮训练: train_acc: 0.9366776315789473, test_acc: 0.8401988744735718 正在进行第 297 轮训练: train_acc: 0.8947368421052632, test_acc: 0.819744324684143 正在进行第 298 轮训练: train_acc: 0.9189967105263158, test_acc: 0.8362926244735718 正在进行第 299 轮训练: train_acc: 0.9165296052631579, test_acc: 0.8337357997894287 正在进行第 300 轮训练: train_acc: 0.9187911184210527, test_acc: 0.83203125 |
class TextRNN1(nn.Module):
    """Two-layer RNN classifier fused with attention via learned scalars.

    The time-weighted RNN output and the self-attention style scores are
    combined as ``out * w1 + x2 * w2`` with trainable scalars ``w1``, ``w2``.

    NOTE: the fusion adds a [B, seq_len] tensor to a [B, 512] tensor, so it
    only lines up when ``seq_len`` equals the hidden size (512).

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the RNN input size).
        padding_idx: Index of the padding token in the embedding table.
        seq_len: Fixed length of every input sequence; sizes the weight vector.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,seq_len):
        super(TextRNN1, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        # Follow embedding_dim instead of a hard-coded 512.
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=512,num_layers=2,bidirectional=False)
        # One weight per time step: [seq_len, 1]
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        # Scalar mixing weights for the RNN branch and the attention branch.
        self.w1 = nn.Parameter(torch.ones(1, dtype=torch.float32),requires_grad=True)
        self.w2 = nn.Parameter(torch.ones(1, dtype=torch.float32),requires_grad=True)
        self.fc = nn.Linear(in_features=512, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [B, seq_len] -> [B, seq_len, embedding_dim]
        x = self.embed(x)
        # Token-to-token similarity: [B, seq_len, seq_len]
        x1 = torch.permute(input=x, dims=(0, 2, 1))
        x2 = torch.bmm(x,x1)
        # Mean similarity of each token to all others: [B, seq_len]
        x2 = torch.mean(x2,2)
        # Softmax over tokens, then re-weight the scores with it.
        a = torch.softmax(x2,1)
        x2 = x2*a
        # Two layers -> two initial states, on the activations' device.
        h0 = torch.zeros(2, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [B, seq_len, embedding_dim] -> [seq_len, B, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.rnn(x, h0)
        # [seq_len, B, 512] -> [B, 512, seq_len] in one permute
        out = torch.permute(input=out, dims=(1, 2, 0))
        # Weighted sum over time -> [B, 512, 1] -> [B, 512]
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        # Learned fusion of both branches (needs seq_len == 512).
        out = out*self.w1+x2*self.w2
        out = self.fc(out)
        return out
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): the original statement was cut off after `word2idx["`; the
# tail is restored to match this class's constructor (which requires seq_len)
# and the complete call that appears elsewhere in this file -- confirm.
model = TextRNN1(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"], seq_len=seq_len)
out = out*self.w1+x2*self.w2 最后一步融合的时候加了学习参数,结果让收敛速度变得非常地慢 正在进行第 259 轮训练: train_acc: 0.8587582236842105, test_acc: 0.8132102370262146 正在进行第 260 轮训练: train_acc: 0.8497121710526315, test_acc: 0.8088778495788574 正在进行第 261 轮训练: train_acc: 0.8536184210526315, test_acc: 0.8107244372367859 正在进行第 262 轮训练: train_acc: 0.78515625, test_acc: 0.7372159123420715 正在进行第 263 轮训练: train_acc: 0.8540296052631579, test_acc: 0.8095880746841431 正在进行第 264 轮训练: train_acc: 0.841077302631579, test_acc: 0.8037642121315003 正在进行第 265 轮训练: train_acc: 0.6971628289473685, test_acc: 0.6632102251052856 |
import torch
from torch import nn
from torch.nn import functional as F
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
class TextRNN4(nn.Module):
    """GRU sentiment classifier with a learned weight per time step.

    Args:
        num_embeddings: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (also the GRU input size).
        padding_idx: Index of the padding token in the embedding table.
        rnn_num_layers: Number of stacked GRU layers.
        seq_len: Fixed length of every input sequence; sizes the weight vector.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,rnn_num_layers=2,seq_len=seq_len):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.gru = nn.GRU(input_size=embedding_dim, hidden_size=512,num_layers=rnn_num_layers, bidirectional=False)
        # One weight per time step: [seq_len, 1]
        self.w = nn.Parameter(torch.ones(seq_len,1, dtype=torch.float32),requires_grad=True)
        self.fc1 = nn.Linear(in_features=512, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        """Map token ids [batch, seq_len] to logits [batch, 2]."""
        # [B, seq_len] -> [B, seq_len, embedding_dim]
        x = self.embed(x)
        # One initial state per layer, created on the activations' device
        # instead of a module-level `device` global.
        h0 = torch.zeros(self.rnn_num_layers, x.size(0), 512, dtype=x.dtype, device=x.device)
        # [B, seq_len, embedding_dim] -> [seq_len, B, embedding_dim]
        x = torch.permute(input=x, dims=(1, 0, 2))
        out, hn = self.gru(x, h0)
        # [seq_len, B, 512] -> [B, 512, seq_len] in one permute
        out = torch.permute(input=out, dims=(1, 2, 0))
        # Weighted sum over time -> [B, 512, 1] -> [B, 512]
        out = out@self.w
        out = torch.squeeze(input=out, dim=2)
        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# NOTE(review): the original statement was cut off after `word2idx["`; the
# tail is restored from the complete TextRNN4 call that appears elsewhere in
# this file -- confirm against the original notebook.
model = TextRNN4(num_embeddings=dict_len, embedding_dim=512, padding_idx=word2idx["<PAD>"],seq_len=seq_len)
# The smoke-test input below lives on `device`, so the model must too.
model.to(device=device)
a = torch.randint(0,dict_len-1,(64,seq_len)).to(device=device)
model(a)[:3]
tensor([[-0.0076, -1.2789],
[-1.9879, -1.3223],
[ 0.3328, -3.4323]], device='cuda:0', grad_fn=SliceBackward0)
train(model)
正在进行第 1 轮训练:
train_acc: 0.5960115131578947, test_acc: 0.6265625
正在进行第 2 轮训练:
train_acc: 0.5030838815789473, test_acc: 0.49595171213150024
正在进行第 3 轮训练:
train_acc: 0.6938733552631579, test_acc: 0.6837357997894287
正在进行第 4 轮训练:
train_acc: 0.6461759868421053, test_acc: 0.6132102251052857
正在进行第 5 轮训练:
train_acc: 0.6975740131578947, test_acc: 0.6717329621315002
正在进行第 6 轮训练:
train_acc: 0.7230674342105263, test_acc: 0.7051136374473572
正在进行第 7 轮训练:
train_acc: 0.7364309210526315, test_acc: 0.7055397748947143
正在进行第 8 轮训练:
train_acc: 0.7099095394736842, test_acc: 0.7137073874473572
正在进行第 9 轮训练:
train_acc: 0.5779194078947368, test_acc: 0.5641335248947144
正在进行第 10 轮训练:
train_acc: 0.7571957236842105, test_acc: 0.7381392121315002
正在进行第 11 轮训练:
train_acc: 0.759046052631579, test_acc: 0.7424715995788574
正在进行第 12 轮训练:
train_acc: 0.7259457236842105, test_acc: 0.6887784123420715
正在进行第 13 轮训练:
train_acc: 0.7674753289473685, test_acc: 0.72890625
正在进行第 14 轮训练:
train_acc: 0.6486430921052632, test_acc: 0.6557528495788574
正在进行第 15 轮训练:
train_acc: 0.6946957236842105, test_acc: 0.6487926125526429
正在进行第 16 轮训练:
train_acc: 0.752672697368421, test_acc: 0.7154119372367859
正在进行第 17 轮训练:
train_acc: 0.7859786184210527, test_acc: 0.761150574684143
正在进行第 18 轮训练:
train_acc: 0.5587993421052632, test_acc: 0.5475142121315002
正在进行第 19 轮训练:
train_acc: 0.7826891447368421, test_acc: 0.7446732997894288
正在进行第 20 轮训练:
train_acc: 0.5546875, test_acc: 0.5762784123420716
正在进行第 21 轮训练:
train_acc: 0.7974917763157895, test_acc: 0.7553267121315003
正在进行第 22 轮训练:
train_acc: 0.7549342105263158, test_acc: 0.7100142121315003
正在进行第 23 轮训练:
train_acc: 0.6217105263157895, test_acc: 0.5901278495788574
正在进行第 24 轮训练:
train_acc: 0.7255345394736842, test_acc: 0.7355823874473572
正在进行第 25 轮训练:
train_acc: 0.7826891447368421, test_acc: 0.7743607997894287
正在进行第 26 轮训练:
train_acc: 0.7039473684210527, test_acc: 0.704900574684143
正在进行第 27 轮训练:
train_acc: 0.6934621710526315, test_acc: 0.6998579621315002
正在进行第 28 轮训练:
train_acc: 0.7380756578947368, test_acc: 0.7480113625526428
正在进行第 29 轮训练:
train_acc: 0.7534950657894737, test_acc: 0.7097301125526428
正在进行第 30 轮训练:
train_acc: 0.7039473684210527, test_acc: 0.6578125
正在进行第 31 轮训练:
train_acc: 0.77734375, test_acc: 0.7390625
正在进行第 32 轮训练:
train_acc: 0.7711759868421053, test_acc: 0.7329545497894288
正在进行第 33 轮训练:
train_acc: 0.6461759868421053, test_acc: 0.6142755746841431
正在进行第 34 轮训练:
train_acc: 0.8032483552631579, test_acc: 0.78671875
正在进行第 35 轮训练:
train_acc: 0.7884457236842105, test_acc: 0.7833096623420716
正在进行第 36 轮训练:
train_acc: 0.7569901315789473, test_acc: 0.7615056872367859
正在进行第 37 轮训练:
train_acc: 0.7419819078947368, test_acc: 0.6953835248947143
正在进行第 38 轮训练:
train_acc: 0.7925575657894737, test_acc: 0.7453125
正在进行第 39 轮训练:
train_acc: 0.8100328947368421, test_acc: 0.7690340995788574
正在进行第 40 轮训练:
train_acc: 0.7060032894736842, test_acc: 0.6689630746841431
正在进行第 41 轮训练:
train_acc: 0.7911184210526315, test_acc: 0.7840198874473572
正在进行第 42 轮训练:
train_acc: 0.7991365131578947, test_acc: 0.79375
正在进行第 43 轮训练:
train_acc: 0.8256578947368421, test_acc: 0.8014204621315002
正在进行第 44 轮训练:
train_acc: 0.7129934210526315, test_acc: 0.7213068246841431
正在进行第 45 轮训练:
train_acc: 0.5993009868421053, test_acc: 0.5727272748947143
正在进行第 46 轮训练:
train_acc: 0.8390213815789473, test_acc: 0.8026278495788575
正在进行第 47 轮训练:
train_acc: 0.7775493421052632, test_acc: 0.7833096623420716
正在进行第 48 轮训练:
train_acc: 0.6036184210526315, test_acc: 0.6108664751052857
正在进行第 49 轮训练:
train_acc: 0.7481496710526315, test_acc: 0.7030539870262146
正在进行第 50 轮训练:
train_acc: 0.8009868421052632, test_acc: 0.7875710248947143
正在进行第 51 轮训练:
train_acc: 0.7185444078947368, test_acc: 0.728338074684143
正在进行第 52 轮训练:
train_acc: 0.7193667763157895, test_acc: 0.6794034123420716
正在进行第 53 轮训练:
train_acc: 0.6944901315789473, test_acc: 0.6545454621315002
正在进行第 54 轮训练:
train_acc: 0.8402549342105263, test_acc: 0.7958806872367858
正在进行第 55 轮训练:
train_acc: 0.8159950657894737, test_acc: 0.8026988744735718
正在进行第 56 轮训练:
train_acc: 0.8342927631578947, test_acc: 0.8084517121315002
正在进行第 57 轮训练:
train_acc: 0.8439555921052632, test_acc: 0.8029119372367859
正在进行第 58 轮训练:
train_acc: 0.8114720394736842, test_acc: 0.7667613744735717
正在进行第 59 轮训练:
train_acc: 0.8441611842105263, test_acc: 0.8099431872367859
正在进行第 60 轮训练:
train_acc: 0.7921463815789473, test_acc: 0.7349431872367859
正在进行第 61 轮训练:
train_acc: 0.8266858552631579, test_acc: 0.8092329621315002
正在进行第 62 轮训练:
train_acc: 0.7335526315789473, test_acc: 0.74140625
正在进行第 63 轮训练:
train_acc: 0.8375822368421053, test_acc: 0.7951704621315002
正在进行第 64 轮训练:
train_acc: 0.8451891447368421, test_acc: 0.8024857997894287
正在进行第 65 轮训练:
train_acc: 0.8036595394736842, test_acc: 0.8034090995788574
正在进行第 66 轮训练:
train_acc: 0.7979029605263158, test_acc: 0.7561789870262146
正在进行第 67 轮训练:
train_acc: 0.8223684210526315, test_acc: 0.7737926244735718
正在进行第 68 轮训练:
train_acc: 0.852796052631579, test_acc: 0.8145596623420716
正在进行第 69 轮训练:
train_acc: 0.8205180921052632, test_acc: 0.7683238744735718
正在进行第 70 轮训练:
train_acc: 0.8227796052631579, test_acc: 0.8089488744735718
正在进行第 71 轮训练:
train_acc: 0.7944078947368421, test_acc: 0.7480823874473572
正在进行第 72 轮训练:
train_acc: 0.7909128289473685, test_acc: 0.7422585248947143
正在进行第 73 轮训练:
train_acc: 0.6566611842105263, test_acc: 0.6286931872367859
正在进行第 74 轮训练:
train_acc: 0.7481496710526315, test_acc: 0.7042613625526428
正在进行第 75 轮训练:
train_acc: 0.7701480263157895, test_acc: 0.7671164870262146
正在进行第 76 轮训练:
train_acc: 0.7571957236842105, test_acc: 0.7639204621315002
正在进行第 77 轮训练:
train_acc: 0.86328125, test_acc: 0.8176846623420715
正在进行第 78 轮训练:
train_acc: 0.8077713815789473, test_acc: 0.7589488744735717
。。。。
。。。。
。。。。
train_acc: 0.9097450657894737, test_acc: 0.8473011374473571
正在进行第 286 轮训练:
train_acc: 0.7378700657894737, test_acc: 0.7109375
正在进行第 287 轮训练:
train_acc: 0.8969983552631579, test_acc: 0.8231534123420715
正在进行第 288 轮训练:
train_acc: 0.9286595394736842, test_acc: 0.8594460248947143
正在进行第 289 轮训练:
train_acc: 0.9344161184210527, test_acc: 0.8642045497894287
正在进行第 290 轮训练:
train_acc: 0.9189967105263158, test_acc: 0.854900574684143
正在进行第 291 轮训练:
train_acc: 0.6891447368421053, test_acc: 0.64375
正在进行第 292 轮训练:
train_acc: 0.9263980263157895, test_acc: 0.8576704621315002
正在进行第 293 轮训练:
train_acc: 0.8460115131578947, test_acc: 0.8085227370262146
正在进行第 294 轮训练:
train_acc: 0.9385279605263158, test_acc: 0.8755681872367859
正在进行第 295 轮训练:
train_acc: 0.9409950657894737, test_acc: 0.8747869372367859
正在进行第 296 轮训练:
train_acc: 0.9379111842105263, test_acc: 0.8736505746841431
正在进行第 297 轮训练:
train_acc: 0.9432565789473685, test_acc: 0.8747869372367859
正在进行第 298 轮训练:
train_acc: 0.9389391447368421, test_acc: 0.8700994372367858
正在进行第 299 轮训练:
train_acc: 0.9424342105263158, test_acc: 0.8735795497894288
正在进行第 300 轮训练:
train_acc: 0.94140625, test_acc: 0.8774857997894288
|
import torch
from torch import nn
from torch.nn import functional as F
from T import words_set,word2idx,idx2word,dict_len,seq_len,device
from T import train
class TextRNN4(nn.Module):
    """
    GRU-based sentiment classifier with an embedding self-similarity term.

    The forward pass embeds the token ids, runs them through a
    unidirectional multi-layer GRU, collapses the time axis with a learned
    per-position weight vector, adds a softmax-weighted self-similarity
    score computed from the embeddings, and classifies the result with a
    two-layer MLP that emits two logits.

    Args:
        num_embeddings: Size of the embedding table.
        embedding_dim: Width of each token embedding; also the GRU input size.
        padding_idx: Index forwarded to ``nn.Embedding`` as ``padding_idx``.
        rnn_num_layers: Number of stacked GRU layers.
        seq_len: Length of every input sequence; it sizes the learned
            per-position pooling weights ``self.w``.
        hidden_size: Width of the GRU hidden state. Defaults to 512, the
            value that used to be hard-coded.

    Note:
        ``forward`` adds a ``[B, seq_len]`` tensor to a ``[B, hidden_size]``
        tensor, so the two sizes must be broadcast-compatible (in practice
        ``seq_len == hidden_size``); otherwise the addition raises.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx,
                 rnn_num_layers=2, seq_len=seq_len, hidden_size=512):
        super(TextRNN4, self).__init__()
        self.embed = nn.Embedding(num_embeddings=num_embeddings,
                                  embedding_dim=embedding_dim,
                                  padding_idx=padding_idx)
        self.rnn_num_layers = rnn_num_layers
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size=embedding_dim,
                          hidden_size=hidden_size,
                          num_layers=rnn_num_layers,
                          bidirectional=False)
        # Learned weight per time step, used to pool the GRU outputs over time.
        self.w = nn.Parameter(torch.ones(seq_len, 1, dtype=torch.float32),
                              requires_grad=True)
        self.fc1 = nn.Linear(in_features=hidden_size, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=2)

    def forward(self, x):
        """
        Compute class logits for a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids with shape ``[B, seq_len]``.

        Returns:
            Tensor of shape ``[B, 2]`` holding unnormalised class scores.
        """
        # [B, L] -> [B, L, E]
        emb = self.embed(x)

        # Pairwise dot products between positions: [B, L, E] @ [B, E, L]
        # -> [B, L, L]; averaging the last axis leaves one score per
        # position, [B, L].
        sim = torch.bmm(emb, torch.permute(input=emb, dims=(0, 2, 1)))
        sim = torch.mean(sim, 2)
        # Re-weight each position's score by its softmax over the sequence.
        sim = sim * torch.softmax(sim, 1)

        # Build the initial state on the same device and dtype as the
        # embeddings instead of relying on a module-level `device` global,
        # so the model works wherever it (and its input) has been moved.
        h0 = emb.new_zeros(self.rnn_num_layers, emb.size(0), self.hidden_size)

        # The GRU is time-major: [B, L, E] -> [L, B, E].
        seq_first = torch.permute(input=emb, dims=(1, 0, 2))
        # out: [L, B, H]; the final hidden state is not used.
        out, _ = self.gru(seq_first, h0)

        # [L, B, H] -> [B, H, L], then pool over time with the learned
        # weights: [B, H, L] @ [L, 1] -> [B, H, 1] -> [B, H].
        out = torch.permute(input=out, dims=(1, 2, 0))
        out = out @ self.w
        out = torch.squeeze(input=out, dim=2)

        # Broadcast add of [B, H] and [B, L]; see the class-level note on
        # the required relation between seq_len and hidden_size.
        out = out + sim

        out = self.fc1(out)
        out = F.relu(out)
        out = self.fc2(out)
        return out
# Prefer the first CUDA device when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Build the GRU classifier and move its parameters to the selected device.
model = TextRNN4(
    num_embeddings=dict_len,
    embedding_dim=512,
    padding_idx=word2idx["<PAD>"],
    seq_len=seq_len,
)
model.to(device=device)

# Define the optimizer: plain SGD over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

# Define the loss function.
loss_fn = nn.CrossEntropyLoss()

# Smoke test: run one random batch of 64 token-id sequences through the
# model and show the first three rows of logits.
a = torch.randint(0, dict_len - 1, (64, seq_len)).to(device=device)
model(a)[:3]
tensor([[ 3.0386, 3.3657],
[ 7.9816, 0.3211],
[ 4.6253, -1.9200]], device='cuda:0', grad_fn=SliceBackward0)
# Train the model with the `train` helper imported from T; the per-epoch
# train/test accuracy log that follows appears to be its output.
train(model)
正在进行第 1 轮训练:
train_acc: 0.6212993421052632, test_acc: 0.6240767121315003
正在进行第 2 轮训练:
train_acc: 0.63671875, test_acc: 0.6154119372367859
正在进行第 3 轮训练:
train_acc: 0.5916940789473685, test_acc: 0.5946022748947144
正在进行第 4 轮训练:
train_acc: 0.5333059210526315, test_acc: 0.5214488625526428
正在进行第 5 轮训练:
train_acc: 0.6903782894736842, test_acc: 0.6786931872367858
...
...
...
正在进行第 294 轮训练:
train_acc: 0.9333881578947368, test_acc: 0.8639914870262146
正在进行第 295 轮训练:
train_acc: 0.9399671052631579, test_acc: 0.8640625
正在进行第 296 轮训练:
train_acc: 0.9422286184210527, test_acc: 0.8389914870262146
正在进行第 297 轮训练:
train_acc: 0.9346217105263158, test_acc: 0.8578835248947143
正在进行第 298 轮训练:
train_acc: 0.8784950657894737, test_acc: 0.8085227370262146
正在进行第 299 轮训练:
train_acc: 0.9432565789473685, test_acc: 0.8585227370262146
正在进行第 300 轮训练:
train_acc: 0.9346217105263158, test_acc: 0.8484375
|
|
|
|
|
|
|