评论索引数据集简介
评论二分类:正面评论-0,负面评论-1。文本向量化后转为索引矩阵,索引从0开始编码,个数为词典长度。一段文本是一个1维索引向量,比如[3,2,88,94,...,1,33],对应一个标签,比如1。多个文本形成一个批次,shape为[batch_size,seq_len],对应一个批次的标签,shape为[batch_size]。
获取索引数据集
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from ai.datasets import load_hotel
# Load the hotel-review data, unpacked into train/test features and labels.
X_train,y_train,X_test,y_test = load_hotel(return_dict=False, return_Xy=True)
class MyDataSet(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self, X, y):
        # Only references are stored; tensor conversion happens per item.
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``, both as int64 tensors."""
        features = torch.tensor(self.X[idx]).long()
        label = torch.tensor(self.y[idx]).long()
        return features, label
# Wrap the train and test splits in MyDataSet objects.
train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test,y=y_test)
X_train: (4800, 85)
y_train: (4800,)
X_test: (1200, 85)
y_test: (1200,)
MyDataSet中,将数据x转成了long类型tensor,标签也转成了long类型tensor
在pytorch中,一个标量也是一个tensor
批量加载
# Draw shuffled mini-batches of 128 samples from the training dataset.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
# Look at the first batch only, to check shapes and type.
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    print(type(X))  # <class 'torch.Tensor'>
    break
训练集shuffle=True,
测试集shuffle=False
批次batch_size在机器性能允许的范围内尽可能大点
从loader中取出来的批次数据类型为 Tensor
索引矩阵到RNN
一个索引对应一个单词,在模型中由nn.Embedding对象转为一个维数为embedding_dim的向量,于是批次数据的shape就转换为[batch_size,seq_len,embedding_dim]。通常情况下,pytorch的批数据格式为[批次,特征个数,特征shape],这个是大方向,数据的shape要统一转化为这个格式,不同的模型需要不同的数据格式时,再在模型内部进行转化。对于文本类数据,特征个数C就是单词的维数,即embedding_dim;特征shape即序列长度seq_len。RNN对象在默认参数(batch_first=False)下极其看重序列的顺序,因为它就是按序列的顺序进行for循环计算的,所以序列的维度默认在dim=0上,RNN输入数据的shape为[seq_len,batch_size,embedding_dim]。因此数据进入RNN前还要交换数据前面的两个维度(dim=0与dim=1):[batch_size,seq_len,embedding_dim] --> [seq_len,batch_size,embedding_dim]。后续就是RNN对数据的变换了。
RNN的数据shape转化
需要思考并确定三个值:
要将一个单词映射到几维向量:hidden_size
RNN是双向还是单向:bidirectional
RNN的层数:num_layers
这三个定下之后,那么RNN的参数转化就定了
h0.shape = hn.shape
if bidirectional:
    hn.shape = [2*num_layers, batch_size, hidden_size]
    output.shape = [seq_len, batch_size, 2*hidden_size]
else:
    hn.shape = [num_layers, batch_size, hidden_size]
    output.shape = [seq_len, batch_size, hidden_size]
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from ai.datasets import load_hotel
# Load the train/test splits; the sequence length is read from the second axis of X_train.
X_train,y_train,X_test,y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]
import sys,os
class ParamConfig():
    """Hyper-parameters and file locations shared by the model and the training run."""

    # Model hyper-parameters.
    padding_idx = 0
    embedding_dim = 256
    hidden_size = 512
    output_size = 2
    bidirectional = True
    num_layers = 2
    batch_size = 128
    debug = False

    # File locations, resolved against the first entry of sys.path.
    BASE_DIR = sys.path[0]
    param_path = os.path.join(BASE_DIR, "model/model_rnn1.pkl")
    log_file = os.path.join(BASE_DIR, "main.log")

    def __init__(self, isTest=False, seq_len=seq_len) -> None:
        """Fill in the data-dependent settings.

        Args:
            isTest: when true, skip loading the dictionary, use a fixed
                dictionary size of 10000 and switch ``debug`` on.
            seq_len: sequence length stored on the instance; defaults to the
                module-level ``seq_len`` at class-definition time.
        """
        self.seq_len = seq_len
        if isTest:
            # Test mode needs no real data: a made-up size is enough to check the model.
            self.dict_len = 10000
            self.debug = True
            return
        words_set, word2idx = load_hotel(return_dict=True)
        dict_len = len(words_set)
        print(f"dict_len:{dict_len}")  # dict_len:21437
        self.dict_len = dict_len
        self.word2idx = word2idx
# Build the configuration; without isTest this loads the dictionary through load_hotel.
pm = ParamConfig()
# Alternative that skips loading the dictionary, for a quick check of the model:
# pm = ParamConfig(isTest=True)
class MyDataSet(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self, X, y):
        # Only references are stored; tensor conversion happens per item.
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``, both as int64 tensors."""
        features = torch.tensor(self.X[idx]).long()
        label = torch.tensor(self.y[idx]).long()
        return features, label
# Wrap the train and test splits in MyDataSet objects.
train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test, y=y_test)
# Draw shuffled mini-batches from the training dataset, sized by the configuration.
train_dataloader = DataLoader(train_dataset, batch_size=pm.batch_size, shuffle=True)
# Look at the first batch only, to check shapes.
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    break
class RNNClassify1(nn.Module):
    """Text classifier: embedding -> multi-layer (bi)directional RNN -> linear head.

    The final hidden states of every directional layer are summed into one
    context vector per sequence, which is then projected to class logits.
    """

    def __init__(self, dict_len, input_size, hidden_size, output_size, num_layers=2, bidirectional=True,debug=pm.debug):
        """Build the embedding table, the recurrent stack and the classifier head.

        Args:
            dict_len: number of rows in the embedding table.
            input_size: embedding dimension, also the RNN input size.
            hidden_size: size of each RNN hidden state.
            output_size: number of output logits.
            num_layers: number of stacked RNN layers.
            bidirectional: whether the RNN runs in both directions.
            debug: when true, intermediate tensor shapes are printed.
        """
        super(RNNClassify1, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=dict_len,
                                      embedding_dim=input_size,
                                      padding_idx=0)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.debug = debug
        self.model = nn.RNN(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, bidirectional=bidirectional)
        # Number of single-direction chains; this is the size of dim 0 of h0 / hn.
        self.single_rnn_nums = num_layers * 2 if bidirectional else num_layers
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """Compute class logits for a batch of index sequences.

        Args:
            X: 2-D index tensor of shape ``[batch_size, seq_len]``.

        Returns:
            Tensor of shape ``[batch_size, output_size]``.

        Raises:
            ValueError: if ``X`` is not 2-dimensional.
        """
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        # The batch size is read per call: the last batch of an epoch is usually smaller.
        batch_size = X.shape[0]
        x = self.embedding(X)  # [B, seq_len] --> [B, seq_len, embedding]
        if self.debug:
            print(f"embedding:{x.shape}")  # embedding:torch.Size([128, 85, 256])
        x = torch.permute(input=x, dims=(1, 0, 2))  # [B, seq_len, embedding] --> [seq_len, B, embedding]
        if self.debug:
            print(f"permute后x.shape:{x.shape}")  # torch.Size([85, 128, 256])
        # Create h0 on the same device/dtype as the embedded input so the module
        # keeps working after model.to(device).
        h0 = torch.zeros(self.single_rnn_nums, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        # h0 and hn are the two ends of each single-direction chain.
        out, hn = self.model(x, h0)
        # hn is [single_rnn_nums, B, hidden_size]; summing over dim 0 merges the
        # final states of all chains into one [B, hidden_size] context vector.
        # (Concatenation would be the alternative way of merging them.)
        out = torch.sum(input=hn, dim=0)
        # Fully connected classification head.
        out = self.fc(out)
        return out
# Instantiate the classifier from the shared configuration.
model = RNNClassify1(dict_len=pm.dict_len,
input_size=pm.embedding_dim,
hidden_size=pm.hidden_size,
output_size=pm.output_size,
num_layers=pm.num_layers,
bidirectional=pm.bidirectional)
# Smoke test on a random index batch.
# NOTE(review): this rebinds X_test, replacing the test matrix loaded earlier;
# test_dataset was already built from the original before this line.
X_test = torch.randint(low=0,high=pm.dict_len,size=(pm.batch_size, pm.seq_len))
y_out = model(X_test)
print(y_out.shape)
# Define the loss function
loss_fn = nn.CrossEntropyLoss()
from ai.dl import T
# Train through the project helper; the meaning of continuation / auto_save is
# defined in ai.dl and is not visible here.
T.train(model=model,loss_fn=loss_fn,optimizer="adam",
continuation=True,
is_regression=False,
learning_rate=1e-3,
epochs=10,
auto_save=True,
train_dataset=train_dataset,
test_dataset=test_dataset,
model_param_path=pm.param_path,
log_file=pm.log_file)
较好效果的模型
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# 加载数据集
from ai.datasets import load_hotel
# Load the train/test splits; the sequence length is read from the second axis of X_train.
X_train,y_train,X_test,y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]
import sys,os
class ParamConfig():
    """Hyper-parameters and file locations shared by the model and the training run."""

    # Model hyper-parameters.
    padding_idx = 0
    embedding_dim = 256
    hidden_size = 512
    output_size = 2
    bidirectional = True
    num_layers = 2
    batch_size = 128
    debug = False

    # File locations, resolved against the first entry of sys.path.
    BASE_DIR = sys.path[0]
    param_path = os.path.join(BASE_DIR, "model/model_gru8.pkl")
    log_file = os.path.join(BASE_DIR, "main.log")

    def __init__(self, isTest=False, seq_len=seq_len) -> None:
        """Fill in the data-dependent settings.

        Args:
            isTest: when true, skip loading the dictionary, use a fixed
                dictionary size of 10000 and switch ``debug`` on.
            seq_len: sequence length stored on the instance; defaults to the
                module-level ``seq_len`` at class-definition time.
        """
        self.seq_len = seq_len
        if isTest:
            # Test mode needs no real data: a made-up size is enough to check the model.
            self.dict_len = 10000
            self.debug = True
            return
        words_set, word2idx = load_hotel(return_dict=True)
        dict_len = len(words_set)
        print(f"dict_len:{dict_len}")  # dict_len:21437
        self.dict_len = dict_len
        self.word2idx = word2idx
# Build the configuration; without isTest this loads the dictionary through load_hotel.
pm = ParamConfig()
# Alternative that skips loading the dictionary, for a quick check of the model:
# pm = ParamConfig(isTest=True)
class MyDataSet(Dataset):
    """Map-style dataset that pairs each feature row with its label."""

    def __init__(self, X, y):
        # Only references are stored; tensor conversion happens per item.
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from the feature container."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx``, both as int64 tensors."""
        features = torch.tensor(self.X[idx]).long()
        label = torch.tensor(self.y[idx]).long()
        return features, label
# Wrap the train and test splits in MyDataSet objects.
train_dataset = MyDataSet(X=X_train, y=y_train)
test_dataset = MyDataSet(X=X_test, y=y_test)
# Draw shuffled mini-batches from the training dataset, sized by the configuration.
train_dataloader = DataLoader(train_dataset, batch_size=pm.batch_size, shuffle=True)
# Look at the first batch only, to check shapes.
for X, y in train_dataloader:
    print(X.shape, X.ndim, y.shape, y.ndim)  # torch.Size([128, 85]) 2 torch.Size([128]) 1
    break
class RNNClassify1(nn.Module):
    """Text classifier: embedding -> multi-layer (bi)directional GRU -> two linear layers.

    The GRU outputs of all time steps are summed into one vector per sequence
    (the forward and backward halves are added first when bidirectional), and
    that vector is projected to class logits.
    """

    def __init__(self, dict_len, input_size, hidden_size, output_size, num_layers=2, bidirectional=True,debug=pm.debug):
        """Build the embedding table, the recurrent stack and the classifier head.

        Args:
            dict_len: number of rows in the embedding table.
            input_size: embedding dimension, also the GRU input size.
            hidden_size: size of each GRU hidden state.
            output_size: number of output logits.
            num_layers: number of stacked GRU layers.
            bidirectional: whether the GRU runs in both directions.
            debug: when true, intermediate tensor shapes are printed.
        """
        super(RNNClassify1, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=dict_len,
                                      embedding_dim=input_size,
                                      padding_idx=0)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.debug = debug
        # Remembered so forward() knows whether the output holds two direction halves.
        self.bidirectional = bidirectional
        self.model = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, bidirectional=bidirectional)
        # Number of single-direction chains; this is the size of dim 0 of h0 / hn.
        self.single_rnn_nums = num_layers * 2 if bidirectional else num_layers
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """Compute class logits for a batch of index sequences.

        Args:
            X: 2-D index tensor of shape ``[batch_size, seq_len]``.

        Returns:
            Tensor of shape ``[batch_size, output_size]``.

        Raises:
            ValueError: if ``X`` is not 2-dimensional.
        """
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        # The batch size is read per call: the last batch of an epoch is usually smaller.
        batch_size = X.shape[0]
        x = self.embedding(X)  # [B, seq_len] --> [B, seq_len, embedding]
        if self.debug:
            print(f"embedding:{x.shape}")  # embedding:torch.Size([128, 85, 256])
        x = torch.permute(input=x, dims=(1, 0, 2))  # [B, seq_len, embedding] --> [seq_len, B, embedding]
        if self.debug:
            print(f"permute后x.shape:{x.shape}")  # torch.Size([85, 128, 256])
        # Create h0 on the same device/dtype as the embedded input so the module
        # keeps working after model.to(device).
        h0 = torch.zeros(self.single_rnn_nums, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        out, _ = self.model(x, h0)
        if self.bidirectional:
            # The last dim is the forward and backward outputs concatenated
            # (2 * hidden_size); add the halves so it is hidden_size wide.
            # With a unidirectional GRU the output is already hidden_size wide
            # and slicing it this way would yield an empty tensor.
            out = out[:, :, self.hidden_size:] + out[:, :, :self.hidden_size]
        # Sum over the time axis: [seq_len, B, hidden_size] --> [B, hidden_size].
        out = torch.sum(input=out, dim=0)
        # Fully connected classification head.
        # NOTE(review): fc1 and fc2 are applied back to back with no activation
        # in between; left as is so saved checkpoints keep producing the same output.
        out = self.fc1(out)
        out = self.fc2(out)
        return out
# Instantiate the classifier from the shared configuration.
model = RNNClassify1(dict_len=pm.dict_len,
input_size=pm.embedding_dim,
hidden_size=pm.hidden_size,
output_size=pm.output_size,
num_layers=pm.num_layers,
bidirectional=pm.bidirectional)
# Define the loss function
loss_fn = nn.CrossEntropyLoss()
from ai.dl import T
# Train through the project helper; the meaning of continuation / auto_save is
# defined in ai.dl and is not visible here.
T.train(model=model,loss_fn=loss_fn,optimizer="adam",
continuation=True,
is_regression=False,
learning_rate=1e-3,
epochs=20,
auto_save=True,
train_dataset=train_dataset,
test_dataset=test_dataset,
model_param_path=pm.param_path,
log_file=pm.log_file)
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
# 加载数据集
from ai.datasets import load_hotel
# Load the train/test splits; the sequence length is read from the second axis of X_train.
X_train,y_train,X_test,y_test = load_hotel(return_Xy=True)
seq_len = X_train.shape[1]
import sys,os
class ParamConfig():
    """Hyper-parameters and file locations shared by the model and the training run."""

    # Model hyper-parameters.
    padding_idx = 0
    embedding_dim = 256
    hidden_size = 512
    output_size = 2
    bidirectional = True
    num_layers = 2
    batch_size = 128
    debug = False

    # File locations, resolved against the first entry of sys.path.
    BASE_DIR = sys.path[0]
    param_path = os.path.join(BASE_DIR, "model/model_gru8.pkl")
    log_file = os.path.join(BASE_DIR, "main.log")

    def __init__(self, isTest=False, seq_len=seq_len) -> None:
        """Fill in the data-dependent settings.

        Args:
            isTest: when true, skip loading the dictionary, use a fixed
                dictionary size of 10000 and switch ``debug`` on.
            seq_len: sequence length stored on the instance; defaults to the
                module-level ``seq_len`` at class-definition time.
        """
        self.seq_len = seq_len
        if isTest:
            # Test mode needs no real data: a made-up size is enough to check the model.
            self.dict_len = 10000
            self.debug = True
            return
        words_set, word2idx = load_hotel(return_dict=True)
        dict_len = len(words_set)
        print(f"dict_len:{dict_len}")  # dict_len:21437
        self.dict_len = dict_len
        self.word2idx = word2idx
# Build the configuration; without isTest this loads the dictionary through load_hotel.
pm = ParamConfig()
class RNNClassify1(nn.Module):
    """Text classifier: embedding -> multi-layer (bi)directional GRU -> two linear layers.

    The GRU outputs of all time steps are summed into one vector per sequence
    (the forward and backward halves are added first when bidirectional), and
    that vector is projected to class logits.
    """

    def __init__(self, dict_len, input_size, hidden_size, output_size, num_layers=2, bidirectional=True,debug=pm.debug):
        """Build the embedding table, the recurrent stack and the classifier head.

        Args:
            dict_len: number of rows in the embedding table.
            input_size: embedding dimension, also the GRU input size.
            hidden_size: size of each GRU hidden state.
            output_size: number of output logits.
            num_layers: number of stacked GRU layers.
            bidirectional: whether the GRU runs in both directions.
            debug: when true, intermediate tensor shapes are printed.
        """
        super(RNNClassify1, self).__init__()
        self.embedding = nn.Embedding(num_embeddings=dict_len,
                                      embedding_dim=input_size,
                                      padding_idx=0)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.debug = debug
        # Remembered so forward() knows whether the output holds two direction halves.
        self.bidirectional = bidirectional
        self.model = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, bidirectional=bidirectional)
        # Number of single-direction chains; this is the size of dim 0 of h0 / hn.
        self.single_rnn_nums = num_layers * 2 if bidirectional else num_layers
        self.fc1 = nn.Linear(hidden_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        """Compute class logits for a batch of index sequences.

        Args:
            X: 2-D index tensor of shape ``[batch_size, seq_len]``.

        Returns:
            Tensor of shape ``[batch_size, output_size]``.

        Raises:
            ValueError: if ``X`` is not 2-dimensional.
        """
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")
        # The batch size is read per call: the last batch of an epoch is usually smaller.
        batch_size = X.shape[0]
        x = self.embedding(X)  # [B, seq_len] --> [B, seq_len, embedding]
        if self.debug:
            print(f"embedding:{x.shape}")  # embedding:torch.Size([128, 85, 256])
        x = torch.permute(input=x, dims=(1, 0, 2))  # [B, seq_len, embedding] --> [seq_len, B, embedding]
        if self.debug:
            print(f"permute后x.shape:{x.shape}")  # torch.Size([85, 128, 256])
        # Create h0 on the same device/dtype as the embedded input so the module
        # keeps working after model.to(device).
        h0 = torch.zeros(self.single_rnn_nums, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        out, _ = self.model(x, h0)
        if self.bidirectional:
            # The last dim is the forward and backward outputs concatenated
            # (2 * hidden_size); add the halves so it is hidden_size wide.
            # With a unidirectional GRU the output is already hidden_size wide
            # and slicing it this way would yield an empty tensor.
            out = out[:, :, self.hidden_size:] + out[:, :, :self.hidden_size]
        # Sum over the time axis: [seq_len, B, hidden_size] --> [B, hidden_size].
        out = torch.sum(input=out, dim=0)
        # Fully connected classification head.
        # NOTE(review): fc1 and fc2 are applied back to back with no activation
        # in between; left as is so saved checkpoints keep producing the same output.
        out = self.fc1(out)
        out = self.fc2(out)
        return out
# Rebuild the network with the same configuration that was used for training.
model = RNNClassify1(dict_len=pm.dict_len,
                     input_size=pm.embedding_dim,
                     hidden_size=pm.hidden_size,
                     output_size=pm.output_size,
                     num_layers=pm.num_layers,
                     bidirectional=pm.bidirectional)
# Restore the trained weights. map_location="cpu" lets a checkpoint that was
# saved from a GPU run load on a machine without CUDA; the model built above
# lives on the CPU, so the tensors end up where they are needed.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model.load_state_dict(torch.load(pm.param_path, map_location="cpu"))
X_train: (4800, 85)
y_train: (4800,)
X_test: (1200, 85)
y_test: (1200,)
dict_len:21437
参数查看
# Walk over every learnable tensor of the model and print it.
params = model.parameters()
for param in params:
    print(param)
Parameter containing:
tensor([[ 0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],
[ 1.1802, 0.7505, -0.9388, ..., -0.3928, -1.3043, -1.4117],
[ 0.5218, 0.7069, -0.5760, ..., 0.1971, -0.6769, 1.0947],
...,
[-1.4839, -1.3945, 1.2294, ..., -0.5314, -0.2000, -1.2279],
[ 0.0224, 0.4936, 0.7235, ..., -0.7298, 0.0284, 1.5092],
[ 1.3206, 0.3973, 1.6314, ..., 1.1834, -1.9032, -0.9954]],
requires_grad=True)
Parameter containing:
tensor([[ 0.0693, 0.0321, 0.0275, ..., 0.0230, 0.0582, 0.0134],
[-0.0055, 0.0122, -0.0528, ..., -0.0053, 0.0540, -0.0324],
[-0.0068, -0.0148, -0.0050, ..., -0.0370, 0.0282, -0.0017],
...,
[ 0.0066, -0.0636, -0.0358, ..., -0.0119, 0.0530, 0.0142],
[ 0.0033, 0.0176, 0.0265, ..., -0.0460, -0.0056, 0.0219],
[-0.0248, -0.0047, -0.0279, ..., -0.0037, -0.0181, 0.0326]],
requires_grad=True)
Parameter containing:
tensor([[ 0.0370, 0.0069, 0.0149, ..., 0.0259, 0.0211, 0.0333],
[ 0.0219, 0.0340, -0.0193, ..., 0.0198, -0.0432, -0.0247],
[-0.0471, 0.0224, 0.0109, ..., 0.0174, -0.0180, -0.0382],
...,
[ 0.0087, -0.0396, 0.0051, ..., -0.0510, -0.0020, 0.0453],
[ 0.0078, -0.0054, 0.0006, ..., 0.0105, -0.0438, 0.0265],
[-0.0316, -0.0105, 0.0271, ..., -0.0224, -0.0277, 0.0199]],
requires_grad=True)
Parameter containing:
tensor([ 0.0204, -0.0189, -0.0090, ..., 0.0218, -0.0309, -0.0302],
requires_grad=True)
Parameter containing:
tensor([-0.0398, 0.0280, 0.0159, ..., 0.0238, 0.0295, -0.0314],
requires_grad=True)
Parameter containing:
tensor([[ 0.0154, 0.0428, -0.0074, ..., -0.0005, -0.0351, -0.0757],
[-0.0535, -0.0825, 0.0145, ..., -0.0280, -0.0617, 0.0242],
[-0.0410, -0.0308, -0.0457, ..., 0.0030, 0.0025, 0.0007],
...,
[ 0.0101, 0.0327, -0.0012, ..., -0.0421, -0.0264, 0.0367],
[-0.0211, -0.0454, -0.0315, ..., -0.0359, 0.0262, 0.0035],
[ 0.0264, 0.0021, 0.0318, ..., -0.0176, -0.0241, 0.0271]],
requires_grad=True)
Parameter containing:
tensor([[ 0.0095, 0.0283, -0.0589, ..., -0.0089, -0.0176, -0.0457],
[ 0.0022, -0.0181, -0.0295, ..., -0.0417, 0.0470, -0.0161],
[-0.0169, 0.0010, -0.0233, ..., 0.0155, 0.0368, 0.0318],
...,
[-0.0025, -0.0400, -0.0153, ..., 0.0288, -0.0182, -0.0059],
[-0.0199, 0.0168, -0.0015, ..., 0.0030, 0.0214, 0.0330],
[ 0.0190, 0.0058, -0.0423, ..., -0.0341, 0.0300, -0.0216]],
requires_grad=True)
Parameter containing:
tensor([-0.0203, -0.0047, -0.0277, ..., 0.0253, 0.0158, 0.0082],
requires_grad=True)
Parameter containing:
tensor([-0.0060, -0.0291, 0.0055, ..., -0.0359, -0.0391, 0.0315],
requires_grad=True)
Parameter containing:
tensor([[-0.0395, -0.0255, -0.0486, ..., 0.0474, -0.0069, -0.0043],
[ 0.0054, 0.0257, -0.0380, ..., 0.0905, -0.0286, 0.0442],
[ 0.0040, 0.0346, 0.0336, ..., -0.0055, 0.0330, 0.0167],
...,
[ 0.0210, 0.0141, -0.0333, ..., -0.0278, -0.0215, -0.0175],
[ 0.0415, 0.0039, 0.0446, ..., -0.0078, -0.0186, 0.0154],
[-0.0187, 0.0082, -0.0360, ..., 0.0176, 0.0064, 0.0057]],
requires_grad=True)
Parameter containing:
tensor([[ 0.0209, -0.0400, -0.0456, ..., -0.0005, -0.0109, 0.0261],
[-0.0435, -0.0288, -0.0129, ..., 0.0267, 0.0098, 0.0111],
[-0.0064, -0.0289, -0.0098, ..., 0.0331, 0.0385, 0.0188],
...,
[ 0.0233, -0.0304, -0.0354, ..., -0.0051, 0.0180, -0.0445],
[ 0.0029, -0.0387, 0.0154, ..., -0.0347, -0.0435, 0.0036],
[-0.0108, -0.0071, -0.0138, ..., 0.0129, 0.0221, -0.0095]],
requires_grad=True)
Parameter containing:
tensor([-0.0155, 0.0367, -0.0390, ..., 0.0014, -0.0098, -0.0116],
requires_grad=True)
Parameter containing:
tensor([ 0.0413, 0.0380, 0.0452, ..., -0.0292, -0.0195, 0.0312],
requires_grad=True)
Parameter containing:
tensor([[ 0.0417, 0.0099, 0.0421, ..., 0.0143, -0.0170, -0.0095],
[-0.0085, 0.0091, -0.0193, ..., -0.0025, -0.0246, 0.0235],
[-0.0454, 0.0445, 0.0198, ..., 0.0402, -0.0210, -0.0140],
...,
[ 0.0132, 0.0189, -0.0185, ..., 0.0094, -0.0018, -0.0054],
[-0.0180, -0.0366, -0.0243, ..., -0.0275, 0.0122, 0.0281],
[-0.0287, -0.0319, 0.0090, ..., 0.0417, -0.0065, 0.0435]],
requires_grad=True)
Parameter containing:
tensor([[-0.0302, -0.0129, 0.0106, ..., -0.0365, 0.0287, -0.0393],
[ 0.0176, 0.0054, -0.0043, ..., -0.0314, -0.0276, 0.0002],
[-0.0254, 0.0416, 0.0031, ..., -0.0468, -0.0121, 0.0020],
...,
[-0.0335, -0.0049, 0.0102, ..., 0.0003, -0.0246, -0.0283],
[-0.0376, 0.0309, 0.0134, ..., -0.0249, 0.0238, 0.0321],
[ 0.0009, 0.0014, -0.0194, ..., -0.0075, -0.0053, -0.0098]],
requires_grad=True)
Parameter containing:
tensor([ 0.0309, -0.0131, -0.0226, ..., -0.0201, 0.0276, 0.0148],
requires_grad=True)
Parameter containing:
tensor([ 0.0138, 0.0010, -0.0406, ..., 0.0255, 0.0366, -0.0405],
requires_grad=True)
Parameter containing:
tensor([[ 0.0184, 0.0253, 0.0070, ..., 0.0083, -0.0366, -0.0300],
[-0.0126, -0.0126, -0.0161, ..., -0.0075, 0.0211, -0.0301],
[-0.0223, -0.0436, -0.0091, ..., 0.0418, -0.0255, -0.0194],
...,
[-0.0099, -0.0418, 0.0198, ..., 0.0277, -0.0030, -0.0008],
[ 0.0188, 0.0018, -0.0183, ..., -0.0019, 0.0435, -0.0160],
[-0.0282, -0.0451, -0.0375, ..., 0.0257, 0.0119, -0.0024]],
requires_grad=True)
Parameter containing:
tensor([ 0.0102, 0.0341, -0.0306, 0.0291, -0.0242, -0.0070, -0.0411, 0.0199,
0.0041, 0.0138, -0.0276, 0.0058, 0.0428, 0.0009, 0.0174, 0.0125,
-0.0117, -0.0164, 0.0012, 0.0260, 0.0131, -0.0327, 0.0363, -0.0333,
-0.0183, -0.0036, -0.0391, -0.0317, 0.0190, 0.0333, -0.0315, 0.0165,
-0.0353, 0.0040, 0.0107, -0.0293, 0.0279, 0.0370, -0.0356, 0.0433,
-0.0113, 0.0359, -0.0424, 0.0212, 0.0409, -0.0237, -0.0161, 0.0159,
0.0262, 0.0193, -0.0197, -0.0178, 0.0093, -0.0406, -0.0065, 0.0263,
0.0136, -0.0231, -0.0409, 0.0302, 0.0307, -0.0387, -0.0303, 0.0055,
0.0101, -0.0380, -0.0061, -0.0053, 0.0068, -0.0305, -0.0317, -0.0063,
-0.0040, 0.0167, -0.0186, 0.0071, -0.0280, 0.0432, 0.0393, 0.0185,
0.0037, 0.0248, 0.0323, 0.0210, -0.0198, 0.0347, 0.0071, -0.0357,
0.0263, -0.0167, -0.0156, -0.0136, 0.0219, -0.0076, -0.0150, -0.0049,
-0.0124, 0.0315, 0.0433, -0.0387, -0.0347, -0.0387, 0.0436, 0.0172,
0.0349, -0.0416, 0.0427, -0.0167, -0.0060, -0.0020, -0.0149, 0.0330,
0.0064, -0.0163, -0.0378, -0.0324, 0.0303, 0.0118, -0.0129, -0.0213,
-0.0388, 0.0230, -0.0069, 0.0396, 0.0101, -0.0325, 0.0264, 0.0132,
0.0033, 0.0292, 0.0413, -0.0202, 0.0122, -0.0074, 0.0092, 0.0174,
-0.0313, -0.0214, 0.0239, 0.0001, 0.0318, 0.0387, -0.0168, 0.0438,
0.0133, -0.0013, 0.0322, -0.0388, 0.0382, -0.0128, 0.0294, -0.0355,
-0.0161, -0.0065, -0.0228, -0.0265, 0.0275, -0.0210, 0.0099, -0.0198,
0.0358, 0.0319, -0.0182, -0.0406, 0.0002, -0.0142, 0.0437, -0.0403,
-0.0094, 0.0202, 0.0162, 0.0214, -0.0175, -0.0301, -0.0104, 0.0431,
0.0241, -0.0121, 0.0278, 0.0369, -0.0073, -0.0309, -0.0079, 0.0407,
-0.0440, -0.0099, -0.0214, 0.0353, -0.0187, 0.0386, -0.0279, -0.0426,
-0.0211, -0.0377, 0.0224, 0.0266, 0.0407, -0.0097, -0.0190, -0.0202,
0.0226, -0.0204, -0.0080, -0.0285, -0.0172, 0.0330, 0.0401, 0.0325,
-0.0095, 0.0044, -0.0377, 0.0260, -0.0177, 0.0164, 0.0385, 0.0391,
-0.0207, 0.0045, 0.0228, -0.0240, 0.0056, -0.0311, 0.0235, 0.0048,
-0.0310, -0.0021, -0.0406, -0.0164, 0.0078, -0.0078, 0.0190, 0.0055,
-0.0094, -0.0423, -0.0253, -0.0068, 0.0109, 0.0059, 0.0116, -0.0082,
0.0033, -0.0266, 0.0282, 0.0150, 0.0030, -0.0100, -0.0172, 0.0036,
0.0306, -0.0279, -0.0134, 0.0239, 0.0184, -0.0348, 0.0099, 0.0106,
-0.0116, -0.0403, 0.0055, -0.0404, 0.0291, -0.0369, 0.0078, 0.0301,
0.0316, 0.0133, -0.0297, 0.0078, 0.0346, -0.0410, -0.0136, -0.0219,
-0.0233, -0.0105, -0.0388, 0.0220, -0.0337, 0.0178, 0.0326, -0.0043,
-0.0260, 0.0136, -0.0065, -0.0305, -0.0143, 0.0285, 0.0349, 0.0167,
-0.0042, -0.0090, 0.0417, -0.0183, 0.0119, 0.0111, -0.0273, -0.0429,
0.0152, 0.0288, 0.0218, 0.0314, -0.0424, 0.0294, -0.0002, 0.0439,
0.0023, 0.0215, 0.0046, -0.0405, 0.0378, -0.0028, 0.0314, 0.0265,
-0.0379, -0.0243, -0.0165, 0.0048, 0.0280, 0.0331, -0.0237, -0.0397,
0.0189, -0.0378, -0.0142, 0.0074, -0.0363, -0.0329, 0.0325, 0.0349,
0.0305, 0.0122, 0.0312, -0.0385, 0.0022, 0.0031, 0.0017, -0.0104,
0.0320, -0.0106, -0.0309, -0.0048, -0.0424, 0.0362, -0.0271, 0.0424,
-0.0368, 0.0383, 0.0208, -0.0063, -0.0164, 0.0099, -0.0020, -0.0074,
-0.0119, 0.0257, 0.0238, 0.0378, -0.0007, 0.0066, -0.0297, -0.0207,
-0.0332, 0.0364, -0.0010, 0.0026, 0.0062, -0.0073, 0.0226, 0.0438,
0.0100, -0.0279, -0.0282, 0.0214, 0.0403, 0.0269, 0.0206, -0.0304,
-0.0434, -0.0118, 0.0315, 0.0310, -0.0400, 0.0182, -0.0409, -0.0329,
-0.0010, -0.0269, 0.0152, -0.0092, -0.0136, 0.0357, 0.0348, -0.0157,
0.0322, 0.0286, -0.0286, 0.0110, 0.0413, 0.0122, -0.0167, 0.0186,
-0.0389, -0.0237, -0.0065, 0.0292, -0.0048, 0.0333, -0.0281, 0.0086,
0.0092, -0.0119, 0.0186, 0.0298, 0.0125, 0.0198, -0.0326, -0.0376,
0.0389, -0.0006, -0.0046, -0.0411, -0.0222, -0.0202, 0.0236, 0.0160,
0.0208, -0.0172, -0.0121, -0.0117, 0.0227, 0.0137, 0.0079, 0.0282,
0.0123, -0.0416, 0.0003, -0.0017, 0.0285, -0.0234, -0.0149, 0.0258,
0.0370, -0.0397, 0.0056, -0.0187, -0.0319, -0.0404, 0.0066, 0.0129,
-0.0434, -0.0076, -0.0039, -0.0408, 0.0086, -0.0150, -0.0274, 0.0129,
-0.0011, 0.0335, 0.0076, 0.0300, -0.0409, -0.0436, -0.0137, -0.0166,
0.0188, 0.0364, -0.0313, 0.0016, -0.0299, -0.0286, 0.0085, 0.0057,
-0.0442, -0.0354, 0.0203, 0.0350, 0.0379, 0.0150, -0.0414, 0.0191,
-0.0245, 0.0124, -0.0262, 0.0412, 0.0108, -0.0422, -0.0278, -0.0188,
-0.0199, 0.0234, 0.0018, 0.0320, -0.0029, 0.0381, -0.0403, 0.0089,
0.0266, 0.0408, 0.0310, 0.0030, -0.0184, -0.0382, 0.0168, -0.0356,
-0.0052, -0.0048, -0.0227, -0.0426, 0.0257, -0.0214, 0.0038, -0.0409],
requires_grad=True)
Parameter containing:
tensor([[-0.0298, 0.0013, 0.0063, ..., 0.0177, 0.0143, -0.0130],
[-0.0134, 0.0035, 0.0215, ..., 0.0220, 0.0280, -0.0165]],
requires_grad=True)
Parameter containing:
tensor([ 0.0375, -0.0276], requires_grad=True)
直接调用模型
# Forward pass over the whole test matrix (notebook cell: the value is displayed).
model(torch.tensor(X_test))
tensor([[ -2.6753, 1.8119],
[ 9.7845, -9.0205],
[-19.9369, 24.6413],
...,
[ 18.5353, -21.7781],
[-12.1830, 8.3670],
[ 4.7882, -2.9590]], grad_fn=<AddmmBackward0>)
转换技巧
# Three sample logit rows, one row per review and one column per class.
a = torch.tensor(
    [
        [-2.6753, 1.8119],
        [9.7845, -9.0205],
        [-19.9369, 24.6413],
    ]
)
# The column index of the larger logit in each row is the predicted class.
a.argmax(dim=1)
tensor([1, 0, 1])
此处是正负评论的二分类问题,且负面评论为1,
正好与输出列的索引0与1对应。
正常情况下,通常会做一个softmax,转成概率,
但softmax是单调的,概率最大的位置与直接取max value的位置一致,
故而不如直接取最大值的索引来得方便
预测方法
def predict(model,X=X_test):
    """Return the predicted class index for every row of ``X``.

    Args:
        model: callable that maps an index tensor to per-class scores with
            the classes on dim 1.
        X: index data accepted by ``torch.as_tensor``; defaults to the
            module-level ``X_test`` bound when this function was defined.

    Returns:
        1-D tensor holding the argmax over dim 1 of the model output.
    """
    # as_tensor accepts arrays and tensors alike and avoids the warning that
    # torch.tensor() emits when it is handed an existing tensor.
    inputs = torch.as_tensor(X)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        y_out = model(inputs)
    return y_out.argmax(dim=1)
# Predict the first three test samples (notebook cell: y_out is displayed).
y_out = predict(model=model,X=X_test[:3])
y_out
tensor([1, 0, 1])