random.randint(a, b) 的取值范围是闭区间 [a, b],左右端点皆包含
import random
# random.randint(a, b): return a random integer in the range [a, b],
# including both end points.
# Example session (either end point can be returned):
#   random.randint(1,2)  ->  1
#   random.randint(1,2)  ->  2
x = np.random.choice(words, size=n, replace=True, p=p)
replace=True 表示有放回抽样;有放回抽样可以抽取任意数量的数据。
np.random.choice: Generates a random sample from a given 1-D array.
import numpy as np
import random
# Inclusive (min, max) bounds for the length of the sampled sequence.
seq_len = (30, 48)

# Vocabulary: the ten digits followed by the three letter rows of a keyboard.
words = list(
    "0123456789"
    "qwertyuiop"
    "asdfghjkl"
    "zxcvbnm"
)

# Unnormalised selection weight for each word, aligned with `words` by
# position: 1..10 for the digits, 1..10 for the next ten letters and 11..26
# for the remaining sixteen. The values are fixed, hand-picked integers.
p = np.concatenate([
    np.arange(1, 11),
    np.arange(1, 11),
    np.arange(11, 27),
])
# Turn the weights into probabilities so that they sum to 1.
p = p / p.sum()

# Pick how many words to draw; random.randint includes both end points.
n = random.randint(*seq_len)
# Draw n words with replacement, each chosen according to p.
x = np.random.choice(words, size=n, replace=True, p=p)
x
array(['f', 'g', 'v', 'v', 's', 'z', '4', 'f', 'n', 'z', 'k', 'e', 'p',
'x', 'g', 'o', 'j', 'f', 's', 'z', 'c', 'l', '4', 'k', 'n', '8',
'm', 'k', 'h', 'f', 'p', 's', 'i', 's', 'c', 'a', 'x', 'v', 'o',
'm', 'a'], dtype='<U1')
参数说明
重点说明两个参数:大小size,可放回replace
size : int or tuple of ints, optional
Output shape. If the given shape is, e.g., ``(m, n, k)``, then
``m * n * k`` samples are drawn. Default is None, in which case a
single value is returned
# A tuple passed as `size` is the output shape: (5, 10) draws 5 * 10 samples.
x = np.random.choice(
    words,
    p=p,
    replace=True,
    size=(5, 10),
)
x.shape
(5, 10)
replace : boolean, optional
Whether the sample is with or without replacement. Default is True,
meaning that a value of ``a`` can be selected multiple times.
replace:是否放回,默认True,即放回,意味着一个样本可被多次提取
import torch
import pandas as pd

# Build a 30 x 4 table holding the values 1..120 in row-major order.
a = torch.linspace(start=1, end=120, steps=120).reshape(30, 4)
# Convert the tensor to a NumPy array first so that pandas receives a plain
# 2-D float array rather than a torch.Tensor.
a = pd.DataFrame(a.numpy(), columns=["A", "B", "C", "D"])
print(a)

# The data set that the sampling examples below operate on.
df = a

# Draw 10 random rows; raises an error if the frame has fewer than 10 rows.
df.sample(n=10)

# Draw a random 20% of the rows.
df.sample(frac=0.2)

# Allow the same row to be drawn more than once.
df.sample(n=10, replace=True)

# Give every row its own sampling weight (one weight per row, 30 in total).
weights = torch.rand(30)
# Pass the weights as a NumPy array so pandas receives a plain float vector.
df.sample(n=10, weights=weights.numpy())

# Fix the random seed so the same rows are drawn on every run.
df.sample(n=10, random_state=73)