|
```
X_train.shape,X_test.shape
((455, 30), (114, 30))
```
## 列的排序
- 数据中最开始的列
```
# Make the local tpf source tree importable before importing from it.
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase
# Column names and values of the data
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
# Keep only the first three columns, in their original data order.
feature_cols=feature_cols[:3]
feature_cols # ['mean radius', 'mean texture', 'mean perimeter']
```
- MLBase初始化时的列顺序与原始数据的列顺序一致
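- self.feature_cols = feature_cols
- 但 feature_names()(fit 时经 getX 调用)会对列名去重并按升序排序,并写回 self.feature_cols,因此训练后 feature_cols 的顺序可能与初始化时不同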
```
from tpf.mlib import MLBase
# Build the template; feature_cols is passed here in the original data order.
ml = MLBase(
model_type='lr',
model_version=2,
model_save_dir='/tmp/',
feature_cols=feature_cols,
log_path='ml.log',
max_iter=10000)
```
```
def feature_names(self):
    """Return the feature column names, always in ascending sorted order.

    Duplicate names are dropped. The sorted list is also written back to
    ``self.feature_cols``, so the stored column order changes once this
    method has been called.
    """
    feature_cols = sorted(set(self.feature_cols))
    self.feature_cols = feature_cols
    return feature_cols
def getX(self, X):
    """Return the subset of X made of the columns listed by feature_names()."""
    return X[self.feature_names()]
def fit(self, X, y, X_valid=None, y_valid=None, cat_features=None):
    # Train on the selected feature columns only; the local name X is rebound,
    # so the object passed in by the caller is left as it was.
    X = self.getX(X)
    # NOTE: excerpt - only the first statement of fit is shown in these notes.
```
## 数据的形状
```
X_train.shape,X_test.shape # ((455, 30), (114, 30))
```
- 训练之后,原始的数据的列顺序不变,且列数不变
- 训练所用的只是原数据的一个子集:X[self.feature_names()]
|
|
## 模型保存
- 核心参数:模型类型,版本,参数,使用的特征列
- 辅助参数:保存目录,日志
```
from tpf.mlib import MLBase
# Define the template. Core parameters: model type, parameters, feature columns used;
# auxiliary parameters: save directory, name, log.
ml = MLBase(
model_type='lr',
model_version=1,
model_save_dir='/tmp/models',
feature_cols=feature_cols,
log_path='ml.log',
max_iter=10000)
```
```
# Train an instance and save it
model = ml.fit(X_train, y_train)
ml.model_save()
ml.predict_proba(X_test) # prediction check right after training
# Load-and-predict verification (the model is passed in explicitly)
ml.predict_proba(X_test, model=model)
ml.model_save(model=model) # by default the file also stores the other related information
```
## 加载预测
```
# Directory + model type + version number
model_save_path = ml.model_save_path
model_save_path # '/tmp/models/lr_1.pkl'
```
- 真正的预测,一个空的模板+模型路径+数据
```
# Test saving and loading the model, starting from an empty template
ml2 = MLBase()
model = ml2.model_load(model_save_path)
# NOTE(review): the file loaded above is '/tmp/models/lr_1.pkl', yet the recorded
# output below says lr_2 - confirm which value ml2.model_save_path really holds.
print(ml2.model_save_path) # '/tmp/models/lr_2.pkl'
# Predict with the freshly loaded template (ml2) rather than the trained one (ml),
# since this section demonstrates prediction from an empty template.
y_probs2 = ml2.predict_proba(X_test,model=model)
y_probs2[0]
```
- 只加载路径即可:这是因为保存模型文件时已经将模型其他相关信息存储在文件中了
```
# Excerpt from the body of model_load; its signature is kept below as a comment.
# def model_load(self, model_save_path=None,
# model_save_dir=None, model_type=None, model_version=None):
# pkl_load yields two values: the model and the metadata saved alongside it.
model, model_msg = pkl_load(file_path=model_save_path,use_joblib=True)
self.model = model
# model = joblib.load(model_save_path)
pc.lg(f"LR模型已从 {model_save_path} 加载")
# Restore the template's settings from the metadata stored with the model,
# which is why only the path is needed when loading.
self.model_name = model_msg['model_name']
self.model_type = model_msg['model_type']
self.model_version = model_msg['model_version']
self.feature_cols = model_msg['feature_cols']
self.model_params = model_msg['model_params']
```
|
|
```
# Make the local tpf source tree importable before importing from it.
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase
# Column names and values of the data
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
# Keep only the first three columns, in their original data order.
feature_cols=feature_cols[:3]
feature_cols
```
['mean radius', 'mean texture', 'mean perimeter']
```
from tpf.mlib import MLBase
# Build the template with the three selected feature columns.
ml = MLBase(
model_type='lr',
model_version=2,
model_save_dir='/tmp/',
feature_cols=feature_cols,
log_path='ml.log',
max_iter=10000)
```
```
# Train, save the model, then display the metadata recorded for it.
ml.fit(X=X_train,y=y_train)
ml.model_save()
ml.model_msg()
```
```
{'model_name': 'lr_2',
'model_type': 'lr',
'model_version': 2,
'feature_cols': ['mean perimeter', 'mean radius', 'mean texture'],
'feature_count': 3,
'model_params': {'max_iter': 10000},
'model_save_dir': '/tmp/',
'model_save_path': '/tmp/lr_2.pkl'}
```
```
ml.feature_cols
['mean perimeter', 'mean radius', 'mean texture']
ml.feature_names()
['mean perimeter', 'mean radius', 'mean texture']
```
- 获取模型
```
# Get the model by type and version; only the save directory is given to the template
ml = MLBase(model_save_dir='/tmp/')
model = ml.get_model(model_type='lr', model_version=2)
```
```
# Alternative: set the model type and version on the template first,
# then call model_load without arguments.
ml.set_model_msg(model_type='lr', model_version=2)
model = ml.model_load()
```
|
|
- 先save后load
```
# Make the local tpf source tree importable before importing from it.
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase
# Column names and values of the data
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
# Keep only the first three columns, in their original data order.
feature_cols=feature_cols[:3]
feature_cols
```
```
from tpf.mlib import MLBase
# Build the template with the three selected feature columns.
ml = MLBase(
model_type='lr',
model_version=2,
model_save_dir='/tmp/',
feature_cols=feature_cols,
log_path='ml.log',
max_iter=10000)
```
```
# Train first; saving to disk is a separate step.
ml.fit(X=X_train,y=y_train)
```
```
ml.model_save() # save must be called for the model to actually be written to disk
print(ml.model_save_path) # /tmp/lr_2.pkl
# Load either by explicit path or by model type + version.
ml.model_load(model_save_path='/tmp/lr_2.pkl')
ml.model_load(model_type='lr',model_version=2,)
```
|
|
|