集成API
```
X_train.shape,X_test.shape
((455, 30), (114, 30))
```

## 列的排序

- 数据中最开始的列

```
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase

# 数据的列名与值
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
feature_cols=feature_cols[:3]
feature_cols  # ['mean radius', 'mean texture', 'mean perimeter']
```

- MLBase初始化时的列顺序与原始数据的列顺序一致
- self.feature_cols = feature_cols

```
from tpf.mlib import MLBase
ml = MLBase(
    model_type='lr',
    model_version=2,
    model_save_dir='/tmp/',
    feature_cols=feature_cols,
    log_path='ml.log',
    max_iter=10000)
```

```
def feature_names(self):
    """
    获取特征列名称,固定为升序排序
    """
    feature_cols = set(self.feature_cols)
    feature_cols = sorted(feature_cols)
    self.feature_cols = feature_cols
    return feature_cols

def getX(self, X):
    return X[self.feature_names()]

def fit(self, X, y, X_valid=None, y_valid=None, cat_features=None):
    X = self.getX(X)
```

## 数据的形状

```
X_train.shape,X_test.shape  # ((455, 30), (114, 30))
```

- 训练之后,原始的数据的列顺序不变,且列数不变
- 训练所用,只是原数据的一个子集:X[self.feature_names()]
## 模型保存

- 核心参数:模型类型,版本,参数,使用的特征列
- 辅助参数:保存目录,日志

```
from tpf.mlib import MLBase

# 定义模板,核心参数:模型类型,参数,使用的特征列,辅助参数:保存目录,名称,日志
ml = MLBase(
    model_type='lr',
    model_version=1,
    model_save_dir='/tmp/models',
    feature_cols=feature_cols,
    log_path='ml.log',
    max_iter=10000)
```

```
#训练实例及保存
model = ml.fit(X_train, y_train)
ml.model_save()
ml.predict_proba(X_test)  #训练时的预测测试

#加载预测验证
ml.predict_proba(X_test, model=model)
ml.model_save(model=model)  #默认文件中存储了其他相关信息
```

## 加载预测

```
# 目录+模型类型+版本号
model_save_path = ml.model_save_path
model_save_path  # '/tmp/models/lr_1.pkl'
```

- 真正的预测,一个空的模板+模型路径+数据

```
# 测试模型保存和加载
ml2 = MLBase()
model = ml2.model_load(model_save_path)
print(ml2.model_save_path)  # '/tmp/models/lr_2.pkl'
y_probs2 = ml.predict_proba(X_test,model=model)
y_probs2[0]
```

- 只加载路径即可:这是因为保存模型文件时已经将模型其他相关信息存储在文件中了

```
# def model_load(self, model_save_path=None,
#                model_save_dir=None, model_type=None, model_version=None):
model, model_msg = pkl_load(file_path=model_save_path,use_joblib=True)
self.model = model
# model = joblib.load(model_save_path)
pc.lg(f"LR模型已从 {model_save_path} 加载")
self.model_name = model_msg['model_name']
self.model_type = model_msg['model_type']
self.model_version = model_msg['model_version']
self.feature_cols = model_msg['feature_cols']
self.model_params = model_msg['model_params']
```
```
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase

# 数据的列名与值
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
feature_cols=feature_cols[:3]
feature_cols
```

['mean radius', 'mean texture', 'mean perimeter']

```
from tpf.mlib import MLBase
ml = MLBase(
    model_type='lr',
    model_version=2,
    model_save_dir='/tmp/',
    feature_cols=feature_cols,
    log_path='ml.log',
    max_iter=10000)
```

```
ml.fit(X=X_train,y=y_train)
ml.model_save()
ml.model_msg()
```

```
{'model_name': 'lr_2',
 'model_type': 'lr',
 'model_version': 2,
 'feature_cols': ['mean perimeter', 'mean radius', 'mean texture'],
 'feature_count': 3,
 'model_params': {'max_iter': 10000},
 'model_save_dir': '/tmp/',
 'model_save_path': '/tmp/lr_2.pkl'}
```

```
ml.feature_cols
['mean perimeter', 'mean radius', 'mean texture']
ml.feature_names()
['mean perimeter', 'mean radius', 'mean texture']
```

- 获取模型

```
# 获取模型
ml = MLBase(model_save_dir='/tmp/')
model = ml.get_model(model_type='lr', model_version=2)
```

```
ml.set_model_msg(model_type='lr', model_version=2)
model = ml.model_load()
```
- 先save后load

```
import sys
sys.path.insert(0,r'/ai/wks/aitpf/src')
from tpf.mlib import MLBase

# 数据的列名与值
X_train, X_test, y_train, y_test = MLBase.data_ruxianai()
feature_cols = X_train.columns.tolist()
feature_cols=feature_cols[:3]
feature_cols
```

```
from tpf.mlib import MLBase
ml = MLBase(
    model_type='lr',
    model_version=2,
    model_save_dir='/tmp/',
    feature_cols=feature_cols,
    log_path='ml.log',
    max_iter=10000)
```

```
ml.fit(X=X_train,y=y_train)
```

```
ml.model_save()  # 必须要执行save才能真正将Model保存到磁盘
print(ml.model_save_path)  # /tmp/lr_2.pkl
ml.model_load(model_save_path='/tmp/lr_2.pkl')
ml.model_load(model_type='lr',model_version=2,)
```
参考