[TOC]

多目标回归

多目标学习模型

将下面的示例数据保存为 test.csv,并放在运行脚本时的当前工作目录下:

x1,x2,x3,x4,y1,y2,y3
1,2,3,4,0,1,1
2,3,4,5,1,0,1
3,3,4,5,0,1,1
1,2,3,4,0,1,1
2,3,4,5,1,0,1
3,3,4,5,0,1,1
1,2,3,4,0,1,1
2,3,4,5,1,0,1
3,3,4,5,0,1,1
3,3,4,5,0,1,1

__author__ = 'yanerle'
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputRegressor
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor

# End-to-end multi-output regression demo:
#   1. load test.csv and split it into features / targets,
#   2. fit a KernelPCA feature transform and a multi-output random forest,
#   3. persist both fitted objects with pickle,
#   4. reload them and predict for two new samples.

# Load the sample data; the relative path means test.csv must be in the
# current working directory.
df = pd.read_csv("test.csv")
print("原始数据:\n", df)

# Target columns; every remaining column is used as a feature.
y_col = ['y1', 'y2', 'y3']
y = df[y_col]
# Drop by explicit label list rather than by passing the target DataFrame.
X = df[df.columns.drop(y_col)]

print("因变量:\n", y.head())
print("自变量:\n", X.head())

# Split the data set: 20% of the rows are held out for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=39)

# Feature transform: RBF-kernel PCA (a projection, not a normalisation).
# It is fitted on the training split only and then applied to the test split.
pca = KernelPCA(kernel='rbf', fit_inverse_transform=True, random_state=39)
X_train1 = pca.fit_transform(X_train)
X_test1 = pca.transform(X_test)

# Persist the fitted transform; `with` guarantees the file is flushed/closed.
with open("pca.pickle", "wb") as f:
    pickle.dump(pca, f)

# Model training: one random forest per target column.
rfg = MultiOutputRegressor(RandomForestRegressor(n_estimators=100))
rfg.fit(X_train1, y_train)
y_pred = rfg.predict(X_test1)

# Persist the fitted model.
with open("rfg.pickle", "wb") as f:
    pickle.dump(rfg, f)

# Model evaluation on the held-out split (value returned by the estimator's
# score(); see the scikit-learn docs for its exact definition).
print(rfg.score(X_test1, y_test))

# Prediction for new samples; column names match the training features.
x = {'x1': [1, 2], 'x2': [2, 2], 'x3': [3, 3], 'x4': [4, 4]}
x = pd.DataFrame(x)
print(x)

# Reload the transform from disk.
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# this script itself produced, never untrusted input.
with open("pca.pickle", "rb") as f:
    pca = pickle.load(f)
x_1 = pca.transform(x)

print(x_1)

# Reload the model from disk (same pickle caveat as above) and predict.
with open("rfg.pickle", "rb") as f:
    rfg1 = pickle.load(f)
y = rfg1.predict(x_1)
print(y)