Logistic Regression Parameters Explained
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 11 10:12:48 2020
@author: Admin
"""
# Load the data
from sklearn import datasets
import numpy as np
iris = datasets.load_iris()
X = iris.data[:,[2,3]]
y = iris.target
print("Class labels:",np.unique(y)) #打印分类类别的种类
# Split into training and test sets
from sklearn.model_selection import train_test_split
## 30% test data, 70% training data; stratify=y keeps the same class proportions in the train and test splits
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1,stratify=y)
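## A quick sanity check (added here; not in the original post): the effect of
## stratify=y can be verified by comparing class counts with np.bincount:
print("Labels counts in y:", np.bincount(y))
print("Labels counts in y_train:", np.bincount(y_train))
print("Labels counts in y_test:", np.bincount(y_test))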
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
## Estimate mu and sigma from the training data
sc.fit(X_train)
## Standardize both sets using the training data's mu and sigma
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
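## A minimal sanity check (added; not in the original post): after the
## transform, each feature of the training data should have mean ~0 and std ~1:
print("train mean:", X_train_std.mean(axis=0))  # approximately [0. 0.]
print("train std:", X_train_std.std(axis=0))    # approximately [1. 1.]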
## Plot the decision regions (only possible to visualize with 2 features)
import matplotlib.pyplot as plt
## Jupyter magic; omit the next line when running as a plain .py script
%matplotlib inline
from matplotlib.colors import ListedColormap
def plot_decision_region(X, y, classifier, resolution=0.02):
    # markers and colors for up to five classes
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolors='black')
# Logistic regression. With three classes and two features, the fitted model
# has three intercepts and three coefficient pairs (one row of coef_ per class).
# C is the inverse of the regularization strength: C=100.0 means weak L2 regularization.
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(C=100.0,random_state=1)
lr.fit(X_train_std,y_train)
print("Class:",lr.classes_)
print("Coef:",lr.coef_)
print("intercept",lr.intercept_)
print("n_iter",lr.n_iter_)
'''
Class: [0 1 2]
Coef: [[-5.61268224 -4.30718677]
[ 2.40969576 -2.07325711]
[ 9.51524418 5.39484899]]
intercept [-5.8391281 -0.75730853 -9.21167569]
n_iter [9]
'''
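## A minimal sketch (added; not in the original post) of how coef_ and
## intercept_ turn into probabilities. It assumes the multinomial (softmax)
## formulation, which recent sklearn versions use by default for multiclass
## problems with the lbfgs solver; a one-vs-rest solver normalizes differently,
## so compare against predict_proba to confirm:
scores = X_test_std[:3] @ lr.coef_.T + lr.intercept_             # raw scores, shape (3, 3)
exp_scores = np.exp(scores - scores.max(axis=1, keepdims=True))  # numerically stable softmax
probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
print(probs)                  # should match lr.predict_proba(X_test_std[:3, :])
print(probs.argmax(axis=1))   # should match lr.predict(X_test_std[:3, :])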
plot_decision_region(X_train_std,y_train,classifier=lr,resolution=0.02)
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
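## Exploratory sketch (added; not in the original post): since C is the inverse
## of the regularization strength, smaller C means stronger L2 regularization
## and smaller coefficients:
for C in [100.0, 1.0, 0.01]:
    lr_c = LogisticRegression(C=C, random_state=1)
    lr_c.fit(X_train_std, y_train)
    print("C=%6.2f  mean |coef|: %.4f" % (C, np.abs(lr_c.coef_).mean()))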
# Prediction
## Predicted probability of each class for the first three test samples
print("Predicted class probabilities for the first three samples:\n", lr.predict_proba(X_test_std[:3, :]))
print("\n============================")
## Predicted class labels for the first three test samples
print("\nPredicted class labels for the first three samples:\n", lr.predict(X_test_std[:3, :]))
print("\n============================")
'''
Predicted class probabilities for the first three samples:
 [[3.17983737e-08 1.44886616e-01 8.55113353e-01]
 [8.33962295e-01 1.66037705e-01 4.55557009e-12]
 [8.48762934e-01 1.51237066e-01 4.63166788e-13]]
============================
Predicted class labels for the first three samples:
 [2 0 0]
============================
'''
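## A quick evaluation (added; not part of the original post) using the standard
## score method, which returns mean accuracy on the given data:
print("Test accuracy: %.3f" % lr.score(X_test_std, y_test))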