sklearn中支持向量机(SVM)用于分类
摘要:本文使用sklearn中的支持向量机(SVM)对鸢尾花数据进行分类。
00 获取sklearn中鸢尾花数据
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

# Load the iris data set (150 samples, 4 features, 3 classes) and split it
# into a random 120-sample training set and the remaining 30-sample test set.
iris = datasets.load_iris()
dex1 = np.random.choice(150, size=120, replace=False)
# Test indices are the complement of the training indices; np.setdiff1d
# returns them sorted ascending, matching the original manual loop
# (whose body had lost its indentation and could not run as a script).
dex2 = np.setdiff1d(np.arange(150), dex1)
train_x = iris.data[dex1, :]
train_y = iris.target[dex1]
test_x = iris.data[dex2, :]
test_y = iris.target[dex2]
01 SVM线性分类
# Fit a linear SVM (LinearSVC, liblinear-based) with its default
# squared-hinge loss, then inspect the fitted model. The original was a
# REPL transcript; bare expressions and Out[...] lines are turned into
# prints with the observed values kept as comments.
classi = svm.LinearSVC(max_iter=5000)
classi.fit(train_x, train_y)
print(classi.coef_)       # one (4,) weight row per class (one-vs-rest)
# Observed:
# [[ 0.18423835  0.45122936 -0.80794123 -0.4507153 ]
#  [ 0.07950217 -0.8192085   0.42309252 -0.99830954]
#  [-0.96954958 -0.97387786  1.54891257  1.68449995]]
print(classi.intercept_)  # per-class bias; observed [ 0.109562  1.31266596 -1.50936256]
print(classi.n_iter_)     # iterations actually run; observed 3038
print(classi.score(test_x, test_y))   # mean test accuracy; observed 0.9667
print(classi.predict(test_x))         # predicted labels for the 30 test samples
# Observed:
# [0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 2 1 1 2 2 2 2 2 2 2 2 2 2]
注:LinearSVC 没有 predict_proba 方法,不能直接输出类别概率。
考察参数loss的影响:
# loss='squared_hinge' is LinearSVC's default loss; this run makes the
# choice explicit. Observed outputs from the original transcript are
# preserved as comments (raw output lines are syntax errors in a script).
classi = svm.LinearSVC(loss='squared_hinge', max_iter=5000)
classi.fit(train_x, train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x, test_y))
# Observed:
# [[ 0.0674815   0.5778595  -0.70756411 -0.42914531]
#  [ 0.06964455 -1.02770961  0.23367496 -0.59335033]
#  [-0.77073817 -1.06192286  1.38308227  1.60386805]]
# [ 0.05251698  2.22466216 -1.55255915]
# 3076
# 1.0
# loss='hinge' (the standard SVM loss) converges much more slowly here,
# hence the larger max_iter. Observed outputs kept as comments.
classi = svm.LinearSVC(loss='hinge', max_iter=100000)
classi.fit(train_x, train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x, test_y))
# Observed:
# [[ 0.09327859  0.68619761 -0.90122056 -0.46567814]
#  [ 0.59591561 -1.75722195  0.2309207  -1.11337723]
#  [-1.13269167 -1.21043708  1.79676509  2.10920917]]
# [ 0.02469279  1.71864239 -1.67720285]
# 19790  (far more iterations than squared_hinge)
# 0.9666666666666667
考察参数penalty的影响:
# penalty='l1' requires dual=False in LinearSVC. The L1 penalty drives
# some coefficients exactly to zero (sparse weights) — visible below.
classi = svm.LinearSVC(penalty='l1', dual=False, max_iter=100000)
classi.fit(train_x, train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x, test_y))
# Observed (note the exact zeros produced by L1 regularization):
# [[ 0.          0.71088514 -0.82089186  0.        ]
#  [ 0.         -1.18510039  0.18205107 -0.44226839]
#  [-0.62838108 -1.03256742  1.36785984  1.78641921]]
# [ 0.          3.12346065 -2.76267249]
# 2098
# 1.0
# penalty='l2' with dual=False solves the primal problem; it converges in
# very few iterations here (observed: 8) and yields dense weights.
classi = svm.LinearSVC(penalty='l2', dual=False, max_iter=100000)
classi.fit(train_x, train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x, test_y))
# Observed:
# [[ 0.06418587  0.5804145  -0.70566552 -0.42951467]
#  [ 0.06982957 -1.02744762  0.23370738 -0.59354675]
#  [-0.77077082 -1.06191754  1.38312325  1.60387392]]
# [ 0.05723343  2.22288996 -1.55258049]
# 8
# 1.0
考察参数C的影响:
# Effect of the regularization strength C: sweep 100 values on a log grid
# from 1e-2 to 1e1 and plot test accuracy versus C (log-scaled x-axis).
# The original loop body had lost its indentation; restored here.
cs = np.logspace(-2, 1, 100)
scor = []
for C in cs:
    classi = svm.LinearSVC(C=C, max_iter=100000)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(cs, scor)
plt.xscale('log')
02 SVM非线性分类
# Kernel SVM (SVC) with a linear kernel; max_iter=-1 means no iteration
# limit. coef_ is only defined for kernel='linear'.
classi = svm.SVC(kernel='linear', max_iter=-1)
classi.fit(train_x, train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.score(test_x, test_y))
# Observed:
# [[-0.04617041  0.52139469 -1.00309152 -0.46414917]
#  [-0.00709388  0.17889062 -0.53842766 -0.29225126]
#  [ 0.41433436  0.33921135 -2.05263433 -1.87171831]]
# [1.45194667 1.50728785 9.70602451]
# 0.9666666666666667
在kernel='poly'下,考察degree,gamma,coef0对模型预测性能的影响:
# kernel='poly': effect of the polynomial degree (1..19) on test accuracy.
# gamma='auto' uses 1/n_features. Loop indentation restored.
degrees = range(1, 20)
scor = []
for degree in degrees:
    classi = svm.SVC(kernel='poly', degree=degree, gamma='auto')
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(degrees, scor)
plt.ylim(0, 1.1)
# kernel='poly': effect of gamma (1..19) at fixed degree=3 on test accuracy.
# Loop indentation restored.
gammas = range(1, 20)
scor = []
for gamma in gammas:
    classi = svm.SVC(kernel='poly', degree=3, gamma=gamma)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(gammas, scor)
plt.ylim(0, 1.1)
# kernel='poly': effect of the independent term coef0 (0..19) at fixed
# degree=3, gamma=10 on test accuracy. Loop indentation restored.
coef0s = range(0, 20)
scor = []
for coef0 in coef0s:
    classi = svm.SVC(kernel='poly', degree=3, gamma=10, coef0=coef0)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(coef0s, scor)
plt.ylim(0, 1.1)
考察kernel=‘rbf’时,gamma对模型预测性能的影响:
# kernel='rbf': effect of gamma (1..19) on test accuracy.
# Loop indentation restored.
gammas = range(1, 20)
scor = []
for gamma in gammas:
    classi = svm.SVC(kernel='rbf', gamma=gamma)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(gammas, scor)
plt.ylim(0, 1.1)
考察kernel=‘sigmoid’时,gamma,coef0对模型预测性能的影响:
# kernel='sigmoid': effect of gamma on a log grid 1e-2..1e1 at coef0=0;
# plot test accuracy on a log-scaled x-axis. Loop indentation restored.
gammas = np.logspace(-2, 1, 20)
scor = []
for gamma in gammas:
    classi = svm.SVC(kernel='sigmoid', gamma=gamma, coef0=0)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(gammas, scor)
plt.ylim(0, 1.1)
plt.xscale('log')
# kernel='sigmoid': effect of coef0 (20 evenly spaced values in [0, 5])
# at fixed gamma=0.01 on test accuracy. Loop indentation restored.
coef0s = np.linspace(0, 5, 20)
scor = []
for coef0 in coef0s:
    classi = svm.SVC(kernel='sigmoid', gamma=0.01, coef0=coef0)
    classi.fit(train_x, train_y)
    scor.append(classi.score(test_x, test_y))
plt.plot(coef0s, scor)
plt.ylim(0, 1.1)