Support Vector Machines (SVM) for Classification in sklearn

Abstract: this article walks through classification with sklearn's SVM classes (LinearSVC and SVC) on the iris dataset.

00 Loading the iris data from sklearn

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets, svm

iris=datasets.load_iris()
# randomly pick 120 of the 150 samples as the training set
dex1=np.random.choice(150,size=120,replace=False)
# the remaining 30 indices form the test set
dex2=[]
for i in range(150):
    if i not in dex1:
        dex2.append(i)
train_x=iris.data[dex1,:]
train_y=iris.target[dex1]
test_x=iris.data[dex2,:]
test_y=iris.target[dex2]
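
The same random 120/30 split can also be written with sklearn's own helper; a minimal sketch, using the standard train_test_split utility (not used in the original code):

from sklearn.model_selection import train_test_split

# random split: 120 training samples, 30 test samples
train_x,test_x,train_y,test_y=train_test_split(iris.data,iris.target,train_size=120,test_size=30)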

01 Linear SVM classification

classi=svm.LinearSVC(max_iter=5000)
classi.fit(train_x,train_y)
classi.coef_
Out[21]:
array([[ 0.18423835,  0.45122936, -0.80794123, -0.4507153 ],
       [ 0.07950217, -0.8192085 ,  0.42309252, -0.99830954],
       [-0.96954958, -0.97387786,  1.54891257,  1.68449995]])

classi.intercept_
Out[22]: array([ 0.109562  ,  1.31266596, -1.50936256])

classi.n_iter_
Out[23]: 3038

classi.score(test_x,test_y)
Out[24]: 0.9666666666666667

classi.predict(test_x)
Out[25]:
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2])

Note that LinearSVC does not provide a predict_proba method.
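
If per-class scores are still needed, decision_function can be used instead, and calibrated probabilities can be obtained by wrapping the linear SVM in CalibratedClassifierCV. A minimal sketch (both are standard sklearn APIs; the calibration step is not part of the original text):

# per-class decision scores, shape (n_samples, n_classes)
scores=classi.decision_function(test_x)

# optional: calibrated probabilities via cross-validated calibration
from sklearn.calibration import CalibratedClassifierCV
calib=CalibratedClassifierCV(svm.LinearSVC(max_iter=5000))
calib.fit(train_x,train_y)
proba=calib.predict_proba(test_x)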

Examining the effect of the loss parameter:

classi=svm.LinearSVC(loss='squared_hinge',max_iter=5000)
classi.fit(train_x,train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x,test_y))
[[ 0.0674815   0.5778595  -0.70756411 -0.42914531]
 [ 0.06964455 -1.02770961  0.23367496 -0.59335033]
 [-0.77073817 -1.06192286  1.38308227  1.60386805]]
[ 0.05251698  2.22466216 -1.55255915]
3076
1.0

classi=svm.LinearSVC(loss='hinge',max_iter=100000)
classi.fit(train_x,train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x,test_y))
[[ 0.09327859  0.68619761 -0.90122056 -0.46567814]
 [ 0.59591561 -1.75722195  0.2309207  -1.11337723]
 [-1.13269167 -1.21043708  1.79676509  2.10920917]]
[ 0.02469279  1.71864239 -1.67720285]
19790
0.9666666666666667

Examining the effect of the penalty parameter (penalty='l1' requires dual=False, and tends to drive some coefficients to exactly zero, as seen below):

classi=svm.LinearSVC(penalty='l1',dual=False,max_iter=100000)
classi.fit(train_x,train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x,test_y))
[[ 0.          0.71088514 -0.82089186  0.        ]
 [ 0.         -1.18510039  0.18205107 -0.44226839]
 [-0.62838108 -1.03256742  1.36785984  1.78641921]]
[ 0.          3.12346065 -2.76267249]
2098
1.0

classi=svm.LinearSVC(penalty='l2',dual=False,max_iter=100000)
classi.fit(train_x,train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.n_iter_)
print(classi.score(test_x,test_y))
[[ 0.06418587  0.5804145  -0.70566552 -0.42951467]
 [ 0.06982957 -1.02744762  0.23370738 -0.59354675]
 [-0.77077082 -1.06191754  1.38312325  1.60387392]]
[ 0.05723343  2.22288996 -1.55258049]
8
1.0

Examining the effect of the parameter C:

cs=np.logspace(-2,1,100)
scor=[]
for C in cs:
    classi=svm.LinearSVC(C=C,max_iter=100000)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(cs,scor)
plt.xscale('log')

[Figure 1: test accuracy vs. C (log-scaled x-axis)]
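
The same sweep over C can also be done with cross-validation on the training set instead of a fixed test set; a minimal GridSearchCV sketch (the grid of C values here is an assumption):

from sklearn.model_selection import GridSearchCV

# 5-fold cross-validated search over C
param_grid={'C':np.logspace(-2,1,20)}
search=GridSearchCV(svm.LinearSVC(max_iter=100000),param_grid,cv=5)
search.fit(train_x,train_y)
print(search.best_params_,search.best_score_)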


02 Nonlinear SVM classification

classi=svm.SVC(kernel='linear',max_iter=-1)
classi.fit(train_x,train_y)
print(classi.coef_)
print(classi.intercept_)
print(classi.score(test_x,test_y))
[[-0.04617041  0.52139469 -1.00309152 -0.46414917]
 [-0.00709388  0.17889062 -0.53842766 -0.29225126]
 [ 0.41433436  0.33921135 -2.05263433 -1.87171831]]
[1.45194667 1.50728785 9.70602451]
0.9666666666666667
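
Unlike LinearSVC (based on liblinear), SVC is the libsvm-based implementation, supports kernels, and also exposes the support vectors it found; a minimal sketch using standard SVC attributes:

# number of support vectors per class, and the support vectors themselves
print(classi.n_support_)
print(classi.support_vectors_.shape)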

With kernel='poly', examining how degree, gamma, and coef0 affect predictive performance:

degrees=range(1,20)
scor=[]
for degree in degrees:
    classi=svm.SVC(kernel='poly',degree=degree,gamma='auto')
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(degrees,scor)
plt.ylim(0,1.1)

[Figure 2: test accuracy vs. degree (poly kernel)]

gammas=range(1,20)
scor=[]
for gamma in gammas:
    classi=svm.SVC(kernel='poly',degree=3,gamma=gamma)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(gammas,scor)
plt.ylim(0,1.1)

[Figure 3: test accuracy vs. gamma (poly kernel, degree=3)]

coef0s=range(0,20)
scor=[]
for coef0 in coef0s:
    classi=svm.SVC(kernel='poly',degree=3,gamma=10,coef0=coef0)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(coef0s,scor)
plt.ylim(0,1.1)

[Figure 4: test accuracy vs. coef0 (poly kernel, degree=3, gamma=10)]
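
Instead of varying degree, gamma, and coef0 one at a time as above, the three polynomial-kernel hyperparameters can also be tuned jointly; a minimal GridSearchCV sketch (the grid values are an assumption):

from sklearn.model_selection import GridSearchCV

# joint 5-fold cross-validated search over the poly-kernel hyperparameters
param_grid={'degree':[2,3,4],'gamma':[0.1,1,10],'coef0':[0,1,5]}
search=GridSearchCV(svm.SVC(kernel='poly'),param_grid,cv=5)
search.fit(train_x,train_y)
print(search.best_params_,search.best_score_)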

With kernel='rbf', examining how gamma affects predictive performance:

gammas=range(1,20)
scor=[]
for gamma in gammas:
    classi=svm.SVC(kernel='rbf',gamma=gamma)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(gammas,scor)
plt.ylim(0,1.1)

[Figure 5: test accuracy vs. gamma (rbf kernel)]
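
Because gamma in the RBF kernel acts on raw feature distances, its useful range depends on the scale of the features; standardizing the features first is common practice. A minimal sketch, assuming StandardScaler and make_pipeline (neither is used in the original text):

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# standardize features, then fit an RBF-kernel SVM
pipe=make_pipeline(StandardScaler(),svm.SVC(kernel='rbf',gamma=1))
pipe.fit(train_x,train_y)
print(pipe.score(test_x,test_y))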

With kernel='sigmoid', examining how gamma and coef0 affect predictive performance:

gammas=np.logspace(-2,1,20)
scor=[]
for gamma in gammas:
    classi=svm.SVC(kernel='sigmoid',gamma=gamma,coef0=0)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(gammas,scor)
plt.ylim(0,1.1)
plt.xscale('log')

[Figure 6: test accuracy vs. gamma (sigmoid kernel, log-scaled x-axis)]

coef0s=np.linspace(0,5,20)
scor=[]
for coef0 in coef0s:
    classi=svm.SVC(kernel='sigmoid',gamma=0.01,coef0=coef0)
    classi.fit(train_x,train_y)
    scor.append(classi.score(test_x,test_y))
plt.plot(coef0s,scor)
plt.ylim(0,1.1)

[Figure 7: test accuracy vs. coef0 (sigmoid kernel, gamma=0.01)]
