Chybeta

机器学习算法:最近邻(KNN)

机器学习算法:最近邻(KNN)

原理

算法

tensorflow

sklearn

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Load the breast-cancer dataset shipped with scikit-learn (see the
# sklearn.datasets import above) and print its DESCR attribute.
cancer = load_breast_cancer()
print(cancer.DESCR)
def convertToDataframe(dataset=None):
    """Build a DataFrame of feature columns followed by a ``target`` column.

    Args:
        dataset: Mapping that provides ``'data'`` (2-D feature matrix),
            ``'feature_names'`` (one label per feature column) and
            ``'target'`` (one label per row). When omitted, the module-level
            ``cancer`` dataset is used, so zero-argument calls keep working.

    Returns:
        A ``pandas.DataFrame`` with one column per feature name plus a
        trailing ``target`` column, indexed ``0..n_rows-1``.
    """
    if dataset is None:
        dataset = cancer
    # Let pandas assign the default 0..n-1 index so the row count is taken
    # from the data itself rather than written into the code.
    df = pd.DataFrame(dataset['data'], columns=dataset['feature_names'])
    df['target'] = dataset['target']
    return df
# Convert the loaded dataset to a DataFrame (kept in `cancerdf` for the rest
# of the script) and print its first rows via DataFrame.head().
print("Now convert the dataset into dataframe:\n")
cancerdf = convertToDataframe()
print(cancerdf.head())
def classDistribution(cancerdf):
    """Count how many rows of ``cancerdf`` belong to each class.

    Rows whose ``target`` equals 0 are counted as ``malignant`` and rows
    whose ``target`` equals 1 as ``benign``.

    Args:
        cancerdf: DataFrame containing a ``target`` column.

    Returns:
        A ``pandas.Series`` indexed by ``['malignant', 'benign']`` holding
        the two counts, in that order.
    """
    labels = cancerdf['target']
    # Summing a boolean mask counts the rows where the comparison holds.
    counts = {
        'malignant': int((labels == 0).sum()),
        'benign': int((labels == 1).sum()),
    }
    return pd.Series(list(counts.values()), index=list(counts.keys()))
def splitDataset(cancerdf):
    """Split ``cancerdf`` into training and test features and labels.

    The first 30 columns are used as features and the column at position 30
    as the label. ``train_test_split`` is called with ``random_state=0`` and
    otherwise default settings.

    Args:
        cancerdf: DataFrame with at least 31 columns.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    feature_columns = cancerdf.columns[:30]
    label_column = cancerdf.columns[30]
    parts = train_test_split(
        cancerdf[feature_columns],
        cancerdf[label_column],
        random_state=0,
    )
    # train_test_split yields [X_train, X_test, y_train, y_test] for two inputs.
    return tuple(parts)
def trainKNN(feature_data, class_data, k):
    """Create a ``KNeighborsClassifier`` with ``k`` neighbors and fit it.

    Args:
        feature_data: Training features passed to ``fit``.
        class_data: Training labels passed to ``fit``.
        k: Value used for ``n_neighbors``.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    classifier = KNeighborsClassifier(n_neighbors=k)
    # fit() returns the estimator itself, so its result can be returned directly.
    return classifier.fit(feature_data, class_data)
# --- Class balance -----------------------------------------------------------
print("\nThe class distribution is:\n")
print(classDistribution(cancerdf))

# --- Train/test split --------------------------------------------------------
print("\nUsing train_test_split, split X and y into training and test sets (X_train, X_test, y_train, and y_test).using random_state=0\n")
X_train, X_test, y_train, y_test = splitDataset(cancerdf)
print("X_train: " ,X_train.shape)
print("X_test: " ,X_test.shape)
print("y_train: " ,y_train.shape)
print("y_test: ", y_test.shape)

# --- Training ----------------------------------------------------------------
# Fit first and announce afterwards, so "done!" is only printed once training
# has actually completed without raising.
knn = trainKNN(X_train,y_train,1)
print("Now, training the Knn model: done!\n")

# --- Prediction for the "average" sample -------------------------------------
print("Predict the class label using the mean value for each feature.\n")
print("The mean value for each feature.\n")
means = cancerdf.mean()
print(means)
# Drop the trailing 'target' entry and keep the sample as a one-row DataFrame
# whose columns are the feature names. The classifier was fitted on a
# DataFrame, so passing a bare ndarray here would make scikit-learn warn that
# the input lacks the feature names seen during fit.
means = means.iloc[:-1].to_frame().T
means_predict = knn.predict(means)
print("The predict result : ",means_predict)

# --- Prediction and accuracy on the held-out test set -------------------------
print("Predict the class labels for the test set X_test")
test_predict = knn.predict(X_test)
print("The prediction result :\n",test_predict)
print("The prediction result accuracy : ",knn.score(X_test,y_test))
微信扫码加入知识星球【漏洞百出】
chybeta WeChat Pay

点击图片放大,扫码加入知识星球【漏洞百出】