"""K-nearest-neighbours (KNN) demo on scikit-learn's breast cancer dataset.

Code listing from the blog post "Machine learning algorithms: nearest
neighbours (KNN)", published 2017-07-25 in the "Machine Learning" category.
The post lists the sections: principle, algorithm, tensorflow, sklearn.

The script loads the dataset, converts it to a DataFrame, prints the class
distribution, splits it into train/test sets, fits a 1-nearest-neighbour
classifier, and prints predictions and the test accuracy.
"""
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Loaded once at module level; convertToDataframe() reads this global.
cancer = load_breast_cancer()
print(cancer.DESCR)


def convertToDataframe() -> pd.DataFrame:
    """Return the loaded dataset as a DataFrame.

    The frame has one column per entry of ``cancer['feature_names']`` plus a
    final ``'target'`` column holding ``cancer.target``.
    """
    feature = cancer['feature_names']
    # Rely on the default 0..n-1 index rather than hard-coding the row count.
    df = pd.DataFrame(cancer.data, columns=feature)
    df['target'] = cancer.target
    return df


def classDistribution(cancerdf: pd.DataFrame) -> pd.Series:
    """Count the rows of each class in *cancerdf*.

    Rows with ``target == 0`` are counted as malignant and rows with
    ``target == 1`` as benign.

    Returns:
        A Series indexed by ``['malignant', 'benign']`` with the two counts.
    """
    malignant = int((cancerdf['target'] == 0).sum())
    benign = int((cancerdf['target'] == 1).sum())
    index = ['malignant', 'benign']
    return pd.Series([malignant, benign], index=index)


def splitDataset(cancerdf: pd.DataFrame):
    """Split *cancerdf* into training and test sets.

    Every column except ``'target'`` is used as a feature and ``'target'`` is
    the label. Selecting by name avoids hard-coding the number of feature
    columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split(X, y, random_state=0)``.
    """
    X = cancerdf.drop(columns='target')
    y = cancerdf['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    return X_train, X_test, y_train, y_test


def trainKNN(feature_data, class_data, k):
    """Fit and return a ``KNeighborsClassifier`` with ``n_neighbors=k``."""
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(feature_data, class_data)
    return knn


print("Now convert the dataset into dataframe:\n")
cancerdf = convertToDataframe()
print(cancerdf.head())

print("\nThe class distribution is:\n")
print(classDistribution(cancerdf))

print(
    "\nUsing train_test_split, split X and y into training and test sets "
    "(X_train, X_test, y_train, and y_test).using random_state=0\n"
)
X_train, X_test, y_train, y_test = splitDataset(cancerdf)
print("X_train: ", X_train.shape)
print("X_test: ", X_test.shape)
print("y_train: ", y_train.shape)
print("y_test: ", y_test.shape)

# Train first, then report completion (trainKNN prints nothing, so the
# output order is the same as before).
knn = trainKNN(X_train, y_train, 1)
print("Now, training the Knn model: done!\n")

print("Predict the class label using the mean value for each feature.\n")
print("The mean value for each feature.\n")
means = cancerdf.mean()
print(means)
# Drop the trailing 'target' mean and shape the rest as a single sample.
means = means.iloc[:-1].values.reshape(1, -1)
# Wrap the sample in a DataFrame carrying the training column names so that
# predict() sees the same feature names the model was fitted with.
means_predict = knn.predict(pd.DataFrame(means, columns=X_train.columns))
print("The predict result : \n", means_predict)

print("Predict the class labels for the test set X_test")
test_predict = knn.predict(X_test)
print("The prediction result :\n", test_predict)
print("The prediction result accuracy : ", knn.score(X_test, y_test))

# Post footer: scan the WeChat QR code to join the Knowledge Planet group
# "漏洞百出"; click the image to enlarge it.