import matplotlib.pyplot as plt
import numpy as np
import matplotlib as mpl
import scipy
from collections import defaultdict
import csv
import os
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import minmax_scale, MinMaxScaler
from sklearn.pipeline import Pipeline
# Select the TkAgg GUI backend for matplotlib.
mpl.use("TkAgg")

# Load the dataset: the file has no header row; every column except the last
# is read as a float feature, and the last column is the class label, encoded
# as 1 when it equals 'g' and 0 otherwise.
data_filename = "data/ionosphere.data"
data = pd.read_csv(data_filename, header=None)
X = np.array(data.iloc[:, :-1], dtype=float)
y = np.array(data.iloc[:, -1] == 'g', dtype=int)

# Baseline: one train/test split (seeded so the split is reproducible) and a
# k-nearest-neighbours classifier with default hyper-parameters.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=14)
estimator = KNeighborsClassifier()
estimator.fit(X_train, y_train)
y_predicted = estimator.predict(X_test)
accuracy = np.mean(y_test == y_predicted)
print("K近邻算法正确率:{:.2f}%".format(accuracy*100))

# K-fold cross-validation: report the mean accuracy for each k in 1..20.
for n_neighbors in range(1, 21):
    # cross_val_score clones the estimator per fold, so updating the
    # hyper-parameter on the shared instance is enough.
    estimator.set_params(n_neighbors=n_neighbors)
    scores = cross_val_score(estimator, X, y, scoring='accuracy')
    average_accuracy = np.mean(scores)
    print("{:.2f}".format(average_accuracy))

# Preprocessing: compare raw features against min-max scaled features.
# NOTE: minmax_scale is applied to the full matrix before cross-validation, so
# the scaling statistics include the held-out folds; the pipeline section
# below fits the scaler inside each training fold instead.
X_transformed = minmax_scale(X)
for n_neighbors in range(1, 21):
    estimator.set_params(n_neighbors=n_neighbors)
    scores = cross_val_score(estimator, X, y, scoring='accuracy')
    scores_transformed = cross_val_score(estimator, X_transformed, y, scoring='accuracy')
    average_accuracy = np.mean(scores)
    average_accuracy_transformed = np.mean(scores_transformed)
    print("处理前:{:.3f},处理后{:.3f}".format(average_accuracy,average_accuracy_transformed))

# Pipeline: chain the scaler and the classifier so that both steps are fitted
# together on each cross-validation training fold.
scaling_pipeline = Pipeline(
    steps=[
        ('scale', MinMaxScaler()),
        ('predict', KNeighborsClassifier(n_neighbors=2)),
    ]
)
scores = cross_val_score(scaling_pipeline, X, y, scoring='accuracy')
average_accuracy = np.mean(scores)
print("{:.2f}%".format(average_accuracy * 100))
# Comment section (appears to be web-page residue; not part of the script)