-
Notifications
You must be signed in to change notification settings - Fork 18
/
06_sklearn_knn.py
56 lines (37 loc) · 1.35 KB
/
06_sklearn_knn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
'''
MACHINE LEARNING WITH KNN
'''
import pandas as pd
# read in the iris data
from sklearn.datasets import load_iris
iris = load_iris()
# Build the feature matrix X and the response vector y from the hosted CSV
# copy of the iris data.
data = pd.read_csv('https://raw.githubusercontent.com/sinanuozdemir/SF_DAT_15/master/data/iris.csv')
y = data['species']               # response: the species column
X = data.drop(columns='species')  # features: every remaining column
# Bare expressions: they only display the dimensions when run interactively.
X.shape
y.shape
# predict y with KNN
from sklearn.neighbors import KNeighborsClassifier  # import class
knn = KNeighborsClassifier(n_neighbors=1)  # instantiate the estimator
knn.fit(X, y)  # fit with data
# predict for a new observation: predict() expects a 2D array-like of shape
# (n_samples, n_features), so a single observation is wrapped in an outer list
# (a flat 1D list is rejected by current scikit-learn releases)
knn.predict([[3, 5, 4, 2]])
# predict for multiple observations at once (one inner list per observation)
X_new = [[3, 5, 4, 2], [3, 5, 2, 2]]
knn.predict(X_new)
# try a different value of K
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
knn.predict(X_new)  # predictions
knn.predict_proba(X_new)  # predicted probabilities
# distances to the nearest neighbors and their row indices in the training
# data; like predict(), kneighbors() needs a 2D array-like, so the single
# query point is wrapped in an outer list
knn.kneighbors([[3, 5, 4, 2]])
# Compare accuracy for K=5 and K=1.
# score() returns the accuracy of the predictions:
# (number of correct predictions) / (number of rows).
# Note: each model is scored on the same X, y it was fitted on.

# K = 5
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)  # fit() returns the estimator
knn.score(X, y)

# K = 1
knn = KNeighborsClassifier(n_neighbors=1).fit(X, y)
knn.score(X, y)