-
Notifications
You must be signed in to change notification settings - Fork 1
/
kNNlibrary.py
64 lines (54 loc) · 1.87 KB
/
kNNlibrary.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#-*- coding: utf-8 -*-
import csv
import math
import operator
def _readIntRows(path, numericColumns=3):
    """Read a CSV file and convert the leading columns of each row to int.

    Args:
        path: Path of the CSV file to read.
        numericColumns: Number of leading columns converted with ``int``;
            any remaining columns are kept as the strings csv produced.

    Returns:
        A list of rows (each a list). Completely blank lines are skipped.

    Raises:
        ValueError: If a leading column does not hold an integer literal.
        IndexError: If a non-blank row has fewer than ``numericColumns``
            columns.
    """
    try:
        # Python 3: csv.reader needs a text-mode file opened with newline=''.
        csvfile = open(path, 'r', newline='')
    except TypeError:
        # Python 2: open() has no ``newline`` keyword; csv wants binary mode.
        csvfile = open(path, 'rb')
    with csvfile:
        # csv.reader yields [] for blank lines; drop them instead of crashing.
        rows = [row for row in csv.reader(csvfile) if row]
    for row in rows:
        for col in range(numericColumns):
            row[col] = int(row[col])
    return rows


def loadDataset(trainfn, testfn, trainingSet=None, testSet=None):
    """Load the training and test CSV files into lists of rows.

    The first three columns of every row are converted to ``int``; all
    other columns are left as strings.

    Args:
        trainfn: Path of the training CSV file.
        testfn: Path of the test CSV file.
        trainingSet: Optional list that the training rows are appended to.
            A new list is created when omitted.
        testSet: Optional list that the test rows are appended to.
            A new list is created when omitted.

    Returns:
        The tuple ``(trainingSet, testSet)``. When lists were passed in,
        these are the same (mutated) objects.
    """
    # Fresh lists per call: a mutable default would be shared by every call
    # and silently accumulate rows across them.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    trainingSet.extend(_readIntRows(trainfn))
    testSet.extend(_readIntRows(testfn))
    return trainingSet, testSet
def euclideanDistance(instance1, instance2, length):
    """Return the Euclidean distance between two instances.

    Only the first ``length`` positions of each instance are compared, so
    trailing entries are ignored.

    Args:
        instance1: First indexable sequence of numbers.
        instance2: Second indexable sequence of numbers.
        length: Number of leading positions to include in the distance.

    Returns:
        The square root of the summed squared differences, as a float.
    """
    squaredSum = sum(
        (instance1[i] - instance2[i]) ** 2 for i in range(length)
    )
    return math.sqrt(squaredSum)
def getNeighbors(trainingSet, testInstance, k):
    """Return the ``k`` training rows closest to ``testInstance``.

    Distance is measured with ``euclideanDistance`` over every position of
    ``testInstance`` except the last one.

    Args:
        trainingSet: Sequence of training rows.
        testInstance: The row to find neighbors for.
        k: Number of neighbors to return.

    Returns:
        A list of ``k`` rows from ``trainingSet``, nearest first. Rows at
        equal distance keep their relative order from ``trainingSet``.

    Raises:
        IndexError: If ``k`` is larger than ``len(trainingSet)``.
    """
    featureCount = len(testInstance) - 1
    scored = [
        (candidate, euclideanDistance(testInstance, candidate, featureCount))
        for candidate in trainingSet
    ]
    # list.sort is stable, so ties stay in training-set order.
    scored.sort(key=lambda pair: pair[1])
    return [scored[rank][0] for rank in range(k)]
def getResponse(neighbors):
    """Return the majority class label among ``neighbors``.

    The label of each neighbor is its last element.

    Args:
        neighbors: Non-empty sequence of rows.

    Returns:
        The label that occurs most often. On interpreters whose dicts
        preserve insertion order (Python 3.7+), ties go to the label seen
        first in ``neighbors``.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    if not neighbors:
        # Same exception type the original raised when indexing an empty
        # vote list, but with an explicit message.
        raise IndexError("getResponse() requires at least one neighbor")
    classVotes = {}
    for neighbor in neighbors:
        label = neighbor[-1]
        classVotes[label] = classVotes.get(label, 0) + 1
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # max() returns the first maximal entry it encounters.
    return max(classVotes.items(), key=operator.itemgetter(1))[0]
def getAccuracy(testSet, predictions):
    """Return the percentage of test rows whose label was predicted correctly.

    The true label of each row is its last element; it is compared with the
    prediction at the same index.

    Args:
        testSet: Sequence of test rows.
        predictions: Sequence of predicted labels, aligned with ``testSet``.

    Returns:
        Accuracy as a float between 0.0 and 100.0.

    Raises:
        ZeroDivisionError: If ``testSet`` is empty.
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    total = len(testSet)
    hits = sum(
        1 for idx, row in enumerate(testSet) if row[-1] == predictions[idx]
    )
    return (hits / float(total)) * 100.0