kNN分类例子
算法简单,原理就不介绍了,直接上代码:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
file: kNN.py
author: xjump.me#at#gmail#dot#com
'''
import numpy as np
import operator
def kNN_classify(test_vector, train_data_set, labels, k, dim):
    """Classify ``test_vector`` by majority vote among its k nearest neighbours.

    Distances are Minkowski distances of order ``dim``: ``dim=1`` is the
    Manhattan distance and ``dim=2`` the Euclidean distance.

    Args:
        test_vector: 1-D array-like holding the features of the sample to
            classify.
        train_data_set: 2-D array-like with one training sample per row and
            as many columns as ``test_vector`` has entries.
        labels: Indexable sequence of class labels; ``labels[i]`` is the
            label of row ``i`` of ``train_data_set``.
        k: Number of nearest neighbours that take part in the vote.
        dim: Order of the Minkowski distance.  It is converted with
            ``int()`` and must be positive afterwards.

    Returns:
        The label that collected the most votes.  When several labels are
        tied, the one whose first voter is closest to ``test_vector`` wins.

    Raises:
        ValueError: If ``dim`` is not positive, or if ``k`` is not between 1
            and the number of training samples.
    """
    dim = int(dim)
    if dim <= 0:
        raise ValueError("dim must be a positive integer, got %r" % (dim,))

    # Work in floating point so integer inputs cannot overflow when raised
    # to the power ``dim``.
    samples = np.asarray(train_data_set, dtype=float)
    query = np.asarray(test_vector, dtype=float)

    sample_count = samples.shape[0]
    if not 1 <= k <= sample_count:
        raise ValueError(
            "k must be between 1 and %d, got %r" % (sample_count, k))

    # Take the absolute value BEFORE raising to the power: for odd orders
    # (3, 5, ...) the signed differences would otherwise cancel each other
    # out or produce NaN when the root of a negative sum is taken.
    abs_diff = np.abs(samples - query)  # broadcasting replaces np.tile
    distances = (abs_diff ** dim).sum(axis=1) ** (1.0 / dim)

    # A stable sort keeps equally distant samples in row order, which makes
    # the result reproducible.
    nearest = distances.argsort(kind='mergesort')[:k]

    votes = {}
    first_seen = []  # distinct labels, ordered from nearest to farthest
    for index in nearest:
        label = labels[index]
        if label not in votes:
            first_seen.append(label)
        votes[label] = votes.get(label, 0) + 1

    # max() returns the first maximal element, so ties go to the label that
    # showed up closest to the query.
    return max(first_seen, key=votes.get)
def kNN_classify_Euclidian(test_vector, train_data_set, labels, k):
    """Classify ``test_vector`` with ``kNN_classify`` using distance order 2.

    The arguments are forwarded unchanged to ``kNN_classify``; only the
    trailing ``dim`` argument is fixed here (2, i.e. Euclidean distance).
    Returns whatever ``kNN_classify`` returns.
    """
    euclidean_order = 2
    predicted = kNN_classify(
        test_vector, train_data_set, labels, k, euclidean_order)
    return predicted
def kNN_classify_Manhattan(test_vector, train_data_set, labels, k):
    """Classify ``test_vector`` with ``kNN_classify`` using distance order 1.

    The arguments are forwarded unchanged to ``kNN_classify``; only the
    trailing ``dim`` argument is fixed here (1, i.e. Manhattan distance).
    Returns whatever ``kNN_classify`` returns.
    """
    manhattan_order = 1
    predicted = kNN_classify(
        test_vector, train_data_set, labels, k, manhattan_order)
    return predicted
if __name__ == "__main__":
    # Demo: classify one query vector against a small hand-made training set
    # for every k from 1 to len(labels) - 1, once per distance metric.
    v0 = np.array([1, 2, 3, 4, 5, 6])
    tds = np.array([
        [1.2, 3, 6, 7, 3, 2],
        [2, 9, 17, 7, 6, 59],
        [1.2, 44, 6, 3, 3, 23],
        [9, 3, 51, 7, 3, 100],
        [18, 4, 39, 7, 3, 21],
        [66, 8, 28, 7, 3, 88],
        [3, 1, 2, 7, 3, 33],
        [24, 0.5, 1, 7, 3, 56],
        [22, 99, 7, 7, 3, 0.6],
        [70, 13, 9, 7, 3, 2],
    ])
    # labels[i] is the class of row i of tds.
    labels = ['A', 'B', 'C', 'D', 'C', 'B', 'A', 'C', 'D', 'B']
    # Single-argument print(...) calls produce the same output whether the
    # interpreter treats print as a statement (Python 2) or as a function
    # (Python 3); the bare print statement is a syntax error on Python 3.
    for k in range(1, len(labels)):
        print("==================Euclidian====================")
        print('k= %d' % k)
        print(kNN_classify_Euclidian(v0, tds, labels, k))
        print("==================Manhattan====================")
        print('k= %d' % k)
        print(kNN_classify_Manhattan(v0, tds, labels, k))