The algorithm is simple, so I'll skip the theory and go straight to the code:
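(One note on the code below: the `dim` argument of `kNN_classify` is the order p of the Minkowski distance d(x, y) = (Σ_i |x_i − y_i|^p)^(1/p); p = 2 gives the Euclidean distance and p = 1 the Manhattan distance.)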

#!/usr/bin/env python
# -*- coding: utf-8 -*-
 
'''
file: kNN.py
author: xjump.me#at#gmail#dot#com
'''

import numpy as np
import operator

def kNN_classify(test_vector, train_data_set, labels, k, dim):
    '''Classify test_vector by majority vote among its k nearest neighbours
    in train_data_set; dim is the order p of the Minkowski distance
    (p=2: Euclidean, p=1: Manhattan).'''
    dim = int(dim)
    assert dim > 0
    train_data_set_size = train_data_set.shape[0]
    # Difference between the test vector and every training sample
    diff = np.tile(test_vector, (train_data_set_size, 1)) - train_data_set
    # Minkowski distance: (sum_i |x_i - y_i|^p)^(1/p)
    abs_diff_pow = np.abs(diff)**dim
    distances = abs_diff_pow.sum(axis=1)**(1.0/dim)
    sorted_distance_indexes = distances.argsort()
    #print(sorted_distance_indexes)
    #print(distances)
    # Count the labels of the k nearest neighbours
    classes = {}
    for i in range(k):
        vote_label = labels[sorted_distance_indexes[i]]
        classes[vote_label] = classes.get(vote_label, 0) + 1
    sorted_classes = sorted(classes.items(), key=operator.itemgetter(1), reverse=True)
    #print(classes)
    # Return the label with the most votes
    return sorted_classes[0][0]
    
def kNN_classify_Euclidean(test_vector, train_data_set, labels, k):
    # Euclidean distance (Minkowski with p=2)
    return kNN_classify(test_vector, train_data_set, labels, k, 2)

def kNN_classify_Manhattan(test_vector, train_data_set, labels, k):
    # Manhattan distance (Minkowski with p=1)
    return kNN_classify(test_vector, train_data_set, labels, k, 1)
    
if __name__=="__main__":
    v0 = np.array([1,2,3,4,5,6])
    tds = np.array([
        [1.2,3,6,7,3,2],
        [2,9,17,7,6,59],
        [1.2,44,6,3,3,23],
        [9,3,51,7,3,100],
        [18,4,39,7,3,21],
        [66,8,28,7,3,88],
        [3,1,2,7,3,33],
        [24,0.5,1,7,3,56],
        [22,99,7,7,3,0.6],
        [70,13,9,7,3,2],
    ])
    labels = ['A','B','C','D','C','B','A','C','D','B']
    for k in range(1, len(labels)):
        print("==================Euclidean====================")
        print('k =', k)
        print(kNN_classify_Euclidean(v0, tds, labels, k))

        print("==================Manhattan====================")
        print('k =', k)
        print(kNN_classify_Manhattan(v0, tds, labels, k))
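
As a quick cross-check of the hand-written classifier, scikit-learn's KNeighborsClassifier does the same neighbour vote with a Minkowski metric, where its p parameter plays the role of dim above. A minimal sketch, assuming scikit-learn is installed (ties for some k may be broken differently, so results can occasionally differ):

# Cross-check with scikit-learn; reuses tds, labels and v0 from above.
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors=3, p=2)   # p=2: Euclidean, p=1: Manhattan
clf.fit(tds, labels)                             # train on the same data set
print(clf.predict(v0.reshape(1, -1)))            # predict for the single test vector v0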