The algorithm is simple, so I'll skip the theory and go straight to the code:
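For reference, the dim argument of kNN_classify below is the order p of the Minkowski distance (p = 2 gives the Euclidean distance, p = 1 the Manhattan distance):

d_p(x, y) = \left( \sum_{i=1}^{n} |x_i - y_i|^p \right)^{1/p}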

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
 
'''
file: kNN.py
author: xjump.me#at#gmail#dot#com
'''

import numpy as np
import operator

def kNN_classify(test_vector, train_data_set, labels, k, dim):
  # 'dim' is the order p of the Minkowski distance: 1 = Manhattan, 2 = Euclidean
  dim = int(dim)
  assert dim > 0
  train_data_set_size = train_data_set.shape[0]
  # Difference between the test vector and every training sample
  diff = np.tile(test_vector, (train_data_set_size, 1)) - train_data_set
  # Use |diff|**dim so that odd orders are handled correctly as well
  abs_diff = np.abs(diff)**dim
  distances = abs_diff.sum(axis=1)**(1.0/dim)
  # Indices of the training samples, sorted by ascending distance
  sorted_distance_indexes = distances.argsort()
  # Let the k nearest neighbours vote on the label
  classes = {}
  for i in range(k):
    vote_label = labels[sorted_distance_indexes[i]]
    classes[vote_label] = classes.get(vote_label, 0) + 1
  # Return the label with the most votes
  sorted_classes = sorted(classes.items(), key=operator.itemgetter(1), reverse=True)
  return sorted_classes[0][0]
  
def kNN_classify_Euclidian(test_vector, train_data_set, labels, k):
  return kNN_classify(test_vector, train_data_set, labels, k, 2)
  
def kNN_classify_Manhattan(test_vector, train_data_set, labels, k):
  return kNN_classify(test_vector, train_data_set, labels, k, 1)
  
if __name__=="__main__":
  v0 = np.array([1,2,3,4,5,6])
  tds = np.array([
    [1.2,3,6,7,3,2],
    [2,9,17,7,6,59],
    [1.2,44,6,3,3,23],
    [9,3,51,7,3,100],
    [18,4,39,7,3,21],
    [66,8,28,7,3,88],
    [3,1,2,7,3,33],
    [24,0.5,1,7,3,56],
    [22,99,7,7,3,0.6],
    [70,13,9,7,3,2],
  ])
  labels = ['A','B','C','D','C','B','A','C','D','B']
  # Classify v0 with both metrics for every k from 1 to 9
  for k in range(1, len(labels)):
    print("==================Euclidian====================")
    print('k=', k)
    print(kNN_classify_Euclidian(v0, tds, labels, k))

    print("==================Manhattan====================")
    print('k=', k)
    print(kNN_classify_Manhattan(v0, tds, labels, k))
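
As a quick sanity check (not part of the original script), the same predictions can be reproduced with scikit-learn's KNeighborsClassifier, assuming scikit-learn is installed and tds, labels and v0 are defined as above; its p parameter is the same Minkowski order, though tie-breaking for even k may differ from the dictionary-based voting:

# Sketch: cross-check the results with scikit-learn (assumes sklearn is installed)
from sklearn.neighbors import KNeighborsClassifier

for p, name in [(2, 'Euclidian'), (1, 'Manhattan')]:
  for k in range(1, len(labels)):
    clf = KNeighborsClassifier(n_neighbors=k, p=p)  # p selects the Minkowski order
    clf.fit(tds, labels)                            # fit on the same toy training set
    print(name, 'k=', k, clf.predict([v0])[0])      # predicted label for v0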