|
- # 余弦距离
- import numpy as np
def cosine_distance(a, b):
    """Return the cosine distance (1 - cosine similarity) between a and b.

    For 1-D inputs the two vectors are compared directly and a scalar is
    returned. For 2-D inputs every row is treated as a vector and the
    result is the pairwise matrix whose entry [i, j] is the cosine
    distance between a[i] and b[j].

    Zero-norm vectors are not special-cased; the division is left to NumPy.

    Args:
        a: NumPy array with 1 or 2 dimensions.
        b: NumPy array with the same shape as a.

    Returns:
        A scalar for 1-D inputs, or an (n, n) array for 2-D inputs of
        shape (n, d).

    Raises:
        RuntimeError: If the shapes differ, or the inputs are neither
            1-D nor 2-D.
    """
    if a.shape != b.shape:
        raise RuntimeError("array {} shape not match {}".format(a.shape, b.shape))
    if a.ndim == 1:
        # Product of the Euclidean (L2) lengths of the two vectors.
        denom = np.linalg.norm(a) * np.linalg.norm(b)
    elif a.ndim == 2:
        a_norm = np.linalg.norm(a, axis=1, keepdims=True)
        b_norm = np.linalg.norm(b, axis=1, keepdims=True)
        # (n, 1) * (1, n) broadcasts to the outer product, so that
        # denom[i, j] == |a[i]| * |b[j]|, matching np.dot(a, b.T)[i, j].
        # Multiplying the two (n, 1) columns directly would normalise the
        # whole row i by |a[i]| * |b[i]| and corrupt off-diagonal entries.
        denom = a_norm * b_norm.T
    else:
        raise RuntimeError("array dimensions {} not right".format(a.ndim))
    similarity = np.dot(a, b.T) / denom
    return 1. - similarity
# Cosine similarity lies in [-1, 1]: 1 when the vectors point the same way,
# -1 when they point in opposite directions, 0 when they are orthogonal
# (unrelated).
# Cosine distance = 1 - cosine similarity.
# A distance reads more intuitively for similarity search, so the similarity
# is converted into a Euclidean-like cosine distance before use.

# Vector example.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print("向量的余弦距离:", cosine_distance(a, b))

# Matrix example: each row is one vector.
aa = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
bb = np.array([
    [3, 2, 4],
    [5, 8, 9],
])
print("矩阵的余弦距离:", cosine_distance(aa, bb))
复制代码
|
|