向量之间的相似度和距离计算

/*
特征向量相似度和距离的计算
相似度:
·夹角余弦
·相关系数
·Dice
·Jaccard
距离
·明氏距离
·欧氏距离
·曼哈顿距离
·Jeffreys & Matusita 距离
·Mahalanobis 距离,未实现,协方差矩阵
·Canberra 距离(Lance 距离,Williams 距离)
*/
 
#include <iostream>
#include <vector>
#include <cassert>
#include <cmath>
using namespace std;
 
// Inner (dot) product: the sum of v1[i] * v2[i] over all indices.
// Both vectors must have the same length (checked with assert).
// Two empty vectors yield 0.0.
double dotProduct(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double sum = 0.0;
    vector<double>::const_iterator lhs = v1.begin();
    vector<double>::const_iterator rhs = v2.begin();
    // Walk both vectors in lockstep, accumulating front to back.
    for (; lhs != v1.end(); ++lhs, ++rhs)
    {
        sum += (*lhs) * (*rhs);
    }
    return sum;
}
 
// Euclidean norm (length) of v: the square root of the sum of squared
// components. An empty vector yields 0.0.
double module(const vector<double>& v)
{
    double squaredSum = 0.0;
    for (vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
        const double component = *it;
        squaredSum += component * component;
    }
    return sqrt(squaredSum);
}
 
// Cosine similarity: dot(v1, v2) / (|v1| * |v2|).
// Both vectors must have the same length (checked with assert).
// There is no guard against a zero norm: if either vector has norm 0 the
// division is by zero.
double cosine(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double numerator = dotProduct(v1, v2);
    const double denominator = module(v1) * module(v2);
    return numerator / denominator;
}
 
// Arithmetic mean of the components of v.
// v must not be empty (checked with assert).
double mean(const vector<double>& v)
{
    assert(v.size() != 0);

    double total = 0.0;
    for (vector<double>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
        total += *it;
    }
    return total / v.size();
}
 
double cov(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size() && v1.size() > 1);
    double ret = 0.0;
    double v1a = mean(v1), v2a = mean(v2);
 
    for (vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
            ret += (v1[i] - v1a) * (v2[i] - v2a);
    }
 
    return ret / (v1.size() - 1);
}
 
// Correlation coefficient: cov(v1, v2) / sqrt(cov(v1, v1) * cov(v2, v2)).
// Both vectors must have the same length (checked with assert).
// There is no guard against zero variance: a constant vector makes the
// divisor 0.
double coefficient(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double covariance = cov(v1, v2);
    const double variance1 = cov(v1, v1);
    const double variance2 = cov(v2, v2);
    return covariance / sqrt(variance1 * variance2);
}
 
// Dice coefficient: 2 * (v1 . v2) / (v1 . v1 + v2 . v2).
// Both vectors must have the same length (checked with assert).
// There is no guard for the case where both vectors are all zeros
// (the divisor is then 0).
double dice(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double cross = dotProduct(v1, v2);
    const double selfSum = dotProduct(v1, v1) + dotProduct(v2, v2);
    return 2.0 * cross / selfSum;
}
 
// Jaccard (Tanimoto) coefficient for real-valued vectors:
//     (v1 . v2) / (v1 . v1 + v2 . v2 - v1 . v2)
// Both vectors must have the same length (checked with assert).
// Identical non-zero vectors give exactly 1.0. If both vectors are all
// zeros the divisor is 0 and the result is NaN.
double jaccard(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    // Accumulate the three dot products in a single pass. The first term of
    // the denominator must be v1 . v1; using v1 . v2 there would cancel
    // against the subtracted term and leave only v2 . v2.
    double dot12 = 0.0;
    double dot11 = 0.0;
    double dot22 = 0.0;
    for (vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        dot12 += v1[i] * v2[i];
        dot11 += v1[i] * v1[i];
        dot22 += v2[i] * v2[i];
    }

    return dot12 / (dot11 + dot22 - dot12);
}
 
// Minkowski distance of order m:
//     (sum(|v1[i] - v2[i]|^m))^(1/m)
// Both vectors must have the same length (checked with assert).
// m is used as given; m == 0 makes the outer exponent 1.0 / m a division
// by zero.
double minkowsky(const vector<double>& v1, const vector<double>& v2, double m)
{
    assert(v1.size() == v2.size());

    double powerSum = 0.0;
    vector<double>::const_iterator a = v1.begin();
    vector<double>::const_iterator b = v2.begin();
    for (; a != v1.end(); ++a, ++b)
    {
        const double gap = fabs(*a - *b);
        powerSum += pow(gap, m);
    }
    return pow(powerSum, 1.0 / m);
}
 
// Euclidean distance: the Minkowski distance of order 2.
// Both vectors must have the same length (checked with assert).
double euclidean(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double order = 2.0;
    return minkowsky(v1, v2, order);
}
 
// Manhattan (city-block) distance: the Minkowski distance of order 1.
// Both vectors must have the same length (checked with assert).
double manhattan(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double order = 1.0;
    return minkowsky(v1, v2, order);
}
 
// Jeffreys & Matusita distance:
//     sqrt(sum((sqrt(v1[i]) - sqrt(v2[i]))^2))
// Both vectors must have the same length (checked with assert).
// Components are passed straight to sqrt, so a negative component produces
// NaN.
double jffreysMatusita(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    double squaredSum = 0.0;
    for (vector<double>::size_type k = 0; k < v1.size(); ++k)
    {
        // Difference of the square roots, computed once and then squared.
        const double rootGap = sqrt(v1[k]) - sqrt(v2[k]);
        squaredSum += rootGap * rootGap;
    }
    return sqrt(squaredSum);
}
 
// Mahalanobis distance -- NOT IMPLEMENTED.
// Placeholder: it only checks that both vectors have the same length
// (assert) and then returns 0.0 for every input. The signature carries no
// covariance matrix, which a real implementation would need.
double mahalanobis(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());

    const double placeholderResult = 0.0;
    return placeholderResult;
}
 
// Canberra distance (also known as Lance / Williams distance):
//     sum(|v1[i] - v2[i]| / (|v1[i]| + |v2[i]|))
// Both vectors must have the same length (checked with assert).
// A coordinate where both components are 0 contributes 0 to the sum (the
// usual 0/0 := 0 convention) instead of turning the whole result into NaN.
// For non-negative data |v1[i]| + |v2[i]| equals |v1[i] + v2[i]|.
double camberra(const vector<double>& v1, const vector<double>& v2)
{
    assert(v1.size() == v2.size());
    double ret = 0.0;
    for (vector<double>::size_type i = 0; i != v1.size(); ++i)
    {
        const double denominator = fabs(v1[i]) + fabs(v2[i]);
        // The denominator is exactly 0 only when both components are 0, in
        // which case the numerator is 0 too: skip the term. A NaN component
        // compares unequal to 0 and therefore still propagates.
        if (denominator == 0.0)
        {
            continue;
        }
        ret += fabs(v1[i] - v2[i]) / denominator;
    }
    return ret;
}
 
// Demo driver: builds two fixed 5-element vectors and prints, one value per
// line, the result of each similarity and distance function in this file.
int main()
{
    const double first[] = {1, 2, 3, 4, 5};
    const double second[] = {5, 4, 3, 2, 1};

    // Element counts are derived from the array sizes.
    const vector<double> v1(first, first + sizeof (first) / sizeof (first[0]));
    const vector<double> v2(second, second + sizeof (second) / sizeof (second[0]));

    // Similarity measures.
    cout << cosine(v1, v2) << endl;
    cout << coefficient(v1, v2) << endl;
    cout << dice(v1, v2) << endl;
    cout << jaccard(v1, v2) << endl;

    // Distance measures.
    cout << minkowsky(v1, v2, 5.0) << endl;
    cout << euclidean(v1, v2) << endl;
    cout << manhattan(v1, v2) << endl;
    cout << jffreysMatusita(v1, v2) << endl;
    cout << mahalanobis(v1, v2) << endl;
    cout << camberra(v1, v2) << endl;

    return 0;
}
发布了210 篇原创文章 · 获赞 105 · 访问量 12万+

猜你喜欢

转载自blog.csdn.net/qq_30263737/article/details/100714331