在NLP(自然语言处理)中,常常会用到推荐相关的算法,这就免不了要用相关的距离计算公式来计算两个向量之间的距离(相似度)。这里用Java语言实现余弦相似度(即文中所说的"余弦定理")和修正余弦相似度,直接上代码~
【余弦定理】
package com.xzw.cos; import java.util.Map; import java.util.Map.Entry; /** * 相似度计算,余弦定理Java实现 * @author xzw * */ public class Cos { public static double getTwoVectorsSimilar(Map<String, Double> src, Map<String, Double> dest){ double score = 0; if (src.size() == 0 || dest.size() == 0) { return 0; } double v1 = 0, v2 = 0, fenzi = 0, fmOne = 0, fmTwo = 0; for (Entry<String, Double> item : src.entrySet()) { v1 = item.getValue(); fmOne += v1 * v1; if (dest.containsKey(item.getKey())) { v2 = dest.remove(item.getKey()); fmTwo += v2 * v2; fenzi += v1 * v2; } } for (double dv : dest.values()) { fmTwo += dv * dv; } dest.clear(); dest = null; if (fmOne == 0 || fmTwo == 0) { return 0; }else { score = fenzi / Math.sqrt(fmOne * fmTwo); } return score; } }
【修正余弦】
package com.xzw.cos; import java.util.Map; import java.util.Map.Entry; /** * 相似度计算,修正余弦Java实现 * @author xzw * */ public class NodCos { public static double getSim(Map<String,Double> src, Map<String,Double> dest){ double score = 0; double avgs = 0; double avgd = 0; if(src.size() == 0 || dest.size() == 0){ return 0; } for( double v :src.values()){ avgs += v; } avgs = avgs / src.size(); for( double v :dest.values()){ avgd += v; } avgd = avgd / dest.size(); double v1 = 0, v2 = 0, fz = 0, fm1 = 0, fm2 = 0; for (Entry<String, Double> items : src.entrySet()) { v1 = items.getValue(); fm1 += (v1 - avgs) * (v1-avgs); if(dest.containsKey(items.getKey())){ v2 = dest.remove(items.getKey()); fm2 += (v2 - avgd) * (v2 - avgd); fz += (v1 - avgs) * (v2 - avgd); } } for(double dv : dest.values()){ fm2 += (dv - avgd) * (dv - avgd); } dest.clear(); dest = null; if(fm1 == 0 || fm2 == 0){ return 0; } else{ score = fz / Math.sqrt((fm1 * fm2)); } return score; } }