期望值,方差,标准差,协方差,相关系数
package com.chipmunk.java.base; public class AlgorithmUtil { /** * 平均值 */ public static double getAverageValue(double[]arr){ int len = arr.length; double sum = 0; for (double d : arr) { sum+=d; } double av = sum/len; return av; } /** * 方差:样本方差是n-1,总体方差是n */ public static double getVariance(double[]arr){ double av = getAverageValue(arr); double sum = 0; for (double d : arr) { double c = d-av; double p = Math.pow(c, 2); // System.out.println(c+"--"+p); sum+=p; } int len = arr.length; double v = sum/len;//样本方差是len-1,总体方差是len return v; } /** * 标准差或均方差 */ public static double getStandardDeviation(double[]arr){ double v = getVariance(arr); double sd = Math.sqrt(v); return sd; } /** * 期望值E(X)=sum[i:1~n]Xi*Pi * @param arr * @return */ public static double getExpectation(double[]arr_x,double[]arr_p){ int len_x = arr_x.length; int len_p = arr_p.length; double ex = 0; if (len_x==len_p) { for (int i = 0; i < len_x; i++) { double x = arr_x[i]; double p = arr_p[i]; ex+=x*p; } } return ex; } /** * 协方差cov(X,Y)=E[XY]-E[X]E[Y] * @param arr * Cov(X,X)=D(X),Cov(Y,Y)=D(Y)。 * 此处期望值是平均数 * @return */ public static double getCovariance(double[]arr_a,double[]arr_b){ double ex_a = getAverageValue(arr_a);//期望值是平均数 double ex_b = getAverageValue(arr_b); // System.out.println(ex_a); // System.out.println(ex_b); double ex_ab = 0; int len_a = arr_a.length; int len_b = arr_b.length; if (len_a==len_b) { for (int i = 0; i < len_a; i++) { double a = arr_a[i]; double b = arr_b[i]; ex_ab+=a*b; } ex_ab=ex_ab/len_a; } // System.out.println(ex_ab); double cov = ex_ab-ex_a*ex_b; return cov; } /** * * 相关系数 correlation coefficient * Pxy=cov(X,Y)/sqrt(D(X)*D(Y)) * 公式中Cov(X,Y)为X,Y的协方差,D(X)、D(Y)分别为X、Y的方差。 * * 相关系数(r)的定义如下图所示,取值范围为[-1,1],r>0表示正相关,r<0表示负相关,|r|表示了变量之间相关程度的高低。 * 特殊地,r=1称为完全正相关,r=-1称为完全负相关,r=0称为不相关。通常|r|大于0.8时,认为两个变量有很强的线性相关性。 * */ public static double getCorrelationCoefficient(double[]arr_a,double[]arr_b){ double cov = getCovariance(arr_a, arr_b); double dx_a = getVariance(arr_a); double dx_b = getVariance(arr_b); double c = Math.sqrt(dx_a*dx_b); double pxy = cov/c; return pxy; } public static void main(String[] args) { double[]arr1 = new double[]{1.2,2.3,4.2,6.3,2.6}; double[]arr2 = new double[]{1.2,1.3,1.2,1.3,1.6}; double[]arr3 = new double[]{2.4,2.6,2.4,2.6,5.2}; // System.out.println(getVariance(arr1)); System.out.println(getCovariance(arr1, arr2)); System.out.println(getCorrelationCoefficient(arr1, arr2)); } }