seetaface 人脸识别 android 优化源码开放

声明:由于本人水平有限,所提供的代码、dll、so等必然存在很多问题,仅用于学习,不适合工业级使用,请谨慎使用,如果造成损失,责任自负。

基于seetaface的android实现,包含检测、对齐、比对,带自动遍历比对人脸,

JNI传递复杂对象,包括数组,

采用NEON优化提高处理速度,提供人脸旋转角度roll,pitch,yaw,

加载人脸识别模型改为只需加载一次。

在手机上提取特征并比对一次约需1.5~2秒,下图是模拟器的,只需要500多毫秒。

需要自己编译jni,并把3个.bin的模型文件放到手机的/sdcard/目录,也就是连上电脑后的根目录。

3个模型文件seeta_fa_v1.1.bin, seeta_fd_frontal_v1.0.bin,seeta_fr_v1.0.bin:

链接:http://pan.baidu.com/s/1geMDddD 密码:km1q


apk:http://download.csdn.net/detail/wuzuyu365/9843787

源码: 下载

jni编译好的so库:http://download.csdn.net/detail/wuzuyu365/9810605



neon优化的文件:math_functions.cpp

/*
 *
 * This file is part of the open-source SeetaFace engine, which includes three modules:
 * SeetaFace Detection, SeetaFace Alignment, and SeetaFace Identification.
 *
 * This file is part of the SeetaFace Identification module, containing codes implementing the
 * face identification method described in the following paper:
 *
 *   
 *   VIPLFaceNet: An Open Source Deep Face Recognition SDK,
 *   Xin Liu, Meina Kan, Wanglong Wu, Shiguang Shan, Xilin Chen.
 *   In Frontiers of Computer Science.
 *
 *
 * Copyright (C) 2016, Visual Information Processing and Learning (VIPL) group,
 * Institute of Computing Technology, Chinese Academy of Sciences, Beijing, China.
 *
 * The codes are mainly developed by Zining Xu(a M.S. supervised by Prof. Shiguang Shan)
 *
 * As an open-source face recognition engine: you can redistribute SeetaFace source codes
 * and/or modify it under the terms of the BSD 2-Clause License.
 *
 * You should have received a copy of the BSD 2-Clause License along with the software.
 * If not, see < https://opensource.org/licenses/BSD-2-Clause>.
 *
 * Contact Info: you can send an email to SeetaFace@vipl.ict.ac.cn for any problems.
 *
 * Note: the above information must be kept whenever or wherever the codes are used.
 *
 */

#include "math_functions.h"

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
#include "arm_neon.h"
#endif
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#include <xmmintrin.h>
#endif

#include <cstdint>
 
// Dot product of two float arrays, vectorised with ARM NEON when the target
// supports it. The original author reports that the SIMD version saves roughly
// one second per recognition compared with the scalar loop.
//
// src1, src2: input arrays; the first `count` elements of each are read.
// count:      number of element pairs to multiply and accumulate.
// Returns the sum of src1[i] * src2[i] for i in [0, count); 0 when count <= 0.
//
// On targets without NEON the function falls back to a plain scalar loop so
// that the translation unit still builds.
float simd_dot(const float* src1, const float* src2, const long& count) {
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
	long i = 0;

	// Four independent partial sums, one per vector lane.
	float32x4_t acc = vdupq_n_f32(0.0f);
	for (; i + 4 <= count; i += 4) {
		const float32x4_t a = vld1q_f32(src1 + i);
		const float32x4_t b = vld1q_f32(src2 + i);
		acc = vaddq_f32(acc, vmulq_f32(a, b));
	}

	// Horizontal add through lane intrinsics instead of the compiler-specific
	// operator[] on vector types; the summation order is lane 0..3.
	float sum = vgetq_lane_f32(acc, 0) + vgetq_lane_f32(acc, 1)
	          + vgetq_lane_f32(acc, 2) + vgetq_lane_f32(acc, 3);

	// Accumulate the 0..3 elements that did not fill a whole vector.
	for (; i < count; ++i) {
		sum += src1[i] * src2[i];
	}

	return sum;
#else
	// Scalar fallback for targets without NEON.
	float sum = 0.0f;
	for (long i = 0; i < count; ++i) {
		sum += src1[i] * src2[i];
	}
	return sum;
#endif
}

// Dot product of two unsigned-char arrays (an experiment by the original
// author to speed things up further by working on 8-bit data).
//
// src1, src2: input arrays; the first `count` elements of each are read.
// count:      number of element pairs to multiply and accumulate.
// Returns the sum of src1[i] * src2[i] for i in [0, count), computed exactly
// in 64-bit integer arithmetic and converted to float once at the end;
// 0 when count <= 0.
//
// The previous version multiplied and accumulated in 8-bit lanes
// (vmulq_u8 / vaddq_u8), which wrap modulo 256, so every product and the
// running sums were truncated. Products are now widened to 16 bits and the
// sums to 32 and then 64 bits before anything can overflow.
float simd_dot_uc(const unsigned char* src1, const unsigned char* src2, const long& count) {
	long i = 0;
	std::uint64_t total = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
	uint64x2_t acc = vdupq_n_u64(0);
	for (; i + 16 <= count; i += 16) {
		const uint8x16_t a = vld1q_u8(src1 + i);
		const uint8x16_t b = vld1q_u8(src2 + i);

		// 8-bit x 8-bit -> 16-bit products (max 255 * 255 = 65025 fits).
		const uint16x8_t prod_lo = vmull_u8(vget_low_u8(a), vget_low_u8(b));
		const uint16x8_t prod_hi = vmull_u8(vget_high_u8(a), vget_high_u8(b));

		// Each 32-bit lane receives four products (max 260100), so no overflow.
		const uint32x4_t quad_sums = vpadalq_u16(vpaddlq_u16(prod_lo), prod_hi);

		// Fold into the 64-bit running totals.
		acc = vpadalq_u32(acc, quad_sums);
	}
	total = vgetq_lane_u64(acc, 0) + vgetq_lane_u64(acc, 1);
#endif

	// Accumulate the remaining elements (all of them when NEON is unavailable).
	for (; i < count; ++i) {
		total += static_cast<std::uint64_t>(src1[i]) * src2[i];
	}

	return static_cast<float>(total);
}

// Plain scalar dot product with no SIMD; the original author notes that this
// version is slow compared with the vectorised ones.
//
// x, y: input arrays; the first `len` elements of each are read.
// len:  number of element pairs to multiply and accumulate.
// Returns the sum of x[i] * y[i], accumulated in index order starting from
// 0.0f; 0 when len <= 0.
float simd_dot_c(const float* x, const float* y, const long& len) {
  float acc = 0.0f;
  const float* lhs = x;
  const float* rhs = y;
  for (long remaining = len; remaining > 0; --remaining) {
    acc += (*lhs++) * (*rhs++);
  }
  return acc;
}

// Dot product of two float arrays using SSE on x86 PCs. SSE is not available
// on Android/ARM, where this function falls back to a scalar loop.
//
// x, y: input arrays; the first `len` elements of each are read (unaligned
//       loads are used, so no alignment is required).
// len:  number of element pairs to multiply and accumulate.
// Returns the sum of x[i] * y[i] for i in [0, len); 0 when len <= 0.
//
// The previous loop condition `i + 4 < len` skipped the last full group of
// four and there was no tail loop, so up to four trailing elements were
// silently dropped (e.g. len == 4 returned 0).
float simd_dot_sse(const float* x, const float* y, const long& len) {
  float inner_prod = 0.0f;
  long i = 0;

#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
  __m128 acc = _mm_setzero_ps();  // four partial sums, all zero
  for (; i + 4 <= len; i += 4) {
    const __m128 X = _mm_loadu_ps(x + i);  // load chunk of 4 floats
    const __m128 Y = _mm_loadu_ps(y + i);
    acc = _mm_add_ps(acc, _mm_mul_ps(X, Y));
  }

  // Spill the four partial sums and add them horizontally.
  float temp[4];
  _mm_storeu_ps(temp, acc);
  inner_prod = temp[0] + temp[1] + temp[2] + temp[3];
#endif

  // Accumulate the remaining elements (all of them when SSE is unavailable).
  for (; i < len; ++i) {
    inner_prod += x[i] * y[i];
  }

  return inner_prod;
}

// Matrix product built on top of simd_dot.
//
// A, B:   input buffers.
// C:      output buffer, n * m floats.
// n, m:   the two output dimensions; k is the shared inner dimension.
// ta, tb: transpose flags for A and B.
//
// Without _BLAS only ta == true, tb == false is supported (enforced through
// CHECK_TRUE), and the result is
//   C[i * n + j] = sum over l in [0, k) of B[i * k + l] * A[j * k + l]
// for i in [0, m) and j in [0, n).
// With _BLAS the product is delegated to Armadillo, writing into C in place.
void matrix_procuct(const float* A, const float* B, float* C, const int n,
    const int m, const int k, bool ta, bool tb) {
#ifdef _BLAS
  // Wrap the raw buffers as Armadillo matrices, transposing on request.
  arma::fmat mA = ta ? arma::fmat(A, k, n).t() : arma::fmat(A, n, k);
  arma::fmat mB = tb ? arma::fmat(B, m, k).t() : arma::fmat(B, k, m);
  // copy_aux_mem = false: mC aliases C, so the product lands in the caller's buffer.
  arma::fmat mC(C, n, m, false);
  mC = mA * mB;
#else
  CHECK_TRUE(ta && !tb);
  float* out = C;            // next output slot, filled sequentially
  const float* b_vec = B;    // current length-k vector taken from B
  for (int outer = 0; outer < m; ++outer, b_vec += k) {
    const float* a_vec = A;  // restart at the first length-k vector of A
    for (int inner = 0; inner < n; ++inner, a_vec += k) {
      *out++ = simd_dot(b_vec, a_vec, k);
    }
  }
#endif
}

猜你喜欢

转载自blog.csdn.net/wuzuyu365/article/details/53468631