android arm64硬件实现加速crc32算法

 在android arm64平台下,crc32,aes等常用算法有指令集实现。故在android下,可借助这些指令实现代码加速。

如何判断自己的手机是否支持crc32呢? 有三个方法: 


 方法1,直接查看/proc/cpuinfo:如果 Features 一行中包含 crc32,说明该CPU支持crc32指令




方法2,使用ELF辅助向量 API

// Query the ELF auxiliary vector for the hardware capability bitmask.
// NOTE(review): getauxval/AT_HWCAP are expected from <sys/auxv.h> and
// HWCAP_CRC32 from <asm/hwcap.h> on arm64 — confirm against the NDK in use.
unsigned long hwcap = getauxval(AT_HWCAP);

// The CRC32 bit is set when the CPU reports the CRC32 instructions.
if (hwcap & HWCAP_CRC32) {
     return 1;
}

return 0;
方法3,使用cpu-features库

#include <cpu-features.h>
// Ask the cpu-features library for the feature bitmask and report 1 when
// the arm64 CRC32 feature bit is present, 0 otherwise.
const uint64_t features = android_getCpuFeatures();
return (features & ANDROID_CPU_ARM64_FEATURE_CRC32) ? 1 : 0;


这里只讨论使用第三种方法的实现,完整代码如下:

#ifdef __ANDROID__

#ifdef __aarch64__

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <cpu-features.h>

/*
 * One-instruction wrappers around the ARMv8 CRC32 instructions.
 *
 * Each macro updates `crc` in place (read-write 32-bit "w" register operand)
 * with `value` (input register operand). The suffix selects how much of
 * `value` the instruction consumes: X = 64-bit "x" register, W/H/B = "w"
 * register for the word / halfword / byte forms.
 *
 * CRC32*  use polynomial 0x04C11DB7, CRC32C* use polynomial 0x1EDC6F41.
 *
 * NOTE(review): the assembler must accept the CRC extension (for example
 * -march=armv8-a+crc) — confirm the build flags.
 */
#define CRC32X(crc, value) __asm__("crc32x %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32W(crc, value) __asm__("crc32w %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32H(crc, value) __asm__("crc32h %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32B(crc, value) __asm__("crc32b %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CX(crc, value) __asm__("crc32cx %w[c], %w[c], %x[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CW(crc, value) __asm__("crc32cw %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CH(crc, value) __asm__("crc32ch %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))
#define CRC32CB(crc, value) __asm__("crc32cb %w[c], %w[c], %w[v]":[c]"+r"(crc):[v]"r"(value))

/*
 * Portable bit-by-bit CRC-32C update, used when the CPU does not report the
 * CRC32 feature. `state` is the already-inverted running value; the caller
 * applies the final inversion. 0x82F63B78 is the bit-reflected form of the
 * 0x1EDC6F41 polynomial used by the CRC32C* instructions.
 */
static uint32_t crc32c_bitwise(uint32_t state, const uint8_t *p, size_t len)
{
	while (len--) {
		state ^= *p++;
		for (int bit = 0; bit < 8; bit++) {
			state = (state >> 1) ^ (0x82F63B78u & (0u - (state & 1u)));
		}
	}
	return state;
}

/**
 * Update a CRC-32C (Castagnoli) checksum over a buffer.
 *
 * Despite the name, the CRC32C* instruction family is used, so the result is
 * CRC-32C rather than the 0x04C11DB7 CRC-32.
 *
 * @param crc   Running checksum from a previous call, or 0 to start.
 * @param chunk Bytes to fold into the checksum; not accessed when size is 0.
 * @param size  Number of bytes at chunk.
 * @return The updated checksum. When the CPU lacks the CRC32 feature the same
 *         value is computed in software instead of returning 0, which would
 *         be indistinguishable from a valid checksum.
 */
uint32_t __arm64_accelerate_crc32(uint32_t crc, const void* chunk, size_t size) {
	const uint8_t *p = (const uint8_t *)chunk;
	uint32_t state = crc ^ 0xffffffffu;
	size_t len = size;

	if (!(android_getCpuFeatures() & ANDROID_CPU_ARM64_FEATURE_CRC32)) {
		return ~crc32c_bitwise(state, p, len);
	}

	/*
	 * Every step consumes the CRC produced by the previous one, so the
	 * steps are serialized regardless; a plain loop is used and unrolling
	 * is left to the compiler. memcpy performs the loads so that an
	 * unaligned `chunk` is not dereferenced through a wider pointer type.
	 */
	while (len >= sizeof(uint64_t)) {
		uint64_t v;
		memcpy(&v, p, sizeof v);
		CRC32CX(state, v);
		p += sizeof v;
		len -= sizeof v;
	}

	/* Fewer than 8 bytes remain: at most one 4-, 2- and 1-byte step. */
	if (len >= sizeof(uint32_t)) {
		uint32_t v;
		memcpy(&v, p, sizeof v);
		CRC32CW(state, v);
		p += sizeof v;
		len -= sizeof v;
	}

	if (len >= sizeof(uint16_t)) {
		uint16_t v;
		memcpy(&v, p, sizeof v);
		CRC32CH(state, v);
		p += sizeof v;
		len -= sizeof v;
	}

	if (len >= 1) {
		CRC32CB(state, *p);
	}

	return ~state;
}

#endif /* __aarch64__ */

#endif /* __ANDROID__ */


注意,crc32x系列与crc32cx系列指令的区别在于所使用的生成多项式不同

其中crc32x系列的多项式是0x04C11DB7(标准CRC-32),而crc32cx系列的多项式是0x1EDC6F41(CRC-32C,Castagnoli)。上面的代码使用的是crc32cx系列指令,因此计算结果是CRC-32C


使用clock计算CPU的时间测试如下:



测试机器为Android 5.0,nubia,测试数据为200字节的随意字符串

猜你喜欢

转载自blog.csdn.net/lyx2007825/article/details/77113256