BloomFilter
- 请求到达正式业务之前, 判断该请求是否有效
- 维护一个大的bit数组, 把每个有效key经一次或多次hash得到的索引位置标记为已存在. 当有请求进来时, 计算请求key的各个hash索引, 判断每一个索引位置的值是否都为true: 只要有一个为false, 该key一定不存在; 全部为true, 则该key可能存在(存在一定的误判率).
- 常用于处理缓存穿透问题
选用redis实现的好处
- 部署高可用节点时, 减少每一个节点的开销
- redis可以持久化, 避免因服务器宕机导致需要重新灌数据的开销
- redis中可实现的数组长度更长,大数据量下,hash索引可散列的范围更大
实现
索引个数,bit数组长度
- 使用Google Guava中根据预期数据量和误判率计算hash函数个数和bit数组长度的方法
hash算法
- 由一个key衍生出多个hash值, 再分别对bit数组长度求余, 得到多个索引位置
initBloomFilter方法
- 根据数据的条数和误判率, 计算hash函数个数和bit数组长度, 并将这些信息连同误判率一起保存到redis中(这样可以同时维护多个过滤器)
- 使用管道(pipeline)将上述信息和所有数据一次性写入redis, 可以提高效率, 减少网络开销(测试中初始化数据时, 耗时可减少约75%)
代码
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.connection.RedisConnection;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.stereotype.Component;
import org.springframework.util.ObjectUtils;
/**
 * Redis-backed Bloom filter helper.
 *
 * <p>Each named filter occupies two Redis keys:
 * <ul>
 *   <li>{@code <name>:info} - a hash holding {@code fpp}, {@code numBits} and
 *       {@code numHashFunctions};</li>
 *   <li>{@code <name>:bitarray} - the bitmap itself.</li>
 * </ul>
 *
 * <p>The way bit indexes are derived from a key is part of the stored format: a filter must be
 * populated and queried with the same derivation, so changing it invalidates existing bitmaps.
 *
 * <p>NOTE(review): the pipelined initialisation writes keys and hash values as raw UTF-8 bytes,
 * while {@code put}/{@code mightContain} go through the template's serializers. This presumably
 * expects String serializers (e.g. a {@code StringRedisTemplate}) - confirm against the bean
 * configuration.
 */
@Component
public class BloomFilterUtil {

    private static final String BLOOM_FILTER_INFO_SUFFIX = ":info";
    private static final String FPP = "fpp";
    private static final String NUM_BITS = "numBits";
    private static final String NUM_HASH_FUNCTIONS = "numHashFunctions";
    private static final String BLOOM_FILTER_BIT_ARRAY_SUFFIX = ":bitarray";

    /** Redis bitmap offsets must be smaller than 2^32, which caps the usable bit-array length. */
    private static final long MAX_REDIS_BITMAP_BITS = 1L << 32;

    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    /**
     * Adds {@code key} to the named filter by setting every bit index derived from it.
     *
     * @param bloomFilterName name the filter was initialised under
     * @param key             value to add
     * @throws IllegalStateException if the filter metadata is missing or invalid
     * @throws Exception             never thrown as a checked exception; the clause is kept so
     *                               existing callers keep compiling
     */
    public void put(String bloomFilterName, String key) throws Exception {
        long[] hashIndexArray = hash(bloomFilterName, key);
        String bitArrayKey = bloomFilterName + BLOOM_FILTER_BIT_ARRAY_SUFFIX;
        for (long hashIndex : hashIndexArray) {
            redisTemplate.opsForValue().setBit(bitArrayKey, hashIndex, true);
        }
    }

    /**
     * Tests whether {@code key} may have been added to the named filter.
     *
     * @param bloomFilterName name the filter was initialised under
     * @param key             value to look up
     * @return {@code false} if at least one derived bit is unset (the key was definitely not
     *         added); {@code true} if all derived bits are set (the key was probably added)
     * @throws IllegalStateException if the filter metadata is missing or invalid
     * @throws Exception             never thrown as a checked exception; the clause is kept so
     *                               existing callers keep compiling
     */
    public boolean mightContain(String bloomFilterName, String key) throws Exception {
        long[] hashIndexArray = hash(bloomFilterName, key);
        String bitArrayKey = bloomFilterName + BLOOM_FILTER_BIT_ARRAY_SUFFIX;
        for (long hashIndex : hashIndexArray) {
            // A single unset bit proves absence. Boolean.TRUE.equals also guards against a
            // null reply, which would otherwise fail on auto-unboxing.
            if (!Boolean.TRUE.equals(redisTemplate.opsForValue().getBit(bitArrayKey, hashIndex))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Loads the stored parameters of the named filter and derives the bit indexes for {@code key}.
     *
     * @throws IllegalStateException if the metadata hash is missing, incomplete or non-positive
     */
    private long[] hash(String bloomFilterName, String key) {
        String infoKey = bloomFilterName + BLOOM_FILTER_INFO_SUFFIX;
        Object numBitsValue = redisTemplate.opsForHash().get(infoKey, NUM_BITS);
        Object numHashFunctionsValue = redisTemplate.opsForHash().get(infoKey, NUM_HASH_FUNCTIONS);
        if (ObjectUtils.isEmpty(numBitsValue) || ObjectUtils.isEmpty(numHashFunctionsValue)) {
            throw new IllegalStateException(
                    "Bloom filter '" + bloomFilterName + "' is not initialised: missing metadata in " + infoKey);
        }
        // numBits is a long and may exceed Integer.MAX_VALUE, so it must not be parsed as an int.
        long numBits = Long.parseLong(numBitsValue.toString());
        int numHashFunctions = Integer.parseInt(numHashFunctionsValue.toString());
        if (numBits <= 0 || numHashFunctions <= 0) {
            throw new IllegalStateException(
                    "Bloom filter '" + bloomFilterName + "' has invalid metadata: numBits=" + numBits
                            + ", numHashFunctions=" + numHashFunctions);
        }
        return hash(key, numHashFunctions, numBits);
    }

    /**
     * Derives {@code numHashFunctions} bit indexes in {@code [0, numBits)} for {@code key}.
     *
     * <p>Uses double hashing: index<sub>i</sub> = (h1 + i * h2) mod numBits, where h1 is
     * {@link String#hashCode()} and h2 is a 64-bit FNV-1a style hash of the key's UTF-16 code
     * units. {@link Math#floorMod(long, long)} keeps the result non-negative even when the
     * running sum is negative or has wrapped around.
     */
    private static long[] hash(String key, int numHashFunctions, long numBits) {
        long[] hashIndexArray = new long[numHashFunctions];
        long combined = key.hashCode();
        // Forcing the stride to be odd guarantees it is never zero.
        long stride = fnv1a64(key) | 1L;
        for (int i = 0; i < numHashFunctions; i++) {
            hashIndexArray[i] = Math.floorMod(combined, numBits);
            combined += stride;
        }
        return hashIndexArray;
    }

    /** 64-bit FNV-1a style hash computed over the UTF-16 code units of {@code s}. */
    private static long fnv1a64(String s) {
        long h = 0xcbf29ce484222325L;
        for (int i = 0; i < s.length(); i++) {
            h ^= s.charAt(i);
            h *= 0x100000001b3L;
        }
        return h;
    }

    /**
     * Computes the bit-array length m = -n * ln(p) / (ln 2)^2.
     *
     * @param expectedInsertions expected number of elements n
     * @param fpp                desired false-positive probability p; {@code 0} is replaced by
     *                           {@link Double#MIN_VALUE} so the logarithm stays finite
     * @return the computed length, truncated to a {@code long}
     */
    static long optimalNumOfBits(long expectedInsertions, double fpp) {
        if (fpp == 0.0D) {
            fpp = Double.MIN_VALUE;
        }
        return (long) ((double) (-expectedInsertions) * Math.log(fpp) / (Math.log(2.0D) * Math.log(2.0D)));
    }

    /**
     * Computes the number of hash functions k = round(m / n * ln 2), but never less than 1.
     *
     * @param expectedInsertions expected number of elements n
     * @param numBits            bit-array length m
     */
    static int optimalNumOfHashFunctions(long expectedInsertions, long numBits) {
        return Math.max(1, (int) Math.round((double) numBits / (double) expectedInsertions * Math.log(2.0D)));
    }

    /**
     * Initialises the named filter from {@code dataList} with a false-positive probability of 0.03.
     *
     * @see #initBloomFilter(String, List, double)
     */
    public boolean initBloomFilter(String bloomFilterName, List<String> dataList) {
        return initBloomFilter(bloomFilterName, dataList, 0.03d);
    }

    /**
     * Sizes the named filter for {@code dataList}, stores its parameters in the
     * {@code <name>:info} hash and sets the bits of every element, all in one Redis pipeline.
     *
     * <p>NOTE(review): an existing {@code <name>:bitarray} key is not cleared first, so bits from
     * an earlier initialisation under the same name remain set - confirm whether that is intended.
     *
     * @param bloomFilterName name of the filter
     * @param dataList        elements to load; an empty list yields a minimal, valid filter
     * @param fpp             desired false-positive probability, in {@code [0, 1)}
     * @return always {@code true}
     * @throws IllegalArgumentException if {@code dataList} is {@code null}, {@code fpp} is outside
     *                                  {@code [0, 1)}, or the required bit array would exceed the
     *                                  Redis bitmap limit of 2^32 bits
     */
    public boolean initBloomFilter(String bloomFilterName, List<String> dataList, double fpp) {
        if (dataList == null) {
            throw new IllegalArgumentException("dataList must not be null");
        }
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(fpp >= 0.0D && fpp < 1.0D)) {
            throw new IllegalArgumentException("fpp must be in [0, 1) but was " + fpp);
        }
        // Size an empty list as one element so numBits is never 0 (which would break "mod numBits").
        long expectedInsertions = Math.max(1L, dataList.size());
        long numBits = Math.max(1L, optimalNumOfBits(expectedInsertions, fpp));
        if (numBits > MAX_REDIS_BITMAP_BITS) {
            throw new IllegalArgumentException(
                    "Required bit array length " + numBits + " exceeds the Redis bitmap limit of "
                            + MAX_REDIS_BITMAP_BITS + " bits");
        }
        int numHashFunctions = optimalNumOfHashFunctions(expectedInsertions, numBits);

        // Encode the Redis keys once, with an explicit charset, instead of per command.
        byte[] infoKey = (bloomFilterName + BLOOM_FILTER_INFO_SUFFIX).getBytes(StandardCharsets.UTF_8);
        byte[] bitArrayKey = (bloomFilterName + BLOOM_FILTER_BIT_ARRAY_SUFFIX).getBytes(StandardCharsets.UTF_8);

        // executePipelined opens and closes the pipeline itself; the callback only queues
        // commands and must return null.
        redisTemplate.executePipelined((RedisConnection connection) -> {
            connection.hSet(infoKey, FPP.getBytes(StandardCharsets.UTF_8),
                    String.valueOf(fpp).getBytes(StandardCharsets.UTF_8));
            connection.hSet(infoKey, NUM_BITS.getBytes(StandardCharsets.UTF_8),
                    String.valueOf(numBits).getBytes(StandardCharsets.UTF_8));
            connection.hSet(infoKey, NUM_HASH_FUNCTIONS.getBytes(StandardCharsets.UTF_8),
                    String.valueOf(numHashFunctions).getBytes(StandardCharsets.UTF_8));
            for (String key : dataList) {
                for (long hashIndex : hash(key, numHashFunctions, numBits)) {
                    connection.setBit(bitArrayKey, hashIndex, true);
                }
            }
            return null;
        });
        return true;
    }
}