HBase LruBlockCache源码分析
本章对LruBlockCache源码进行简单的分析和讲解,其中包括如下部分:
- 构造函数
- getBlock
- cacheBlock
- evict
构造函数说明
LruBlockCache 构造函数
从构造函数可以看出,整体由以下几个部分组成:
- LruBlockCache 内存比例
- 统计参数
// Statistics collector, labelled with this class's simple name.
this.stats = new CacheStats(this.getClass().getSimpleName());
// The counters below are maintained alongside stats for bookkeeping.
// count: incremented on every block access/insert and handed to the block as its access stamp.
this.count = new AtomicLong(0);
// elements: incremented once per block inserted into the cache.
this.elements = new AtomicLong(0);
// dataBlockElements: incremented only when the inserted block's type is a data block.
this.dataBlockElements = new AtomicLong(0);
// dataBlockSize: presumably the bytes held by data blocks -- not exercised in the code shown here.
this.dataBlockSize = new AtomicLong(0);
// size: starts at the fixed overhead value rather than at zero.
this.size = new AtomicLong(this.overhead);
存储容器
// Concurrent map from cache key to cached block; presumably assigned to the `map` field
// (the assignment itself is not part of this excerpt).
new ConcurrentHashMap<BlockCacheKey, LruCachedBlock>(mapInitialSize,
mapLoadFactor, mapConcurrencyLevel);
LruBlockCache EvictionThread(清理线程:每10秒超时唤醒后清理一次,或被notify唤醒后立即清理)
if(evictionThread) {
this.evictionThread = new EvictionThread(this);
this.evictionThread.start();
} else {
this.evictionThread = null;
}
/**
 * Eviction loop: while {@code go} is set, waits on this object's monitor for at most ten
 * seconds (or until notified), then runs one eviction pass on the referenced cache.
 * The loop ends when {@code go} is cleared or when {@code this.cache.get()} returns null.
 */
@Override
public void run() {
  enteringRun = true;
  while (true) {
    if (!this.go) {
      return;
    }
    synchronized (this) {
      try {
        // Bounded wait so the thread never sleeps forever without a notify.
        this.wait(10 * 1000);
      } catch (InterruptedException e) {
        LOG.warn("Interrupted eviction thread ", e);
        // Restore the interrupt flag for whoever inspects it later.
        // NOTE(review): with the flag set, later wait() calls probably throw at once,
        // so this loop may spin until go is cleared -- confirm this is intended.
        Thread.currentThread().interrupt();
      }
    }
    LruBlockCache owner = this.cache.get();
    if (owner == null) {
      // The cache reference is gone; nothing left to evict from.
      return;
    }
    owner.evict();
  }
}
定时日志 – print log/5m (default)
// Run a StatisticsThread at a fixed rate: both the initial delay and the period are
// statThreadPeriod, expressed in seconds.
this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this),
statThreadPeriod, statThreadPeriod, TimeUnit.SECONDS);
/**
 * Logs a one-line summary of cache occupancy and hit/eviction statistics at INFO level.
 *
 * <p>Each field is written as {@code name=value}, and consecutive fields are separated by
 * exactly one {@code ", "}. A ratio is printed as {@code 0} while its hit counter is still
 * zero; otherwise it is whatever {@code StringUtils.formatPercent(ratio, 2)} returns.
 */
public void logStats() {
  long totalSize = heapSize();
  long freeSize = maxSize - totalSize;

  // Build the ratio values WITHOUT a trailing separator. The previous version appended
  // ", " inside the ternary and again after it, producing "hitRatio=12%, , cachingAccesses"
  // when hits > 0, and "cachingHitsRatio=0,evictions" (no space) when caching hits == 0.
  String hitRatio = stats.getHitCount() == 0
      ? "0"
      : StringUtils.formatPercent(stats.getHitRatio(), 2);
  String cachingHitsRatio = stats.getHitCachingCount() == 0
      ? "0"
      : StringUtils.formatPercent(stats.getHitCachingRatio(), 2);

  LruBlockCache.LOG.info("totalSize=" + StringUtils.byteDesc(totalSize) + ", " +
      "freeSize=" + StringUtils.byteDesc(freeSize) + ", " +
      "max=" + StringUtils.byteDesc(this.maxSize) + ", " +
      "blockCount=" + getBlockCount() + ", " +
      "accesses=" + stats.getRequestCount() + ", " +
      "hits=" + stats.getHitCount() + ", " +
      "hitRatio=" + hitRatio + ", " +
      "cachingAccesses=" + stats.getRequestCachingCount() + ", " +
      "cachingHits=" + stats.getHitCachingCount() + ", " +
      "cachingHitsRatio=" + cachingHitsRatio + ", " +
      "evictions=" + stats.getEvictionCount() + ", " +
      "evicted=" + stats.getEvictedCount() + ", " +
      "evictedPerRun=" + stats.evictedPerEviction());
}
getBlock分析
/**
 * Looks up a block by key.
 *
 * @param cacheKey key of the block to fetch
 * @param caching forwarded to the hit/miss statistics; also decides whether a block found
 *     in the victim handler is copied back into this cache
 * @param repeat when true, a miss is neither counted nor forwarded to the victim handler
 * @param updateCacheMetrics when false, hit/miss statistics are left untouched
 * @return the cached buffer on a hit; on a miss, whatever the victim handler returns, or
 *     null when there is no victim handler or {@code repeat} is true
 */
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
    boolean updateCacheMetrics) {
  LruCachedBlock cached = map.get(cacheKey);

  // Hit: record it, stamp the block with a fresh access number and hand back its buffer.
  if (cached != null) {
    if (updateCacheMetrics) {
      stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
    }
    cached.access(count.incrementAndGet());
    return cached.getBuffer();
  }

  // Miss on a repeated lookup: no statistics update and no victim lookup.
  if (repeat) {
    return null;
  }

  if (updateCacheMetrics) {
    stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
  }

  // victimHandler is the "victim" cache: per the original note, entries evicted from this
  // cache that should not have been dropped are moved there.
  if (victimHandler == null) {
    return null;
  }
  Cacheable fromVictim = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
  if (fromVictim != null && caching) {
    // Promote the victim's block back into this cache.
    cacheBlock(cacheKey, fromVictim, false, true);
  }
  return fromVictim;
}
cacheBlock分析
/**
 * Inserts a block into the cache, triggering an eviction run when the cache grows past
 * its acceptable size.
 *
 * @param cacheKey key under which the block is stored
 * @param buf the block to cache
 * @param inMemory passed through to the new {@code LruCachedBlock}
 * @param cacheDataInL1 not read by the code shown in this excerpt
 * @throws RuntimeException if the key is already cached with different contents
 */
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
    final boolean cacheDataInL1) {
  // Blocks larger than maxBlockSize are never cached.
  if (buf.heapSize() > maxBlockSize) {
    // (elided in this excerpt) update stats and log a warning
    return;
  }

  // The key may already be present.
  LruCachedBlock existing = map.get(cacheKey);
  if (existing != null) {
    boolean sameContents = BlockCacheUtil.compareCacheBlock(buf, existing.getBuffer()) == 0;
    if (!sameContents) {
      // Same key, different bytes: refuse loudly.
      throw new RuntimeException("Cached block contents differ, which should not have happened."
          + "cacheKey:" + cacheKey);
    }
    // (elided in this excerpt) log a warning about the duplicate insert
    return;
  }

  final long sizeBefore = size.get();
  final long acceptable = acceptableSize();
  final long hardLimit = (long) (hardCapacityLimitFactor * acceptable);
  if (sizeBefore >= hardLimit) {
    // Too full to accept anything: count the failed insert and free space if nobody else is.
    stats.failInsert();
    // (elided in this excerpt) trace log
    if (!evictionInProgress) {
      runEviction();
    }
    return;
  }

  LruCachedBlock fresh = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
  map.put(cacheKey, fresh);

  // Bookkeeping for the new entry.
  long sizeAfter = updateSizeMetrics(fresh, false);
  elements.incrementAndGet();
  if (buf.getBlockType().isData()) {
    dataBlockElements.incrementAndGet();
  }

  // Over the acceptable size after the insert: start an eviction unless one is running.
  if (sizeAfter > acceptable && !evictionInProgress) {
    runEviction();
  }
}
evict分析
该逻辑较为复杂,换种方式来说明一下
- 触发条件: bytesToFree = currentSize - minSize(),仅当 bytesToFree > 0 时才进行清理;
- 过程(仅说明正常情况,异常情况不予说明)(伪代码):
- [single, multi, memory].map(new BlockBucket(_))
- cache.entrys.foreach(entry => findBucket(entry.blockType).add(entry))
- queue = new PriorityQueue; queue.add(buckets)
- queue.foreach(_.free)
- print log
注意点说明
- PriorityQueue: 可以翻译成【优先队列】,即添加至该队列中的对象 将会被排序
/** Orders buckets by ascending overflow: the bucket with the smaller overflow sorts first. */
public int compareTo(BlockBucket that) {
  long mine = this.overflow();
  long theirs = that.overflow();
  return Long.compare(mine, theirs);
}
- BlockBucket构造函数
/**
 * Creates an empty bucket.
 *
 * @param name bucket label; per the original note one of single | multi | memory
 * @param bytesToFree forwarded to the internal {@code LruCachedBlockQueue}
 * @param blockSize forwarded to the internal {@code LruCachedBlockQueue}
 * @param bucketSize size this bucket is entitled to; per the original note callers pass
 *     (long) Math.floor(maxSize * [single|multi|memory]Factor * minFactor)
 */
public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
  // Running total of every block offered through add(), i.e. how much of the cache this
  // bucket's kind of block currently occupies.
  this.totalSize = 0;
  // Holds the blocks selected as eviction candidates.
  this.queue = new LruCachedBlockQueue(bytesToFree, blockSize);
  this.bucketSize = bucketSize;
  this.name = name;
}
/**
 * Accounts for a block and offers it to the candidate queue.
 *
 * <p>{@code totalSize} only ever grows here: the block's size is counted even when the
 * queue decides not to retain the block.
 */
public void add(LruCachedBlock block) {
  long blockBytes = block.heapSize();
  totalSize = totalSize + blockBytes;
  // The queue applies its own selection and may drop this block or a previously kept one.
  queue.add(block);
}
/**
 * Returns how far this bucket exceeds its share: the bytes accounted to it minus
 * {@code bucketSize}. The result is negative when the bucket is under its share.
 */
public long overflow() {
  long excess = totalSize - bucketSize;
  return excess;
}
- LruCachedBlockQueue
/** LruCachedBlockQueue */
/** MinMaxPriorityQueue<LruCachedBlock> queue */
// LruCachedBlockQueue的add方法
/**
 * Offers a block to the bounded candidate queue.
 *
 * <p>While the retained bytes are below {@code maxSize} (per the original note, this is the
 * number of bytes to free) every block is accepted. After that, a block is accepted only if
 * it compares greater than the current head, and the head is dropped when the queue would
 * otherwise exceed {@code maxSize}.
 */
public void add(LruCachedBlock cb) {
  // Still below the target: keep everything.
  if (heapSize < maxSize) {
    queue.add(cb);
    heapSize += cb.heapSize();
    return;
  }

  // Target reached. Per the original note, the head is the retained block with the
  // largest accessTime.
  LruCachedBlock newest = queue.peek();
  if (cb.compareTo(newest) <= 0) {
    // cb is not older than the head, so it is not a better eviction candidate.
    return;
  }

  // cb.accessTime < head.accessTime: take cb in, and drop the head if it no longer fits.
  heapSize += cb.heapSize();
  heapSize -= newest.heapSize();
  if (heapSize > maxSize) {
    queue.poll();
  } else {
    // Head stays after all; put its size back.
    heapSize += newest.heapSize();
  }
  queue.add(cb);

  // Intended net effect: the queue keeps only
  //   1. the blocks with the smallest accessTime, and
  //   2. just enough of them for their total size to reach the amount to be freed.
}
/** LruCachedBlock */
/**
 * Reverse ordering on {@code accessTime}: returns 1 when this block's accessTime is smaller
 * than the other's, -1 when it is larger, and 0 when they are equal.
 *
 * <p>accessTime looks like a recency stamp drawn from the cache-wide access counter rather
 * than a per-block access count -- confirm against {@code LruCachedBlock.access}.
 */
@Override
public int compareTo(LruCachedBlock that) {
  // Arguments are swapped on purpose to get the descending order described above.
  return Long.compare(that.accessTime, this.accessTime);
}
注意点总结
- 循环将cache.entrys添加到BlockBucket中,最后BlockBucket中只会留下【被选择清理的对象】
- 添加到PriorityQueue中的BlockBucket,会按照overflow的大小(实际存放量 与 min存放量之间的差值)从小到大排序(java.util.PriorityQueue的队首是最小元素),即溢出最少的bucket最先被处理,溢出最多的bucket最后被处理。每个bucket最多只释放自己溢出的部分,且不超过剩余待释放量按剩余bucket数均分后的份额,从而尽可能地保留更多的block:
// Free no more than this bucket's own overflow, and no more than an equal share of the
// bytes still to be freed, split across the buckets that remain to be processed.
long bucketBytesToFree = Math.min(overflow,
(bytesToFree - bytesFreed) / remainingBuckets);
// bucket.free returns the number of bytes it released; add that to the running total.
bytesFreed += bucket.free(bucketBytesToFree);