前面我们通过研究KOOM的开源代码,分析了Java层和native层内存泄漏监控的实现原理。还剩下线程泄漏这部分没有分析,今天来补全它。整体看下来,相信我们对内存监控在代码层面的实现会有一个较为体系化的了解。
线程monitor的流程
从开启monitor的startLoop方法开始:
/**
 * One iteration of the monitor loop: runs the thread-leak check and asks the
 * loop to keep going.
 */
override fun call(): LoopState = run {
  handleThreadLeak()
  LoopState.Continue
}
一路进入方法栈,到了这里:
// Posts an ACTION_REFRESH message stamped with the current time to the hook
// looper. The payload is heap-allocated here and released by the looper's
// ACTION_REFRESH handler after it has been processed.
void Refresh() {
  sHookLooper->post(ACTION_REFRESH,
                    new SimpleHookInfo(Util::CurrentTimeNs()));
}
看下sHookLooper所属的HookLooper类:
namespace koom {
// Looper that receives thread-hook messages and forwards them to a
// ThreadHolder.
class HookLooper : public looper {
 public:
  // Owned by this looper: allocated in the constructor and deleted in the
  // destructor.
  koom::ThreadHolder *holder;

  HookLooper();
  ~HookLooper();

  // The looper owns `holder` through a raw pointer, so a copy would end up
  // deleting the same ThreadHolder twice. Copying is therefore disabled.
  HookLooper(const HookLooper &) = delete;
  HookLooper &operator=(const HookLooper &) = delete;

  // Dispatches one message. `what` is a HookAction value and `data` is the
  // heap-allocated payload that belongs to that action.
  void handle(int what, void *data);

  // Queues a message for later handling by handle().
  void post(int what, void *data);
};
根据ACTION_REFRESH来看看有哪些action:
// Message codes exchanged with the hook looper. The numeric values are spelled
// out so that they stay stable if the list is ever reordered.
enum HookAction {
  ACTION_ADD_THREAD = 0,     // a hooked thread started running
  ACTION_START_THREAD = 1,
  ACTION_JOIN_THREAD = 2,    // a thread was joined
  ACTION_EXIT_THREAD = 3,    // a thread exited
  ACTION_DETACH_THREAD = 4,  // a thread was detached
  ACTION_INIT = 5,
  ACTION_REFRESH = 6,        // periodic request to report leaked threads
  ACTION_SET_NAME = 7,
};
根据action的处理,找到了handler处理message的地方:
namespace koom {
// Log tag used for every message emitted by the hook looper.
const char *looper_tag = "koom-hook-looper";

// Creates the ThreadHolder that every handled action operates on.
HookLooper::HookLooper() : looper(), holder(new koom::ThreadHolder()) {}

// Releases the ThreadHolder created by the constructor.
HookLooper::~HookLooper() { delete holder; }

// Dispatches a single message. The base looper sees the message first; then
// the payload is cast to the type that belongs to `what`, forwarded to the
// ThreadHolder and deleted. Payloads of any other action are left untouched.
void HookLooper::handle(int what, void *data) {
  looper::handle(what, data);

  if (what == ACTION_ADD_THREAD) {
    koom::Log::info(looper_tag, "AddThread");
    auto *added = static_cast<HookAddInfo *>(data);
    holder->AddThread(added->tid, added->pthread, added->is_thread_detached,
                      added->time, added->create_arg);
    delete added;
    return;
  }

  if (what == ACTION_JOIN_THREAD) {
    koom::Log::info(looper_tag, "JoinThread");
    auto *joined = static_cast<HookInfo *>(data);
    holder->JoinThread(joined->thread_id);
    delete joined;
    return;
  }

  if (what == ACTION_DETACH_THREAD) {
    koom::Log::info(looper_tag, "DetachThread");
    auto *detached = static_cast<HookInfo *>(data);
    holder->DetachThread(detached->thread_id);
    delete detached;
    return;
  }

  if (what == ACTION_EXIT_THREAD) {
    koom::Log::info(looper_tag, "ExitThread");
    auto *exited = static_cast<HookExitInfo *>(data);
    holder->ExitThread(exited->thread_id, exited->threadName, exited->time);
    delete exited;
    return;
  }

  if (what == ACTION_REFRESH) {
    koom::Log::info(looper_tag, "Refresh");
    auto *tick = static_cast<SimpleHookInfo *>(data);
    holder->ReportThreadLeak(tick->time);
    delete tick;
    return;
  }

  // Every other action is intentionally ignored here.
}

// Forwards the message to the base looper's queue unchanged.
void HookLooper::post(int what, void *data) { looper::post(what, data); }
}  // namespace koom
可以发现不同线程相关的操作都进行了处理。
以HookThreadStart为例,看看发送这个message的地方:
// Entry point that runs on every hooked thread in place of the caller's start
// routine. It records the new thread with the hook looper and then runs the
// original routine.
//
// `arg` is the StartRtnArg allocated by HookThreadCreate. This function deletes
// it; the ThreadCreateArg it points to is handed on inside the
// ACTION_ADD_THREAD payload instead.
//
// NOTE(review): the value returned by the original start routine is dropped
// because this function returns void. Callers of pthread_join on a hooked
// thread will not see it. Fixing this needs a signature change; confirm with
// the header.
ALWAYS_INLINE void ThreadHooker::HookThreadStart(void *arg) {
  koom::Log::info(thread_tag, "HookThreadStart");
  auto *hookArg = static_cast<StartRtnArg *>(arg);

  pthread_t self = pthread_self();
  int state = 0;
  pthread_attr_t attr;
  if (pthread_getattr_np(self, &attr) == 0) {
    pthread_attr_getdetachstate(&attr, &state);
    // An attribute object filled in by pthread_getattr_np must be destroyed
    // once it is no longer needed, otherwise its resources may leak.
    pthread_attr_destroy(&attr);
  }

  int tid = static_cast<int>(syscall(SYS_gettid));
  // NOTE(review): "%d" is used for stack_time, which is assigned a
  // CurrentTimeNs() difference elsewhere; confirm the field really is
  // int-sized.
  koom::Log::info(thread_tag, "HookThreadStart %p, %d, %d", self, tid,
                  hookArg->thread_create_arg->stack_time);

  auto info = new HookAddInfo(tid, Util::CurrentTimeNs(), self,
                              state == PTHREAD_CREATE_DETACHED,
                              hookArg->thread_create_arg);
  sHookLooper->post(ACTION_ADD_THREAD, info);

  // Copy out what is still needed before the wrapper argument is freed.
  void *(*start_rtn)(void *) = hookArg->start_rtn;
  void *routine_arg = hookArg->arg;
  delete hookArg;
  start_rtn(routine_arg);
}
这个方法是在HookThreadCreate中作为新线程的入口函数传给pthread_create的:
int ThreadHooker::HookThreadCreate(pthread_t *tidp, const pthread_attr_t *attr,
void *(*start_rtn)(void *), void *arg) {
if (hookEnabled() && start_rtn != nullptr) {
auto time = Util::CurrentTimeNs();
koom::Log::info(thread_tag, "HookThreadCreate");
auto *hook_arg = new StartRtnArg(arg, Util::CurrentTimeNs(), start_rtn);
auto *thread_create_arg = hook_arg->thread_create_arg;
void *thread = koom::CallStack::GetCurrentThread();
if (thread != nullptr) {
koom::CallStack::JavaStackTrace(thread,
hook_arg->thread_create_arg->java_stack);
}
koom::CallStack::FastUnwind(thread_create_arg->pc,
koom::Constant::kMaxCallStackDepth);
thread_create_arg->stack_time = Util::CurrentTimeNs() - time;
return pthread_create(tidp, attr,
reinterpret_cast<void *(*)(void *)>(HookThreadStart),
reinterpret_cast<void *>(hook_arg));
}
return pthread_create(tidp, attr, start_rtn, arg);
}
HookThreadCreate则是在RegisterSo中通过xhook_register注册为pthread_create的替换函数:
// Registers the pthread hooks for one shared library.
//
// Returns false when the library is on the ignore list and nothing was
// registered, true otherwise. `source` is only used for logging.
bool ThreadHooker::RegisterSo(const std::string &lib, int source) {
  if (IsLibIgnored(lib)) return false;

  const char *lib_path = lib.c_str();
  koom::Log::info(thread_tag, "HookSo %d %s", source, lib_path);

  // Symbol to replacement pairs, registered in the order listed.
  struct HookEntry {
    const char *symbol;
    void *replacement;
  };
  const HookEntry entries[] = {
      {"pthread_create", reinterpret_cast<void *>(HookThreadCreate)},
      {"pthread_detach", reinterpret_cast<void *>(HookThreadDetach)},
      {"pthread_join", reinterpret_cast<void *>(HookThreadJoin)},
      {"pthread_exit", reinterpret_cast<void *>(HookThreadExit)},
  };
  for (const HookEntry &entry : entries) {
    xhook_register(lib_path, entry.symbol, entry.replacement, nullptr);
  }
  return true;
}
来到这里,hook实现的地方找到了,还是通过爱奇艺的xhook,把线程操作的系统API给hook出来了。
到这里,整体的实现思路出来了:通过looper不断轮询,处理定时发送过来的message,去refresh进程里各个线程的相关信息。
而线程信息则是通过native hook技术中的PLT hook来实现hook和信息获取。
通过上述分析,我们知道了整体实现hook的流程,但是hook到系统API之后,又做了什么呢?下面继续分析:
怎么判断线程是否泄漏
AddThread
// Starts tracking a newly started thread.
//
// tid              - kernel thread id, stored as the record's id.
// threadId         - pthread handle, used as the key of threadMap.
// isThreadDetached - whether the thread was already detached when it started.
// start_time       - timestamp taken when the thread began running.
// create_arg       - creation-time data (creation timestamp, native pcs and
//                    Java stack). This function takes ownership and deletes it
//                    on every path.
//
// If the thread is already tracked, nothing is recorded.
void ThreadHolder::AddThread(int tid, pthread_t threadId, bool isThreadDetached,
                             int64_t start_time, ThreadCreateArg *create_arg) {
  if (threadMap.count(threadId) > 0) {
    // Already tracked. The argument is still ours to release; without this
    // delete the early return would leak it.
    delete create_arg;
    return;
  }

  koom::Log::info(holder_tag, "AddThread tid:%d pthread_t:%p", tid, threadId);
  auto &item = threadMap[threadId];
  item.Clear();
  item.thread_internal_id = threadId;
  item.thread_detached = isThreadDetached;
  item.startTime = start_time;
  item.create_time = create_arg->time;
  item.id = tid;

  std::string &stack = item.create_call_stack;
  stack.assign("");
  try {
    // Native stack: symbolize every captured pc. Frames that cannot be
    // symbolized are skipped and do not consume a frame number.
    int ignoreLines = 0;
    for (int index = 0; index < koom::Constant::kMaxCallStackDepth; ++index) {
      uintptr_t p = create_arg->pc[index];
      if (p == 0) continue;
      std::string line = koom::CallStack::SymbolizePc(p, index - ignoreLines);
      if (line.empty()) {
        ignoreLines++;
      } else {
        line.append("\n");
        stack.append(line);
      }
    }

    // Java stack: one "#"-prefixed line per non-empty frame.
    std::vector<std::string> splits =
        koom::Util::Split(create_arg->java_stack.str(), '\n');
    for (const auto &split : splits) {
      if (split.empty()) continue;
      std::string line;
      line.append("#");
      line.append(split);
      line.append("\n");
      stack.append(line);
    }

    // Blank stack: drop the leftover "##".
    if (stack.size() == 3) stack.assign("");
  } catch (const std::bad_alloc &) {
    stack.assign("error:bad_alloc");
  }

  delete create_arg;
  koom::Log::info(holder_tag, "AddThread finish");
}
这里拿到了线程创建时间和id等信息。
JoinThread
// Handles a join on `threadId`. A thread that is still tracked is marked so
// that it will not be reported when it exits; otherwise any leak record kept
// for it is dropped.
void ThreadHolder::JoinThread(pthread_t threadId) {
  const auto tracked = threadMap.find(threadId);
  koom::Log::info(holder_tag, "JoinThread tid:%p", threadId);
  if (tracked == threadMap.end()) {
    leakThreadMap.erase(threadId);
    return;
  }
  tracked->second.thread_detached = true;
}
ExitThread
// Handles the exit of `threadId`. Untracked threads are ignored. For a tracked
// thread the exit time and name are stored; if it was neither joined nor
// detached by then, a copy of its record is moved into leakThreadMap. The
// thread is removed from threadMap in either case.
void ThreadHolder::ExitThread(pthread_t threadId, std::string &threadName,
                              long long int time) {
  const auto tracked = threadMap.find(threadId);
  if (tracked == threadMap.end()) return;

  auto &record = tracked->second;
  // NOTE(review): this logs the name stored before `threadName` is applied
  // below; confirm that is intended.
  koom::Log::info(holder_tag, "ExitThread tid:%p name:%s", threadId,
                  record.name.c_str());
  record.exitTime = time;
  record.name.assign(threadName);

  const bool leaked = !record.thread_detached;
  if (leaked) {
    // Leaked
    koom::Log::error(holder_tag,
                     "Exited thread Leak! Not joined or detached!\n tid:%p",
                     threadId);
    leakThreadMap[threadId] = record;
  }

  threadMap.erase(tracked);
  koom::Log::info(holder_tag, "ExitThread finish");
}
DetachThread
// Handles a detach of `threadId`. A thread that is still tracked is marked as
// detached; otherwise any leak record kept for it is dropped.
void ThreadHolder::DetachThread(pthread_t threadId) {
  const auto tracked = threadMap.find(threadId);
  koom::Log::info(holder_tag, "DetachThread tid:%p", threadId);
  if (tracked == threadMap.end()) {
    leakThreadMap.erase(threadId);
    return;
  }
  tracked->second.thread_detached = true;
}
可以发现,代码是通过thread_detached这个标志来判断线程是否泄漏的:线程被join或detach时,该标志会被置为true;假如线程执行到ExitThread时thread_detached仍为false(既没有被join,也没有被detach),则判定为线程泄漏。
接着就是收集一些线程的信息,存储到容器里面。
这些为后续做堆栈回溯信息和整体信息的记录做了一些准备。
总结
通篇看下来,线程泄漏监控的整体思路我们已经有了,但是具体细节其实还有很多。
篇幅和时间原因,这篇就介绍到这里。