<blockquote>
基於Android 6.0源碼, 分析debuggerd守護進程的工作原理
/system/core/debuggerd/debuggerd.cpp
/system/core/debuggerd/tombstone.cpp
/system/core/debuggerd/backtrace.cpp
/system/core/debuggerd/arm/Machine.cpp
/system/core/debuggerd/arm64/Machine.cpp
/system/core/libcutils/debugger.c
/system/core/include/BacktraceMap.h
/system/core/libbacktrace/BacktraceMap.cpp
/system/core/libbacktrace/Backtrace.cpp
/bionic/linker/arch/arm/begin.S
/bionic/linker/linker.cpp
/bionic/linker/debugger.cpp
Android系統有監控程序異常退出的機制,這便是本文要講述的debuggerd守護進程。當發生native crash或者主動調用debuggerd時,會輸出進程相關的狀態信息到文件或者控制檯。輸出的debuggerd數據保存在文件/data/tombstones/tombstone_XX,該類型文件個數上限為10個,當超過時則每次覆蓋時間最老的文件。
針對進程出現的不同的狀態,Linux kernel會發送相應的signal給異常進程,捕獲signal並對其做相應的處理(通常動作是退出異常進程)。而Android在這機制的前提下,通過攔截這些信號來dump進程信息,方便開發人員調試分析。
debuggerd守護進程會打開socket服務端,當需要調用debuggerd服務時,先通過客戶端進程向debuggerd服務端建立socket連接,然後發送不同的請求給debuggerd服務端,當服務端收到不同的請求,則會採取相應的dump操作。
接下來從源碼角度來探索debuggerd客戶端和服務端的工作原理。
debuggerd -b <tid>
debuggerd <tid>
通過adb執行上面的命令都能觸發debuggerd進行相應的dump操作,其中參數-b
表示在控制檯中輸出backtrace,參數tid表示的是需要dump的進程或者線程id。這兩個命令的輸出結果相差較大,下面來一步步分析看看這兩個命令分別能觸發哪些操作,執行上述命令都會調用debuggerd的main方法()。
[-> /debuggerd/debuggerd.cpp]
// Command-line entry point (excerpt; part of the body is elided with "...").
// Parses "-b" and a tid from argv and then performs an explicit dump.
// Returns 1 after printing usage when the arguments are invalid or no tid is given;
// otherwise returns the result of do_explicit_dump().
int main(int argc, char** argv) {
...
bool dump_backtrace = false;
bool have_tid = false;
pid_t tid = 0;
// Parse the arguments: "-b" selects backtrace mode, the first other argument is the tid
for (int i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-b")) {
dump_backtrace = true;
} else if (!have_tid) {
tid = atoi(argv[i]);
have_tid = true;
} else {
// A second non-"-b" argument is not accepted
usage();
return 1;
}
}
// No tid was specified: print usage and return
if (!have_tid) {
usage();
return 1;
}
// See section 2.2
return do_explicit_dump(tid, dump_backtrace);
}
對於debuggerd命令,必須指定線程tid,否則不做任何操作,直接返回。
[-> /debuggerd/debuggerd.cpp]
// Performs a dump that was explicitly requested from the command line.
// When dump_backtrace is true the backtrace is written to stdout; otherwise a
// tombstone is requested and its path is reported on stderr.
// Returns 0 on success and 1 when the dump fails.
static int do_explicit_dump(pid_t tid, bool dump_backtrace) {
  fprintf(stdout, "Sending request to dump task %d.\n", tid);

  if (!dump_backtrace) {
    // Tombstone mode: the dump goes to a tombstone file (see section 2.4)
    char path[PATH_MAX];
    const int rc = dump_tombstone(tid, path, sizeof(path));
    if (rc < 0) {
      fputs("Error dumping tombstone.\n", stderr);
      return 1;
    }
    fprintf(stderr, "Tombstone written to: %s\n", path);
    return 0;
  }

  // Backtrace mode: the dump goes to the console (see section 2.3).
  // Flush our own buffered output before the raw descriptor is written to.
  fflush(stdout);
  const int out_fd = fileno(stdout);
  if (dump_backtrace_to_file(tid, out_fd) < 0) {
    fputs("Error dumping backtrace.\n", stderr);
    return 1;
  }
  return 0;
}
dump_backtrace等於true代表的是輸出backtrace到控制檯,否則意味著輸出到tombstone文件。
[-> libcutils/debugger.c]
// Dumps the backtrace of tid to fd by forwarding to
// dump_backtrace_to_file_timeout() with a timeout argument of 0.
int dump_backtrace_to_file(pid_t tid, int fd) {
  const int timeout_secs = 0;
  return dump_backtrace_to_file_timeout(tid, fd, timeout_secs);
}
// Requests a backtrace dump of tid from the debuggerd server and copies
// everything the server sends back to fd.
// Returns -1 when the request cannot be made or a write to fd comes up short,
// otherwise 0.
int dump_backtrace_to_file_timeout(pid_t tid, int fd, int timeout_secs) {
  // Send the dump-backtrace request to the socket server (see section 2.5)
  const int sock_fd = make_dump_request(DEBUGGER_ACTION_DUMP_BACKTRACE, tid, timeout_secs);
  if (sock_fd < 0) {
    return -1;
  }

  int status = 0;
  char chunk[1024];
  for (;;) {
    // Blocking read of the next piece of data sent by the server
    const ssize_t got = TEMP_FAILURE_RETRY(read(sock_fd, chunk, sizeof(chunk)));
    if (got <= 0) {
      break;  // end of stream or read error: stop copying
    }
    // Forward the piece to fd (stdout when invoked from the debuggerd command)
    const ssize_t put = TEMP_FAILURE_RETRY(write(fd, chunk, got));
    if (put != got) {
      status = -1;
      break;
    }
  }

  close(sock_fd);
  return status;
}
該方法的功能:
- 首先,向debuggerd服務端發送DEBUGGER_ACTION_DUMP_BACKTRACE請求,然後阻塞等待;
- 接著,將服務端返回的數據從sock_fd讀出,並寫入指定的fd(此處為stdout)。

[-> libcutils/debugger.c]
// Requests a tombstone for tid by forwarding to dump_tombstone_timeout()
// with a timeout argument of 0.
int dump_tombstone(pid_t tid, char* pathbuf, size_t pathlen) {
  const int timeout_secs = 0;
  return dump_tombstone_timeout(tid, pathbuf, pathlen, timeout_secs);
}
// Requests a tombstone dump of tid from the debuggerd server and reads back
// the tombstone file name.
//   pathbuf/pathlen: optional output buffer; when both are non-zero the name is
//                    copied into it, truncated to pathlen - 1 bytes and
//                    NUL-terminated.
// Returns -1 when the request cannot be made or nothing can be read from the
// server, otherwise 0.
int dump_tombstone_timeout(pid_t tid, char* pathbuf, size_t pathlen, int timeout_secs) {
  // Send the dump-tombstone request to the socket server (see section 2.5)
  int sock_fd = make_dump_request(DEBUGGER_ACTION_DUMP_TOMBSTONE, tid, timeout_secs);
  if (sock_fd < 0) {
    return -1;
  }

  char buffer[100];
  int result = 0;
  // Read the tombstone file name sent by the server; one byte is reserved for the NUL
  ssize_t n = TEMP_FAILURE_RETRY(read(sock_fd, buffer, sizeof(buffer) - 1));
  if (n <= 0) {
    result = -1;
  } else {
    if (pathbuf && pathlen) {
      if (n >= (ssize_t) pathlen) {
        n = pathlen - 1;
      }
      buffer[n] = '\0';
      // Copy the name, including its terminator, into pathbuf
      memcpy(pathbuf, buffer, n + 1);
    }
  }
  close(sock_fd);
  return result;
}
該方法的功能:
- 首先,向debuggerd服務端發送DEBUGGER_ACTION_DUMP_TOMBSTONE請求,然後阻塞等待;
- 接著,讀取服務端返回的tombstone文件名,並拷貝到pathbuf。

[-> libcutils/debugger.c]
// Builds a debugger_msg_t for the given action and tid, connects to the
// debuggerd server socket and sends the message (excerpt; part of the body is
// elided with "...").
// Returns the connected socket descriptor, or -1 when sending the request fails.
static int make_dump_request(debugger_action_t action, pid_t tid, int timeout_secs) {
debugger_msg_t msg;
memset(&msg, 0, sizeof(msg));
msg.tid = tid;
msg.action = action;
// Connect to the debuggerd server socket and obtain the client-side descriptor sock_fd
int sock_fd = socket_local_client(DEBUGGER_SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT,
SOCK_STREAM | SOCK_CLOEXEC);
...
// Write msg to sock_fd and wait for the acknowledgement (see section 2.6)
if (send_request(sock_fd, &msg, sizeof(msg)) < 0) {
close(sock_fd);
return -1;
}
return sock_fd;
}
該函數的功能是與debuggerd服務端建立socket通信,並發送action請求,以執行相應操作。
// Writes msg_len bytes from msg_ptr to sock_fd and then waits for a one-byte
// acknowledgement.
// Returns 0 when both the full write and the one-byte read succeed, otherwise -1.
static int send_request(int sock_fd, void* msg_ptr, size_t msg_len) {
  // Send the message
  const ssize_t written = TEMP_FAILURE_RETRY(write(sock_fd, msg_ptr, msg_len));
  if (written != (ssize_t) msg_len) {
    return -1;
  }

  // Wait for the reply byte
  char ack;
  const ssize_t got = TEMP_FAILURE_RETRY(read(sock_fd, &ack, 1));
  return (got == 1) ? 0 : -1;
}
通過調用debuggerd <tid>
命令調用流程圖:
執行debuggerd命令最終都是調用send_request()方法,向debuggerd服務端發出DEBUGGER_ACTION_DUMP_TOMBSTONE
或者DEBUGGER_ACTION_DUMP_BACKTRACE
請求,那對於debuggerd服務端收到相應命令做了哪些操作呢,要想明白這個過程,接下來看看debuggerd服務端的工作。
在執行debuggerd命令之前,debuggerd服務端早早就已準備就緒,時刻等待著client請求的到來。
由init進程fork子進程來以daemon方式啟動,定義在debuggerd.rc文件(舊版本位於init.rc)
service debuggerd /system/bin/debuggerd
group root readproc
writepid /dev/cpuset/system-background/tasks
init進程會解析上述rc文件,調用/system/bin/debuggerd文件,進入main方法,此時不帶有任何參數。 接下來進入main()方法。
[-> /debuggerd/debuggerd.cpp]
// Entry point, server path (excerpt; the handling of other argument counts is
// elided with "..."). With no arguments, installs the SELinux audit and log
// callbacks and runs the server loop.
int main(int argc, char** argv) {
union selinux_callback cb;
// Exactly one argument (the program name): start the service
if (argc == 1) {
cb.func_audit = audit_callback;
selinux_set_callback(SELINUX_CB_AUDIT, cb);
cb.func_log = selinux_log_callback;
selinux_set_callback(SELINUX_CB_LOG, cb);
// See section 3.3
return do_server();
}
...
}
[-> /debuggerd/debuggerd.cpp]
// Server main loop: resets signal dispositions, creates the listening socket,
// starts the signal sender and then accepts and handles client connections
// forever. Returns 1 when the socket or the signal sender cannot be set up.
static int do_server() {
// Do not run crash handling for debuggerd's own crashes: reset all crash
// handlers to their default action
signal(SIGABRT, SIG_DFL);
signal(SIGBUS, SIG_DFL);
signal(SIGFPE, SIG_DFL);
signal(SIGILL, SIG_DFL);
signal(SIGSEGV, SIG_DFL);
#ifdef SIGSTKFLT
signal(SIGSTKFLT, SIG_DFL);
#endif
signal(SIGTRAP, SIG_DFL);
// Ignore SIGPIPE, the signal raised when a write to a closed socket fails
signal(SIGPIPE, SIG_IGN);
// Block SIGCHLD
sigset_t sigchld;
sigemptyset(&sigchld);
sigaddset(&sigchld, SIGCHLD);
sigprocmask(SIG_SETMASK, &sigchld, nullptr);
// Create the server side of the socket
int s = socket_local_server(SOCKET_NAME, ANDROID_SOCKET_NAMESPACE_ABSTRACT,
SOCK_STREAM | SOCK_CLOEXEC);
if (s == -1) return 1;
// Fork a child process that sends signals (it also has root privileges) and
// listens on a pipe to pause and resume the target process
if (!start_signal_sender()) {
ALOGE("debuggerd: failed to fork signal sender");
return 1;
}
ALOGI("debuggerd: starting\n");
for (;;) {
sockaddr_storage ss;
sockaddr* addrp = reinterpret_cast<sockaddr*>(&ss);
socklen_t alen = sizeof(ss);
// Wait for a client to connect
ALOGV("waiting for connection\n");
int fd = accept4(s, addrp, &alen, SOCK_CLOEXEC);
if (fd == -1) {
// A failed accept is logged and the loop keeps serving
ALOGE("accept failed: %s\n", strerror(errno));
continue;
}
// Handle the request of the newly connected client (see section 3.4)
handle_request(fd);
}
return 0;
}
主要功能:
[-> /debuggerd/debuggerd.cpp]
static void handle_request(int fd) {
ALOGV("handle_request(%d)\n", fd);
android::base::unique_fd closer(fd);
debugger_request_t request;
memset(&request, 0, sizeof(request));
//讀取client發送過來的請求【見小節3.5】
int status = read_request(fd, &request);
if (status != 0) {
return;
}
#if defined(LP64)
//對於32位的進程,重定向到32位debuggerd
if (is32bit(request.tid)) {
//僅僅dump backtrace和tombstone請求能重定向
if (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE ||
request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
redirect_to_32(fd, &request);
}
return;
}
#endif
//fork子進程來處理其餘請求命令
pid_t fork_pid = fork();
if (fork_pid == -1) {
ALOGE("debuggerd: failed to fork: %s\n", strerror(errno));
} else if (fork_pid == 0) {
//子進程執行【見小節3.6】
worker_process(fd, request);
} else {
//父進程執行【見小節3.7】
monitor_worker_process(fork_pid, request);
}
}
[-> /debuggerd/debuggerd.cpp]
static int read_request(int fd, debugger_request_t* out_request) {
ucred cr;
socklen_t len = sizeof(cr);
//從fd獲取client進程的pid,uid,gid
int status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
...
fcntl(fd, F_SETFL, O_NONBLOCK);
pollfd pollfds[1];
pollfds[0].fd = fd;
pollfds[0].events = POLLIN;
pollfds[0].revents = 0;
//讀取tid
status = TEMP_FAILURE_RETRY(poll(pollfds, 1, 3000));
debugger_msg_t msg;
memset(&msg, 0, sizeof(msg));
//從fd讀取數據並保存到結構體msg
status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg)));
...
out_request->action = static_cast<debugger_action_t>(msg.action);
out_request->tid = msg.tid;
out_request->pid = cr.pid;
out_request->uid = cr.uid;
out_request->gid = cr.gid;
out_request->abort_msg_address = msg.abort_msg_address;
out_request->original_si_code = msg.original_si_code;
if (msg.action == DEBUGGER_ACTION_CRASH) {
// C/C++進程crash時發送過來的請求
char buf[64];
struct stat s;
snprintf(buf, sizeof buf, "/proc/%d/task/%d", out_request->pid, out_request->tid);
if (stat(buf, &s)) {
return -1; //tid不存在,忽略該顯式dump請求
}
} else if (cr.uid == 0
|| (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) {
//root權限既可以可收集backtraces,又可以dump tombstones;
//system權限只允許收集backtraces;
status = get_process_info(out_request->tid, &out_request->pid,
&out_request->uid, &out_request->gid);
if (status < 0) {
return -1; //tid不存在,忽略該顯式dump請求
}
<span class="hljs-keyword">if</span> (!selinux_action_allowed(fd, out_request))
return <span class="hljs-number">-1</span>; <span class="hljs-comment">//selinux權限不足,忽略該請求</span>
} else {
//其他情況,則直接忽略
return -1;
}
return 0;
}
該方法的功能是首先從socket獲取client進程的pid,uid,gid用於權限控制,能處理以下三種情況:
針對這些情況若相應的tid不存在或selinux權限不滿足,則都忽略該顯式dump請求。read_request執行完成後,則從socket通道中讀取到request信息。
對於client發送過來的請求,server端通過子進程來處理
[-> /debuggerd/debuggerd.cpp]
static void worker_process(int fd, debugger_request_t& request) {
std::string tombstone_path;
int tombstone_fd = -1;
switch (request.action) {
case DEBUGGER_ACTION_DUMP_TOMBSTONE: //case1:輸出tombstone文件
case DEBUGGER_ACTION_CRASH: //case2:出現native crash
//打開tombstone文件【見小節3.6.1】
tombstone_fd = open_tombstone(&tombstone_path);
if (tombstone_fd == -1) {
exit(1); //無法打開tombstone文件,則退出該進程
}
break;
<span class="hljs-keyword">case</span> DEBUGGER_ACTION_DUMP_BACKTRACE: <span class="hljs-comment">//case3:輸出backtrace</span>
<span class="hljs-keyword">break</span>;
<span class="hljs-keyword">default</span>:
<span class="hljs-built_in">exit</span>(<span class="hljs-number">1</span>); <span class="hljs-comment">//其他case則直接結束進程</span>
}
// Attach到目標進程
if (ptrace(PTRACE_ATTACH, request.tid, 0, 0) != 0) {
exit(1); //attach失敗則退出該進程
}
bool attach_gdb = should_attach_gdb(request);
if (attach_gdb) {
// 在特權模式降級之前,打開所有需要監聽的input設備
if (init_getevent() != 0) {
attach_gdb = false; //初始化input設備失敗,不再等待gdb
}
}
...
//生成backtrace【見小節3.6.2】
std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(request.pid));
int amfd = -1;
std::unique_ptr<std::string> amfd_data;
if (request.action == DEBUGGER_ACTION_CRASH) {
//當發生native crash,則連接到AMS【見小節3.6.3】
amfd = activity_manager_connect();
amfd_data.reset(new std::string);
}
bool succeeded = false;
//取消特權模式
if (!drop_privileges()) {
_exit(1); //操作失敗,則退出
}
int crash_signal = SIGKILL;
//執行dump操作,【見小節3.6.4】
succeeded = perform_dump(request, fd, tombstone_fd, backtrace_map.get(), siblings,
&crash_signal, amfd_data.get());
if (succeeded) {
if (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
if (!tombstone_path.empty()) {
android::base::WriteFully(fd, tombstone_path.c_str(), tombstone_path.length());
}
}
}
if (attach_gdb) {
//向目標進程發送SIGSTOP信號
if (!send_signal(request.pid, 0, SIGSTOP)) {
attach_gdb = false; //無法停止通過gdb attach的進程
}
}
if (!attach_gdb) {
//將進程crash情況告知AMS【見小節3.6.5】
activity_manager_write(request.pid, crash_signal, amfd, *amfd_data.get());
}
//detach目標進程
ptrace(PTRACE_DETACH, request.tid, 0, 0);
for (pid_t sibling : siblings) {
ptrace(PTRACE_DETACH, sibling, 0, 0);
}
if (!attach_gdb && request.action == DEBUGGER_ACTION_CRASH) {
//發送信號SIGKILL給目標進程
if (!send_signal(request.pid, request.tid, crash_signal)) {
ALOGE("debuggerd: failed to kill process %d: %s", request.pid, strerror(errno));
}
}
//如果需要則等待gdb
if (attach_gdb) {
wait_for_user_action(request);
//將進程crash情況告知AMS
activity_manager_write(request.pid, crash_signal, amfd, *amfd_data.get());
<span class="hljs-comment">//發送信號SIGCONT給目標進程</span>
<span class="hljs-keyword">if</span> (!send_signal(request.pid, <span class="hljs-number">0</span>, SIGCONT)) {
ALOGE(<span class="hljs-string">"debuggerd: failed to resume process %d: %s"</span>, request.pid, strerror(errno));
}
uninit_getevent();
}
close(amfd);
exit(!succeeded);
}
這個流程比較長,這裡介紹attach_gdb=false的執行流程
[-> tombstone.cpp]
// Opens the tombstone file to write to (excerpt; error handling of the second
// open is elided with "..."). The first slot whose file does not exist yet is
// created; when every slot is taken, the file with the oldest modification
// time is truncated and reused.
//   out_path: optional; receives the path of the chosen file.
// Returns the opened file descriptor.
int open_tombstone(std::string* out_path) {
  char path[128];
  int fd = -1;
  int oldest = -1;
  struct stat oldest_sb;
  // Scan all slots
  for (int i = 0; i < MAX_TOMBSTONES; i++) {
    snprintf(path, sizeof(path), TOMBSTONE_TEMPLATE, i);

    struct stat sb;
    if (stat(path, &sb) == 0) {
      // Remember the tombstone file with the oldest modification time
      if (oldest < 0 || sb.st_mtime < oldest_sb.st_mtime) {
        oldest = i;
        oldest_sb.st_mtime = sb.st_mtime;
      }
      continue;
    }

    // An unused slot exists: create the file, report its path and return
    fd = open(path, O_CREAT | O_EXCL | O_WRONLY | O_NOFOLLOW | O_CLOEXEC, 0600);
    if (fd < 0) {
      // The exclusive create failed (for example the file appeared in the
      // meantime): try the next slot instead of returning an invalid descriptor
      continue;
    }

    if (out_path) {
      *out_path = path;
    }
    fchown(fd, AID_SYSTEM, AID_SYSTEM);
    return fd;
  }

  // No oldest file was found: fall back to tombstone 0
  if (oldest < 0) {
    oldest = 0;
  }

  snprintf(path, sizeof(path), TOMBSTONE_TEMPLATE, oldest);
  // Open (and truncate) the oldest tombstone file
  fd = open(path, O_CREAT | O_TRUNC | O_WRONLY | O_NOFOLLOW | O_CLOEXEC, 0600);
  ...

  if (out_path) {
    *out_path = path;
  }
  fchown(fd, AID_SYSTEM, AID_SYSTEM);
  return fd;
}
其中TOMBSTONE_TEMPLATE為/data/tombstones/tombstone_%02d,文件個數上限MAX_TOMBSTONES=10
打開tombstone文件規則:
- 當存在尚未使用的tombstone文件時,則創建並返回編號最小的未使用文件;
- 當所有tombstone文件都已存在時,則覆蓋修改時間最老的文件;
- 當找不到最老的文件時,則默認使用/data/tombstones/tombstone_00。
[-> BacktraceMap.cpp]
// Allocates a BacktraceMap for pid and builds it.
// Returns the new map, or nullptr (after freeing the map) when Build() fails.
BacktraceMap* BacktraceMap::Create(pid_t pid, bool /*uncached*/) {
  BacktraceMap* created = new BacktraceMap(pid);
  if (created->Build()) {
    return created;
  }
  delete created;
  return nullptr;
}
解析/proc/[pid]/maps, 生成BacktraceMap.
[-> debuggerd.cpp]
// Opens a UNIX stream socket to the activity manager's native-crash listener
// and sets a 1 s send timeout and a 3 s receive timeout on it.
// Returns the connected descriptor (ownership passes to the caller), or -1
// when any step fails.
static int activity_manager_connect() {
  android::base::unique_fd amfd(socket(PF_UNIX, SOCK_STREAM, 0));
  // socket() reports failure as -1; a "< -1" comparison can never be true and
  // would let an invalid descriptor through.
  if (amfd.get() == -1) {
    return -1;  // unable to connect to the activity manager (socket failed)
  }

  struct sockaddr_un address;
  memset(&address, 0, sizeof(address));
  address.sun_family = AF_UNIX;
  // This path must match the definition in NativeCrashListener.java
  strncpy(address.sun_path, "/data/system/ndebugsocket", sizeof(address.sun_path));
  if (TEMP_FAILURE_RETRY(connect(amfd.get(), reinterpret_cast<struct sockaddr*>(&address),
                                 sizeof(address))) == -1) {
    return -1;  // unable to connect to the activity manager (connect failed)
  }

  struct timeval tv;
  memset(&tv, 0, sizeof(tv));
  tv.tv_sec = 1;
  if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
    ALOGE("debuggerd: Unable to connect to activity manager (setsockopt SO_SNDTIMEO failed: %s)",
          strerror(errno));
    return -1;  // setsockopt SO_SNDTIMEO failed
  }

  tv.tv_sec = 3;
  if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
    ALOGE("debuggerd: Unable to connect to activity manager (setsockopt SO_RCVTIMEO failed: %s)",
          strerror(errno));
    return -1;  // setsockopt SO_RCVTIMEO failed
  }

  return amfd.release();
}
該方法的功能是建立與ActivityManager的socket連接。
根據接收到不同的signal採取相應的操作
[-> debuggerd.cpp]
static bool perform_dump(const debugger_request_t& request, int fd, int tombstone_fd,
BacktraceMap* backtrace_map, const std::set<pid_t>& siblings,
int* crash_signal, std::string* amfd_data) {
if (TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) {
ALOGE("debuggerd: failed to respond to client: %s\n", strerror(errno));
return false; //無法響應client端請求
}
int total_sleep_time_usec = 0;
while (true) {
//等待信號到來
int signal = wait_for_signal(request.tid, &total_sleep_time_usec);
switch (signal) {
case -1:
ALOGE("debuggerd: timed out waiting for signal");
return false; //等待超時
<span class="hljs-keyword">case</span> SIGSTOP:
<span class="hljs-keyword">if</span> (request.action == DEBUGGER_ACTION_DUMP_TOMBSTONE) {
ALOGV(<span class="hljs-string">"debuggerd: stopped -- dumping to tombstone"</span>);
<span class="hljs-comment">//【見小節4.1】</span>
engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,
request.original_si_code, request.abort_msg_address, amfd_data);
} <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> (request.action == DEBUGGER_ACTION_DUMP_BACKTRACE) {
ALOGV(<span class="hljs-string">"debuggerd: stopped -- dumping to fd"</span>);
<span class="hljs-comment">//【見小節4.4】</span>
dump_backtrace(fd, backtrace_map, request.pid, request.tid, siblings, <span class="hljs-literal">nullptr</span>);
} <span class="hljs-keyword">else</span> {
ALOGV(<span class="hljs-string">"debuggerd: stopped -- continuing"</span>);
<span class="hljs-keyword">if</span> (ptrace(PTRACE_CONT, request.tid, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>) != <span class="hljs-number">0</span>) {
ALOGE(<span class="hljs-string">"debuggerd: ptrace continue failed: %s"</span>, strerror(errno));
return <span class="hljs-literal">false</span>;
}
<span class="hljs-keyword">continue</span>; <span class="hljs-comment">//再次循環</span>
}
<span class="hljs-keyword">break</span>;
<span class="hljs-keyword">case</span> SIGABRT:
<span class="hljs-keyword">case</span> SIGBUS:
<span class="hljs-keyword">case</span> SIGFPE:
<span class="hljs-keyword">case</span> SIGILL:
<span class="hljs-keyword">case</span> SIGSEGV:
#ifdef SIGSTKFLT
case SIGSTKFLT:
#endif
case SIGTRAP:
ALOGV("stopped -- fatal signal\n");
*crash_signal = signal;
//【見小節4.1】
engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,
request.original_si_code, request.abort_msg_address, amfd_data);
break;
<span class="hljs-keyword">default</span>:
ALOGE(<span class="hljs-string">"debuggerd: process stopped due to unexpected signal %d\n"</span>, signal);
<span class="hljs-keyword">break</span>;
}
<span class="hljs-keyword">break</span>;
}
return true;
}
致命信號有SIGABRT,SIGBUS,SIGFPE,SIGILL,SIGSEGV,SIGSTKFLT,SIGTRAP共7個信號,能造成native crash。
[-> debuggerd.cpp]
// Sends a crash report over amfd: the pid and the signal as 32-bit values in
// network byte order, then the raw dump text, then a single 0 byte marking the
// end. Afterwards one reply byte is read. Does nothing when amfd is -1 and
// stops silently at the first write that fails.
static void activity_manager_write(int pid, int signal, int amfd, const std::string& amfd_data) {
  if (amfd == -1) {
    return;
  }

  const uint32_t net_pid = htonl(pid);
  const uint32_t net_signal = htonl(signal);
  const uint8_t end_of_data = 0;

  // The writes run in order; && stops at the first failure
  const bool sent =
      android::base::WriteFully(amfd, &net_pid, 4) &&
      android::base::WriteFully(amfd, &net_signal, 4) &&
      android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size()) &&
      android::base::WriteFully(amfd, &end_of_data, 1);
  if (!sent) {
    return;
  }

  // Read the reply byte; its value and the result of the read are not used
  uint8_t reply = 0;
  android::base::ReadFully(amfd, &reply, 1);
}
父進程處理
[-> debuggerd.cpp]
// Parent-side supervision of the worker: waits for SIGCHLD (10 s by default,
// effectively unbounded when gdb should attach), decides whether the worker
// and/or the target must be killed, and finally resumes or kills the target.
static void monitor_worker_process(int child_pid, const debugger_request_t& request) {
  struct timespec timeout = {.tv_sec = 10, .tv_nsec = 0 };
  if (should_attach_gdb(request)) {
    timeout.tv_sec = INT_MAX;
  }

  sigset_t signal_set;
  sigemptyset(&signal_set);
  sigaddset(&signal_set, SIGCHLD);

  bool kill_worker = false;
  bool kill_target = false;
  bool kill_self = false;

  int status;
  siginfo_t siginfo;
  int signal = TEMP_FAILURE_RETRY(sigtimedwait(&signal_set, &siginfo, &timeout));
  if (signal == SIGCHLD) {
    pid_t rc = waitpid(-1, &status, WNOHANG | WUNTRACED);
    if (rc != child_pid) {
      ALOGE("debuggerd: waitpid returned unexpected pid (%d), committing murder-suicide", rc);

      if (WIFEXITED(status)) {
        ALOGW("debuggerd: pid %d exited with status %d", rc, WEXITSTATUS(status));
      } else if (WIFSIGNALED(status)) {
        ALOGW("debuggerd: pid %d received signal %d", rc, WTERMSIG(status));
      } else if (WIFSTOPPED(status)) {
        ALOGW("debuggerd: pid %d stopped by signal %d", rc, WSTOPSIG(status));
      } else if (WIFCONTINUED(status)) {
        ALOGW("debuggerd: pid %d continued", rc);
      }

      kill_worker = true;
      kill_target = true;
      kill_self = true;
    } else if (WIFSIGNALED(status)) {
      ALOGE("debuggerd: worker process %d terminated due to signal %d", child_pid, WTERMSIG(status));
      kill_worker = false;
      kill_target = true;
    } else if (WIFSTOPPED(status)) {
      ALOGE("debuggerd: worker process %d stopped due to signal %d", child_pid, WSTOPSIG(status));
      kill_worker = true;
      kill_target = true;
    }
  } else {
    // sigtimedwait did not deliver SIGCHLD: treated as a worker timeout
    ALOGE("debuggerd: worker process %d timed out", child_pid);
    kill_worker = true;
    kill_target = true;
  }

  if (kill_worker) {
    // Something bad happened, kill the worker.
    if (kill(child_pid, SIGKILL) != 0) {
      ALOGE("debuggerd: failed to kill worker process %d: %s", child_pid, strerror(errno));
    } else {
      waitpid(child_pid, &status, 0);
    }
  }

  // The target is resumed with SIGCONT unless it crashed and must be killed
  int exit_signal = SIGCONT;
  if (kill_target && request.action == DEBUGGER_ACTION_CRASH) {
    ALOGE("debuggerd: killing target %d", request.pid);
    exit_signal = SIGKILL;
  } else {
    ALOGW("debuggerd: resuming target %d", request.pid);
  }

  if (kill(request.pid, exit_signal) != 0) {
    ALOGE("debuggerd: failed to send signal %d to target: %s", exit_signal, strerror(errno));
  }

  if (kill_self) {
    stop_signal_sender();
    _exit(1);
  }
}
調用流程:
debuggerd.main
do_server
handle_request
read_request
worker_process(子進程)
monitor_worker_process(父進程)
整個過程的核心方法為worker_process()
,其流程如下:
DEBUGGER_ACTION_CRASH
,則執行activity_manager_connect;DEBUGGER_ACTION_DUMP_BACKTRACE
,則dump_backtrace
()DEBUGGER_ACTION_DUMP_TOMBSTONE
,則engrave_tombstone
()engrave_tombstone
()DEBUGGER_ACTION_DUMP_TOMBSTONE
,則將向client端寫入tombstone數據;DEBUGGER_ACTION_CRASH
,發送信號SIGKILL給目標進程tid整個過程中,【見小節3.6.4】perform_dump是核心過程:對於DEBUGGER_ACTION_DUMP_BACKTRACE命令,則執行dump_backtrace
;否則執行engrave_tombstone
。接下來分別說說這兩個過程
[-> debuggerd/tombstone.cpp]
// Writes the tombstone for the given pid/tid to tombstone_fd by setting up a
// log_t and delegating to dump_crash(). Logs an error and does nothing when
// tombstone_fd is negative.
void engrave_tombstone(int tombstone_fd, BacktraceMap* map, pid_t pid, pid_t tid,
                       const std::set<pid_t>& siblings, int signal, int original_si_code,
                       uintptr_t abort_msg_address, std::string* amfd_data) {
  log_t log;

  // Without a valid tombstone descriptor there is nowhere to write to
  if (tombstone_fd < 0) {
    ALOGE("debuggerd: skipping tombstone write, nothing to do.\n");
    return;
  }

  log.crashed_tid = tid;
  log.current_tid = tid;
  log.amfd_data = amfd_data;
  log.tfd = tombstone_fd;

  // See section 4.2
  dump_crash(&log, map, pid, tid, siblings, signal, original_si_code, abort_msg_address);
}
[-> debuggerd/tombstone.cpp]
// Dump該pid所對應進程的所有tombstone信息
// Dumps all tombstone information for the process pid: banner, header, the
// primary thread, every sibling thread and, when ro.debuggable is "1", log
// output after the primary thread and again at the end.
static void dump_crash(log_t* log, BacktraceMap* map, pid_t pid, pid_t tid,
                       const std::set<pid_t>& siblings, int signal, int si_code,
                       uintptr_t abort_msg_address) {
  // Logs are included only when ro.debuggable starts with '1'
  char debuggable[PROPERTY_VALUE_MAX];
  property_get("ro.debuggable", debuggable, "0");
  const bool include_logs = debuggable[0] == '1';

  _LOG(log, logtype::HEADER,
       "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n");

  // Tombstone header (see section 4.3)
  dump_header_info(log);

  // Primary thread (see section 4.4)
  dump_thread(log, pid, tid, map, signal, si_code, abort_msg_address, true);

  if (include_logs) {
    // Log output (see section 4.5)
    dump_logs(log, pid, 5);
  }

  // Sibling threads (see section 4.4); an empty set simply yields no iterations
  for (std::set<pid_t>::const_iterator it = siblings.begin(); it != siblings.end(); ++it) {
    dump_thread(log, pid, *it, map, 0, 0, 0, false);
  }

  if (include_logs) {
    dump_logs(log, pid, 0);
  }
}
主要輸出信息:
[-> debuggerd/tombstone.cpp]
// Writes the tombstone header: build fingerprint, hardware revision and ABI.
// Both properties fall back to "unknown" when they are not set.
static void dump_header_info(log_t* log) {
  char build_fp[PROPERTY_VALUE_MAX];
  property_get("ro.build.fingerprint", build_fp, "unknown");

  char hw_revision[PROPERTY_VALUE_MAX];
  property_get("ro.revision", hw_revision, "unknown");

  _LOG(log, logtype::HEADER, "Build fingerprint: '%s'\n", build_fp);
  _LOG(log, logtype::HEADER, "Revision: '%s'\n", hw_revision);
  _LOG(log, logtype::HEADER, "ABI: '%s'\n", ABI_STRING);
}
例如:
Build fingerprint: 'xxx/xxx/MMB29M/gityuan06080845:userdebug/test-keys'
Revision: '0'
ABI: 'arm'
調用方法dump_thread(log, pid, tid, map, signal, si_code, abort_msg_address, true);
[-> debuggerd/tombstone.cpp]
// Dumps one thread: thread info, signal info (only when signal is non-zero),
// registers and, when unwinding succeeds, backtrace and stack. For the primary
// thread the abort message, nearby memory/code and the memory maps are dumped
// as well. log->current_tid is set to tid for the duration of the call and
// restored to log->crashed_tid at the end.
static void dump_thread(log_t* log, pid_t pid, pid_t tid, BacktraceMap* map, int signal,
int si_code, uintptr_t abort_msg_address, bool primary_thread) {
log->current_tid = tid;
// See section 4.4.1
dump_thread_info(log, pid, tid);
if (signal) {
// See section 4.4.2
dump_signal_info(log, tid, signal, si_code);
}
// See section 3.6.2
std::unique_ptr<Backtrace> backtrace(Backtrace::Create(pid, tid, map));
if (primary_thread) {
// See section 4.4.3
dump_abort_message(backtrace.get(), log, abort_msg_address);
}
// See section 4.4.4
dump_registers(log, tid);
if (backtrace->Unwind(0)) {
// See section 4.4.5
dump_backtrace_and_stack(backtrace.get(), log);
} else {
ALOGE("Unwind failed: pid = %d, tid = %d", pid, tid);
}
if (primary_thread) {
// See section 4.4.6
dump_memory_and_code(log, backtrace.get());
if (map) {
// See section 4.4.7
dump_all_maps(backtrace.get(), map, log, tid);
}
}
log->current_tid = log->crashed_tid;
}
// Logs the "pid: ..., tid: ..., name: ... >>> ... <<<" header line for a thread.
// The thread name is read from /proc/<tid>/comm and the process name from
// /proc/<pid>/cmdline; either is printed as "UNKNOWN" when it cannot be read.
// For threads named "logd" or "logd.<something>", log->should_retrieve_logcat
// is cleared.
static void dump_thread_info(log_t* log, pid_t pid, pid_t tid) {
char path[64];
char threadnamebuf[1024];
char* threadname = nullptr;
FILE *fp;
// Read the thread name from the /proc/<tid>/comm node
snprintf(path, sizeof(path), "/proc/%d/comm", tid);
if ((fp = fopen(path, "r"))) {
threadname = fgets(threadnamebuf, sizeof(threadnamebuf), fp);
fclose(fp);
if (threadname) {
// Strip the trailing newline left by fgets
size_t len = strlen(threadname);
if (len && threadname[len - 1] == '\n') {
threadname[len - 1] = '\0';
}
}
}
// Blacklist logd, logd.reader, logd.writer, logd.auditd, logd.control ...
static const char logd[] = "logd";
// Matches "logd" exactly or "logd" followed by '.'
if (threadname != nullptr && !strncmp(threadname, logd, sizeof(logd) - 1)
&& (!threadname[sizeof(logd) - 1] || (threadname[sizeof(logd) - 1] == '.'))) {
log->should_retrieve_logcat = false;
}
char procnamebuf[1024];
char* procname = nullptr;
// Read the process name from the /proc/<pid>/cmdline node
snprintf(path, sizeof(path), "/proc/%d/cmdline", pid);
if ((fp = fopen(path, "r"))) {
procname = fgets(procnamebuf, sizeof(procnamebuf), fp);
fclose(fp);
}
_LOG(log, logtype::HEADER, "pid: %d, tid: %d, name: %s >>> %s <<<\n", pid, tid,
threadname ? threadname : "UNKNOWN", procname ? procname : "UNKNOWN");
}
/proc/<pid>/cmdline
/proc/<tid>/comm
例如:
//代表system_server進程的主線程system_server
pid: 1789, tid: 1789, name: system_server >>> system_server <<<
//代表system_server進程的子線程ActivityManager
pid: 1789, tid: 1827, name: ActivityManager >>> system_server <<<
// Logs the "signal N (NAME), code C (CODENAME), fault addr ..." line for tid.
// The siginfo is fetched with ptrace and its si_code is replaced by the
// si_code argument. The fault address is printed only for signals for which
// signal_has_si_addr() is true; otherwise "--------" is shown. Nothing is
// logged when the siginfo cannot be read.
static void dump_signal_info(log_t* log, pid_t tid, int signal, int si_code) {
  siginfo_t info;
  memset(&info, 0, sizeof(info));
  if (ptrace(PTRACE_GETSIGINFO, tid, 0, &info) == -1) {
    ALOGE("cannot get siginfo: %s\n", strerror(errno));
    return;
  }

  // Report the code passed in by the caller rather than the one ptrace returned
  info.si_code = si_code;

  char fault_addr[32];
  if (!signal_has_si_addr(signal)) {
    snprintf(fault_addr, sizeof(fault_addr), "--------");
  } else {
    snprintf(fault_addr, sizeof(fault_addr), "%p", info.si_addr);
  }

  _LOG(log, logtype::HEADER, "signal %d (%s), code %d (%s), fault addr %s\n",
       signal, get_signame(signal), info.si_code, get_sigcode(signal, info.si_code), fault_addr);
}
SIGBUS
,SIGFPE
,SIGILL
,SIGSEGV
,SIGTRAP
時觸發的dump,則會輸出fault addr的具體地址,SIGSTOP
時,則輸出fault addr為"--------"。例如:
signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xd140109c
signal 19 (SIGSTOP), code 0 (SI_USER), fault addr --------
此處get_sigcode
函數功能負責根據signal以及si_code來獲取相應信息,下面來列舉每種signal所包含的信息種類:
signal | get_sigcode |
---|---|
SIGILL | ILL_ILLOPC |
SIGILL | ILL_ILLOPN |
SIGILL | ILL_ILLADR |
SIGILL | ILL_ILLTRP |
SIGILL | ILL_PRVOPC |
SIGILL | ILL_PRVREG |
SIGILL | ILL_COPROC |
SIGILL | ILL_BADSTK |
signal | get_sigcode |
---|---|
SIGBUS | BUS_ADRALN |
SIGBUS | BUS_ADRERR |
SIGBUS | BUS_OBJERR |
SIGBUS | BUS_MCEERR_AR |
SIGBUS | BUS_MCEERR_AO |
signal | get_sigcode |
---|---|
SIGFPE | FPE_INTDIV |
SIGFPE | FPE_INTOVF |
SIGFPE | FPE_FLTDIV |
SIGFPE | FPE_FLTOVF |
SIGFPE | FPE_FLTUND |
SIGFPE | FPE_FLTRES |
SIGFPE | FPE_FLTINV |
SIGFPE | FPE_FLTSUB |
signal | get_sigcode |
---|---|
SIGSEGV | SEGV_MAPERR |
SIGSEGV | SEGV_ACCERR |
SIGSEGV | SEGV_BNDERR |
SIGSEGV | SEGV_MAPERR |
signal | get_sigcode |
---|---|
SIGTRAP | TRAP_BRKPT |
SIGTRAP | TRAP_TRACE |
SIGTRAP | TRAP_BRANCH |
SIGTRAP | TRAP_HWBKPT |
// Reads the abort message from the target's memory and logs it.
//   address: location of the abort message record in the target; 0 means
//            there is none. The record starts with a size_t length that is
//            skipped.
// At most sizeof(msg) - 1 characters are logged.
static void dump_abort_message(Backtrace* backtrace, log_t* log, uintptr_t address) {
  if (address == 0) {
    return;
  }

  address += sizeof(size_t);  // Skip the buffer length.

  char msg[512];
  memset(msg, 0, sizeof(msg));
  char* p = &msg[0];
  char* const end = &msg[sizeof(msg)];
  bool terminated = false;
  // Copy word by word, byte by byte (lowest byte first), until the NUL
  // terminator is copied, the buffer is full or a read fails. Stopping at the
  // terminator and bounding the inner loop keeps p inside msg; the logged
  // text is unchanged because "%s" stops at the first NUL anyway.
  while (!terminated && p < end) {
    word_t data;
    if (!backtrace->ReadWord(address, &data)) {
      break;
    }
    address += sizeof(word_t);

    for (size_t len = sizeof(word_t); len > 0 && p < end; len--) {
      const char c = (data >> (sizeof(word_t) - len) * 8) & 0xff;
      *p++ = c;
      if (c == 0) {
        terminated = true;
        break;
      }
    }
  }
  msg[sizeof(msg) - 1] = '\0';

  _LOG(log, logtype::HEADER, "Abort message: '%s'\n", msg);
}
輸出系統寄存器信息,這裡以arm為例來說明
[-> debuggerd/arm/Machine.cpp]
// Logs the ARM registers of thread tid: r0-r9, sl, fp, ip, sp, lr, pc and
// cpsr, followed by the 32 VFP double registers and fpscr.
// Returns early (after logging an error) when either ptrace request fails, so
// the general registers may be logged without the VFP ones.
void dump_registers(log_t* log, pid_t tid) {
pt_regs r;
if (ptrace(PTRACE_GETREGS, tid, 0, &r)) {
ALOGE("cannot get registers: %s\n", strerror(errno));
return;
}
_LOG(log, logtype::REGISTERS, " r0 %08x r1 %08x r2 %08x r3 %08x\n",
static_cast<uint32_t>(r.ARM_r0), static_cast<uint32_t>(r.ARM_r1),
static_cast<uint32_t>(r.ARM_r2), static_cast<uint32_t>(r.ARM_r3));
_LOG(log, logtype::REGISTERS, " r4 %08x r5 %08x r6 %08x r7 %08x\n",
static_cast<uint32_t>(r.ARM_r4), static_cast<uint32_t>(r.ARM_r5),
static_cast<uint32_t>(r.ARM_r6), static_cast<uint32_t>(r.ARM_r7));
_LOG(log, logtype::REGISTERS, " r8 %08x r9 %08x sl %08x fp %08x\n",
static_cast<uint32_t>(r.ARM_r8), static_cast<uint32_t>(r.ARM_r9),
static_cast<uint32_t>(r.ARM_r10), static_cast<uint32_t>(r.ARM_fp));
_LOG(log, logtype::REGISTERS, " ip %08x sp %08x lr %08x pc %08x cpsr %08x\n",
static_cast<uint32_t>(r.ARM_ip), static_cast<uint32_t>(r.ARM_sp),
static_cast<uint32_t>(r.ARM_lr), static_cast<uint32_t>(r.ARM_pc),
static_cast<uint32_t>(r.ARM_cpsr));
user_vfp vfp_regs;
if (ptrace(PTRACE_GETVFPREGS, tid, 0, &vfp_regs)) {
ALOGE("cannot get FP registers: %s\n", strerror(errno));
return;
}
// Two double registers per output line
for (size_t i = 0; i < 32; i += 2) {
_LOG(log, logtype::FP_REGISTERS, " d%-2d %016llx d%-2d %016llx\n",
i, vfp_regs.fpregs[i], i+1, vfp_regs.fpregs[i+1]);
}
_LOG(log, logtype::FP_REGISTERS, " scr %08lx\n", vfp_regs.fpscr);
}
通過ptrace獲取寄存器狀態信息,這裡輸出r0-r9,sl,fp,ip,sp,lr,pc,cpsr 以及32個fpregs和一個fpscr.
[-> debuggerd/tombstone.cpp]
// Logs the "backtrace:" section followed by the "stack:" section.
// Nothing is logged when the backtrace has no frames.
static void dump_backtrace_and_stack(Backtrace* backtrace, log_t* log) {
  if (backtrace->NumFrames()) {
    _LOG(log, logtype::BACKTRACE, "\nbacktrace:\n");
    // See section 4.4.5.1
    dump_backtrace_to_log(backtrace, log, " ");

    _LOG(log, logtype::STACK, "\nstack:\n");
    // See section 4.4.5.2
    dump_stack(backtrace, log);
  }
}
4.4.5.1 輸出backtrace信息
[-> debuggerd/backtrace.cpp]
// Logs every frame of the backtrace, one per line, each preceded by prefix.
void dump_backtrace_to_log(Backtrace* backtrace, log_t* log, const char* prefix) {
  size_t frame_index = 0;
  while (frame_index < backtrace->NumFrames()) {
    _LOG(log, logtype::BACKTRACE, "%s%s\n", prefix,
         backtrace->FormatFrameData(frame_index).c_str());
    frame_index++;
  }
}
4.4.5.2 輸出stack信息
[-> debuggerd/tombstone.cpp]
// Logs stack contents around the frames of the backtrace: a few words before
// the first frame that has a non-zero sp, then a segment per frame up to the
// last such frame. Returns without output when no frame has a non-zero sp.
static void dump_stack(Backtrace* backtrace, log_t* log) {
// first holds (index + 1) of the first frame with a non-zero sp so that 0 can
// mean "none found"; last holds the index of the last such frame and is only
// read after first has been found
size_t first = 0, last;
for (size_t i = 0; i < backtrace->NumFrames(); i++) {
const backtrace_frame_data_t* frame = backtrace->GetFrame(i);
if (frame->sp) {
if (!first) {
first = i+1;
}
last = i;
}
}
if (!first) {
return;
}
// Convert back to a zero-based frame index
first--;
// Dump a few words before the first frame.
word_t sp = backtrace->GetFrame(first)->sp - STACK_WORDS * sizeof(word_t);
dump_stack_segment(backtrace, log, &sp, STACK_WORDS, -1);
// Dump a few words from all successive frames.
// Only log the first 3 frames, put the rest in the tombstone.
for (size_t i = first; i <= last; i++) {
const backtrace_frame_data_t* frame = backtrace->GetFrame(i);
// A gap between the running sp and this frame's sp is marked with dots
if (sp != frame->sp) {
_LOG(log, logtype::STACK, " ........ ........\n");
sp = frame->sp;
}
if (i == last) {
dump_stack_segment(backtrace, log, &sp, STACK_WORDS, i);
// Mark that the last frame extends beyond what was dumped
if (sp < frame->sp + frame->stack_size) {
_LOG(log, logtype::STACK, " ........ ........\n");
}
} else {
// Dump between 1 and STACK_WORDS words of this frame
size_t words = frame->stack_size / sizeof(word_t);
if (words == 0) {
words = 1;
} else if (words > STACK_WORDS) {
words = STACK_WORDS;
}
dump_stack_segment(backtrace, log, &sp, words, i);
}
}
}
[-> debuggerd/arm/Machine.cpp]
// Dumps memory near the values held in the first 14 ARM general registers
// (r0-r9, sl, fp, ip, sp) and then the code around pc and, when it differs
// from pc, around lr. Registers are read with PTRACE_GETREGS from the thread
// identified by backtrace->Tid(); on failure an error is logged and nothing
// is dumped.
void dump_memory_and_code(log_t* log, Backtrace* backtrace) {
  pt_regs regs;
  if (ptrace(PTRACE_GETREGS, backtrace->Tid(), 0, &regs)) {
    ALOGE("cannot get registers: %s\n", strerror(errno));
    return;
  }

  // Register names packed two characters each; entry `reg` starts at reg * 2
  // and is printed with "%.2s".
  static const char reg_names[] = "r0r1r2r3r4r5r6r7r8r9slfpipsp";
  for (int reg = 0; reg < 14; reg++) {
    dump_memory(log, backtrace, regs.uregs[reg], "memory near %.2s:", &reg_names[reg * 2]);
  }

  dump_memory(log, backtrace, static_cast<uintptr_t>(regs.ARM_pc), "code around pc:");
  // Skip lr when it points at the same address as pc to avoid a duplicate dump.
  if (regs.ARM_pc != regs.ARM_lr) {
    dump_memory(log, backtrace, static_cast<uintptr_t>(regs.ARM_lr), "code around lr:");
  }
}
[-> debuggerd/tombstone.cpp]
// Writes the "memory map:" section: one line per entry of `map` with its
// address range, r/w/x flags, offset, size, and - when present - the mapping
// name, ELF build id and load base.
//
// When siginfo can be fetched for `tid` and signal_has_si_addr() accepts the
// signal, the fault address is marked exactly once: either by prefixing the
// containing entry with "--->" or by a separate line saying the address falls
// before, between, or after the mapped regions.
static void dump_all_maps(Backtrace* backtrace, BacktraceMap* map, log_t* log, pid_t tid) {
  siginfo_t si;
  memset(&si, 0, sizeof(si));

  // marker_pending stays true until the fault address has been reported.
  bool marker_pending = false;
  uintptr_t fault_addr = 0;
  if (ptrace(PTRACE_GETSIGINFO, tid, 0, &si) == -1) {
    ALOGE("Cannot get siginfo for %d: %s\n", tid, strerror(errno));
  } else {
    marker_pending = signal_has_si_addr(si.si_signo);
    fault_addr = reinterpret_cast<uintptr_t>(si.si_addr);
  }

  _LOG(log, logtype::MAPS, "\n");
  if (marker_pending) {
    _LOG(log, logtype::MAPS, "memory map: (fault address prefixed with --->)\n");
    // Fault address lies below the very first mapping.
    if (map->begin() != map->end() && fault_addr < map->begin()->start) {
      _LOG(log, logtype::MAPS, "--->Fault address falls at %s before any mapped regions\n",
           get_addr_string(fault_addr).c_str());
      marker_pending = false;
    }
  } else {
    _LOG(log, logtype::MAPS, "memory map:\n");
  }

  for (BacktraceMap::const_iterator entry = map->begin(); entry != map->end(); ++entry) {
    std::string line = " ";
    if (marker_pending) {
      if (fault_addr < entry->start) {
        // Passed the fault address without hitting a mapping: it sits in a gap.
        _LOG(log, logtype::MAPS, "--->Fault address falls at %s between mapped regions\n",
             get_addr_string(fault_addr).c_str());
        marker_pending = false;
      } else if (fault_addr < entry->end) {
        // fault_addr >= entry->start is implied by the branch above.
        line = "--->";
        marker_pending = false;
      }
    }

    // Address range (end is printed inclusive) and permission flags.
    line += get_addr_string(entry->start) + '-' + get_addr_string(entry->end - 1) + ' ';
    line += ((entry->flags & PROT_READ) ? 'r' : '-');
    line += ((entry->flags & PROT_WRITE) ? 'w' : '-');
    line += ((entry->flags & PROT_EXEC) ? 'x' : '-');

    // File offset and mapping size.
    line += android::base::StringPrintf(" %8" PRIxPTR " %8" PRIxPTR,
                                        entry->offset, entry->end - entry->start);

    const bool has_name = entry->name.length() > 0;
    if (has_name) {
      line += " " + entry->name;
      std::string build_id;
      // Build id is only looked up for readable mappings.
      if ((entry->flags & PROT_READ) && elf_get_build_id(backtrace, entry->start, &build_id)) {
        line += " (BuildId: " + build_id + ")";
      }
    }

    if (entry->load_base != 0) {
      // Unnamed mappings get one extra separating space before the load base.
      if (!has_name) {
        line += ' ';
      }
      line += android::base::StringPrintf(" (load base 0x%" PRIxPTR ")", entry->load_base);
    }

    _LOG(log, logtype::MAPS, "%s\n", line.c_str());
  }

  // Still unreported after the last entry: the address is above every mapping.
  if (marker_pending) {
    _LOG(log, logtype::MAPS, "--->Fault address falls at %s after any mapped regions\n",
         get_addr_string(fault_addr).c_str());
  }
}
當內存出現故障時,可搜索關鍵詞:
memory map: (fault address prefixed with --->)
[-> debuggerd/tombstone.cpp]
// Dumps the "system" log and then the "main" log for `pid`, forwarding
// `tail` unchanged to dump_log_file for each buffer.
static void dump_logs(log_t* log, pid_t pid, unsigned int tail) {
  // Order matters: system log first, main log second.
  static const char* const kLogNames[] = {"system", "main"};
  for (size_t k = 0; k < sizeof(kLogNames) / sizeof(kLogNames[0]); ++k) {
    dump_log_file(log, pid, kLogNames[k], tail);
  }
}
// Call form used for a sibling thread: signal, si_code and abort_msg_address
// are passed as 0 and primary_thread as false.
dump_thread(log, pid, sibling, map, 0, 0, 0, false);
[-> debuggerd/tombstone.cpp]
// Dumps one thread: thread info, registers and, if unwinding succeeds, the
// backtrace and stack. A separator line is written first for non-primary
// threads. log->current_tid is set to `tid` for the duration of the dump and
// reset to log->crashed_tid before returning.
//
// signal, si_code and abort_msg_address are not used in this excerpt.
static void dump_thread(log_t* log, pid_t pid, pid_t tid, BacktraceMap* map, int signal,
                        int si_code, uintptr_t abort_msg_address, bool primary_thread) {
  log->current_tid = tid;

  if (primary_thread == false) {
    _LOG(log, logtype::THREAD, "--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---\n");
  }

  // See section 4.4.1.
  dump_thread_info(log, pid, tid);

  // Created before the register dump, unwound after it.
  std::unique_ptr<Backtrace> unwinder(Backtrace::Create(pid, tid, map));

  // See section 4.4.4.
  dump_registers(log, tid);

  const bool unwound = unwinder->Unwind(0);
  if (unwound) {
    // See section 4.4.5.
    dump_backtrace_and_stack(unwinder.get(), log);
  }

  log->current_tid = log->crashed_tid;
}
兄弟線程dump_thread的輸出內容:
engrave_tombstone主要輸出信息:
兄弟線程調用dump_thread所輸出的內容,其實就是主線程dump內容中的第2、4、5、6項。
這裡是dump_tombstone文件內容的組成:
*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
//【見小節4.3】dump_header_info
Build fingerprint: 'xxx/xxx/MMB29M/gityuan06080845:userdebug/test-keys'
Revision: '0'
ABI: 'arm'
//【見小節4.4.1】dump_thread_info
pid: 1789, tid: 1789, name: system_server >>> system_server <<<
//【見小節4.4.2】dump_signal_info
signal 19 (SIGSTOP), code 0 (SI_USER), fault addr --------
//【見小節4.4.4】dump_registers
r0 fffffffc r1 bed67e68 r2 00000010 r3 0000ea60
r4 00000000 r5 00000008 r6 00000000 r7 0000015a
...
//【見小節4.4.5】dump_backtrace_and_stack
backtrace:
#00 pc 00000000004489bc /data/dalvik-cache/arm64/system@framework@boot.oat (offset 0x3e2e000)
#01 pc 00000000003e8a74 /data/dalvik-cache/arm64/system@framework@boot.oat (offset 0x3e2e000)
stack:
0000007ff47b26b0 0000000012cf05e0 /dev/ashmem/dalvik-main space (deleted)
0000007ff47b26b8 0000000000000000
0000007ff47b26c0 0000000012cf05e0 /dev/ashmem/dalvik-main space (deleted)
//【見小節4.4.6】dump_memory_and_code
memory near r1:
...
code around pc:
code around lr:
//【見小節4.4.7】dump_all_maps
memory map:
//【見小節4.5】dump_logs
--- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
//【見小節4.4.1】dump_thread_info
pid: 1789, tid: 1803, name: Binder_1 >>> system_server <<<
//【見小節4.4.4】dump_registers
r0 0000000b r1 c0186201 r2 b3589868 r3 b3589860
//【見小節4.4.5】dump_backtrace_and_stack
backtrace:
#00 pc 00040aac /system/lib/libc.so (__ioctl+8)
#01 pc 00047529 /system/lib/libc.so (ioctl+14)
#02 pc 0001e909 /system/lib/libbinder.so (_ZN7android14IPCThreadState14talkWithDriverEb+132)
stack:
b3589810 00000000
b3589814 00000000
b3589818 b6ebf07c /system/lib/libcutils.so
b358981c b6eb4405 /system/lib/libcutils.so
每個兄弟線程的輸出都以一系列 --- 作為開頭的分隔符。
這裡主要以源碼角度來分析debuggerd的原理,整個過程中最重要的產物便是tombstone文件,先留坑,後續再進一步講述如何分析tombstone文件。
- debuggerd -b <tid>: 發送請求的action為DEBUGGER_ACTION_DUMP_BACKTRACE,則調用dump_backtrace();(參見《Native進程之Trace原理》)
- debuggerd <tid>: 發送請求的action為DEBUGGER_ACTION_DUMP_TOMBSTONE,則調用engrave_tombstone();
- native crash: 發送請求的action為DEBUGGER_ACTION_CRASH,且發送信號為SIGBUS等致命信號,則調用engrave_tombstone()。