<!-- Post Container -->
<div class="
col-lg-8 col-lg-offset-2
col-md-10 col-md-offset-1
post-container">
<blockquote>
本文是基於Android 7.0源碼,來分析Native Crash流程。
從系統全局來說,Crash分為Framework/App Crash, Native Crash,以及Kernel Crash。
如果你是從事Android系統開發或者架構相關工作,或者遇到需要解決系統性的疑難雜症,再或者需要寫JNI代碼,則就有可能遇到Native Crash,瞭解系統Native Crash處理流程就很有必要。
接下來介紹Android N
的Native Crash處理流程,你沒有看錯,本文就是針對最新Android Nougat來分析的。Native crash的工作核心是由debuggerd守護進程來完成,在文章《調試系列4:debuggerd源碼篇》中,已經介紹過debuggerd的工作原理。
要了解Native Crash,首先從應用程序入口位於begin.S
中的__linker_init
入手。
[-> arch/arm/begin.S]
ENTRY(_start)
mov r0, sp
//入口地址 【見小節1.2】
bl __linker_init
/* linker init returns the _entry address in the main image */
mov pc, r0
END(_start)
[-> linker.cpp]
extern "C" ElfW(Addr) __linker_init(void* raw_args) {
KernelArgumentBlock args(raw_args);
ElfW(Addr) linker_addr = args.getauxval(AT_BASE);
...
//【見小節1.3】
ElfW(Addr) start_address = __linker_init_post_relocation(args, linker_addr);
return start_address;
}
[-> linker.cpp]
static ElfW(Addr) __linker_init_post_relocation(KernelArgumentBlock& args, ElfW(Addr) linker_base) {
...
// Sanitize the environment.
__libc_init_AT_SECURE(args);
// Initialize system properties
__system_properties_init();
//【見小節1.4】
debuggerd_init();
...
}
[-> linker/debugger.cpp]
__LIBC_HIDDEN__ void debuggerd_init() {
struct sigaction action;
memset(&action, 0, sizeof(action));
sigemptyset(&action.sa_mask);
//【見小節1.5】
action.sa_sigaction = debuggerd_signal_handler;
//SA_RESTART代表中斷某個syscall,則會自動重新調用該syscall
//SA_SIGINFO代表信號附帶參數siginfo_t結構體可傳送到signal_handler函數
action.sa_flags = SA_RESTART | SA_SIGINFO;
//使用備用signal棧(如果可用),以便我們能捕獲棧溢出
action.sa_flags |= SA_ONSTACK;
sigaction(SIGABRT, &action, nullptr);
sigaction(SIGBUS, &action, nullptr);
sigaction(SIGFPE, &action, nullptr);
sigaction(SIGILL, &action, nullptr);
sigaction(SIGPIPE, &action, nullptr);
sigaction(SIGSEGV, &action, nullptr);
#if defined(SIGSTKFLT)
sigaction(SIGSTKFLT, &action, nullptr);
#endif
sigaction(SIGTRAP, &action, nullptr);
}
連接到bionic上的native程序(C/C++)出現異常時,kernel會發送相應的signal; 當進程捕獲致命的signal,通知debuggerd調用ptrace來獲取有價值的信息(發生crash之前)。
[-> linker/debugger.cpp]
static void debuggerd_signal_handler(int signal_number, siginfo_t* info, void*) {
if (!have_siginfo(signal_number)) {
info = nullptr; //SA_SIGINFO標識被意外清空,則info未定義
}
//防止debuggerd無法鏈接時,仍可以輸出一些簡要signal信息
log_signal_summary(signal_number, info);
//建立與debuggerd的socket通信連接 【見小節1.6】
send_debuggerd_packet(info);
//重置信號處理函數為SIG_DFL(默認操作)
signal(signal_number, SIG_DFL);
switch (signal_number) {
case SIGABRT:
case SIGFPE:
case SIGPIPE:
#if defined(SIGSTKFLT)
case SIGSTKFLT:
#endif
case SIGTRAP:
tgkill(getpid(), gettid(), signal_number);
break;
default: // SIGILL, SIGBUS, SIGSEGV
break;
}
}
[-> linker/debugger.cpp]
static void send_debuggerd_packet(siginfo_t* info) {
// Mutex防止多個crashing線程同一時間來嘗試跟debuggerd進行通信
static pthread_mutex_t crash_mutex = PTHREAD_MUTEX_INITIALIZER;
int ret = pthread_mutex_trylock(&crash_mutex);
if (ret != 0) {
if (ret == EBUSY) {
__libc_format_log(ANDROID_LOG_INFO, "libc",
"Another thread contacted debuggerd first; not contacting debuggerd.");
//等待其他線程釋放該鎖,從而獲取該鎖
pthread_mutex_lock(&crash_mutex);
}
return;
}
//建立與debuggerd的socket通道
int s = socket_abstract_client(DEBUGGER_SOCKET_NAME, SOCK_STREAM | SOCK_CLOEXEC);
...
debugger_msg_t msg;
msg.action = DEBUGGER_ACTION_CRASH;
msg.tid = gettid();
msg.abort_msg_address = reinterpret_cast<uintptr_t>(g_abort_message);
msg.original_si_code = (info != nullptr) ? info->si_code : 0;
//將DEBUGGER_ACTION_CRASH消息發送給debuggerd服務端
ret = TEMP_FAILURE_RETRY(write(s, &msg, sizeof(msg)));
if (ret == sizeof(msg)) {
char debuggerd_ack;
//阻塞等待debuggerd服務端的迴應數據
ret = TEMP_FAILURE_RETRY(read(s, &debuggerd_ack, 1));
int saved_errno = errno;
notify_gdb_of_libraries();
errno = saved_errno;
}
close(s);
}
該方法的主要功能:建立與debuggerd的socket通道,並將
action = DEBUGGER_ACTION_CRASH
的消息發送給debuggerd服務端,然後阻塞等待服務端的迴應;接下來,看看debuggerd服務端接收到DEBUGGER_ACTION_CRASH
的處理流程。
debuggerd 守護進程啟動後,一直在等待socket client的連接。當native crash發生後便會向debuggerd發送action = DEBUGGER_ACTION_CRASH
的消息。
[-> /debuggerd/debuggerd.cpp]
static int do_server() {
...
for (;;) {
sockaddr_storage ss;
sockaddr* addrp = reinterpret_cast<sockaddr*>(&ss);
socklen_t alen = sizeof(ss);
//等待客戶端連接
int fd = accept4(s, addrp, &alen, SOCK_CLOEXEC);
if (fd == -1) {
continue; //accept失敗
}
//處理native crash發送過來的請求【見小節2.2】
handle_request(fd);
}
return 0;
}
[-> /debuggerd/debuggerd.cpp]
static void handle_request(int fd) {
android::base::unique_fd closer(fd);
debugger_request_t request;
memset(&request, 0, sizeof(request));
//讀取client發送過來的請求【見小節2.3】
int status = read_request(fd, &request);
...
//fork子進程來處理其餘請求命令
pid_t fork_pid = fork();
if (fork_pid == -1) {
ALOGE("debuggerd: failed to fork: %s\n", strerror(errno));
} else if (fork_pid == 0) {
//子進程執行【見小節2.4】
worker_process(fd, request);
} else {
//父進程執行【見小節2.5】
monitor_worker_process(fork_pid, request);
}
}
[-> /debuggerd/debuggerd.cpp]
static int read_request(int fd, debugger_request_t* out_request) {
ucred cr;
socklen_t len = sizeof(cr);
//從fd獲取client進程的pid,uid,gid
int status = getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
...
fcntl(fd, F_SETFL, O_NONBLOCK);
pollfd pollfds[1];
pollfds[0].fd = fd;
pollfds[0].events = POLLIN;
pollfds[0].revents = 0;
//讀取tid
status = TEMP_FAILURE_RETRY(poll(pollfds, 1, 3000));
debugger_msg_t msg;
memset(&msg, 0, sizeof(msg));
//從fd讀取數據並保存到結構體msg
status = TEMP_FAILURE_RETRY(read(fd, &msg, sizeof(msg)));
...
out_request->action = static_cast<debugger_action_t>(msg.action);
out_request->tid = msg.tid;
out_request->pid = cr.pid;
out_request->uid = cr.uid;
out_request->gid = cr.gid;
out_request->abort_msg_address = msg.abort_msg_address;
out_request->original_si_code = msg.original_si_code;
if (msg.action == DEBUGGER_ACTION_CRASH) {
// native crash時發送過來的請求
char buf[64];
struct stat s;
snprintf(buf, sizeof buf, "/proc/%d/task/%d", out_request->pid, out_request->tid);
if (stat(buf, &s)) {
return -1; //tid不存在,忽略該顯式dump請求
}
} else if (cr.uid == 0
|| (cr.uid == AID_SYSTEM && msg.action == DEBUGGER_ACTION_DUMP_BACKTRACE)) {
...
} else {
return -1;
}
return 0;
}
read_request執行完成後,則從socket通道中讀取到out_request。
對於client發送過來的請求,server端通過子進程來處理。
[-> /debuggerd/debuggerd.cpp]
static void worker_process(int fd, debugger_request_t& request) {
std::string tombstone_path;
int tombstone_fd = -1;
switch (request.action) {
case DEBUGGER_ACTION_CRASH:
//打開tombstone文件
tombstone_fd = open_tombstone(&tombstone_path);
if (tombstone_fd == -1) {
exit(1); //無法打開tombstone文件,則退出該進程
}
break;
...
}
// Attach到目標進程
if (ptrace(PTRACE_ATTACH, request.tid, 0, 0) != 0) {
exit(1); //attach失敗則退出該進程
}
...
//生成backtrace
std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(request.pid));
int amfd = -1;
std::unique_ptr<std::string> amfd_data;
if (request.action == DEBUGGER_ACTION_CRASH) {
//當發生native crash,則連接到AMS【見小節2.4.1】
amfd = activity_manager_connect();
amfd_data.reset(new std::string);
}
bool succeeded = false;
//取消特權模式
if (!drop_privileges()) {
_exit(1); //操作失敗則退出
}
int crash_signal = SIGKILL;
//執行dump操作,【見小節2.4.2】
succeeded = perform_dump(request, fd, tombstone_fd, backtrace_map.get(), siblings,
&crash_signal, amfd_data.get());
if (!attach_gdb) {
//將進程crash情況告知AMS【見小節2.4.3】
activity_manager_write(request.pid, crash_signal, amfd, *amfd_data.get());
}
//detach目標進程
ptrace(PTRACE_DETACH, request.tid, 0, 0);
for (pid_t sibling : siblings) {
ptrace(PTRACE_DETACH, sibling, 0, 0);
}
if (!attach_gdb && request.action == DEBUGGER_ACTION_CRASH) {
//發送信號SIGKILL給目標進程【見小節2.4.4】
if (!send_signal(request.pid, request.tid, crash_signal)) {
ALOGE("debuggerd: failed to kill process %d: %s", request.pid, strerror(errno));
}
}
...
}
整個過程比較複雜,下面只介紹attach_gdb=false的執行流程,其中perform_dump內部調用的
engrave_tombstone
()是核心方法。
[-> debuggerd.cpp]
static int activity_manager_connect() {
android::base::unique_fd amfd(socket(PF_UNIX, SOCK_STREAM, 0));
if (amfd.get() < -1) {
return -1; ///無法連接到ActivityManager(socket失敗)
}
struct sockaddr_un address;
memset(&address, 0, sizeof(address));
address.sun_family = AF_UNIX;
//該路徑必須匹配NativeCrashListener.java中的定義
strncpy(address.sun_path, "/data/system/ndebugsocket", sizeof(address.sun_path));
if (TEMP_FAILURE_RETRY(connect(amfd.get(), reinterpret_cast<struct sockaddr*>(&address),
sizeof(address))) == -1) {
return -1; //無法連接到ActivityManager(connect失敗)
}
struct timeval tv;
memset(&tv, 0, sizeof(tv));
tv.tv_sec = 1;
if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) {
return -1; //無法連接到ActivityManager(setsockopt SO_SNDTIMEO失敗)
}
tv.tv_sec = 3;
if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) {
return -1; //無法連接到ActivityManager(setsockopt SO_RCVTIMEO失敗)
}
return amfd.release();
}
該方法的功能是建立跟上層ActivityManager
的socket連接。"/data/system/ndebugsocket"的服務端是在NativeCrashListener.java的run方法中創建並啟動的。
根據接收到不同的signal採取相應的操作
[-> debuggerd.cpp]
static bool perform_dump(const debugger_request_t& request, int fd, int tombstone_fd,
BacktraceMap* backtrace_map, const std::set<pid_t>& siblings,
int* crash_signal, std::string* amfd_data) {
if (TEMP_FAILURE_RETRY(write(fd, "\0", 1)) != 1) {
return false; //無法響應client端請求
}
int total_sleep_time_usec = 0;
while (true) {
//等待信號到來
int signal = wait_for_signal(request.tid, &total_sleep_time_usec);
switch (signal) {
...
case SIGABRT:
case SIGBUS:
case SIGFPE:
case SIGILL:
case SIGSEGV:
#ifdef SIGSTKFLT
case SIGSTKFLT:
#endif
case SIGTRAP:
ALOGV("stopped -- fatal signal\n");
*crash_signal = signal;
//這是輸出tombstone信息最為核心的方法
engrave_tombstone(tombstone_fd, backtrace_map, request.pid, request.tid, siblings, signal,
request.original_si_code, request.abort_msg_address, amfd_data);
break;
default:
ALOGE("debuggerd: process stopped due to unexpected signal %d\n", signal);
break;
}
break;
}
return true;
}
以下信號都是致命的信號:SIGABRT、SIGBUS、SIGFPE、SIGILL、SIGSEGV、SIGSTKFLT(如有定義)、SIGTRAP。
另外,上篇文章已介紹過engrave_tombstone的功能內容,這裡就不再贅述了。
[-> debuggerd.cpp]
static void activity_manager_write(int pid, int signal, int amfd, const std::string& amfd_data) {
if (amfd == -1) {
return;
}
//寫入pid和signal,以及原始dump信息,最後添加0以標記結束
uint32_t datum = htonl(pid);
if (!android::base::WriteFully(amfd, &datum, 4)) {
return; //AM pid寫入失敗
}
datum = htonl(signal);
if (!android::base::WriteFully(amfd, &datum, 4)) {
return;//AM signal寫入失敗
}
if (!android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size())) {
return;//AM data寫入失敗
}
uint8_t eodMarker = 0;
if (!android::base::WriteFully(amfd, &eodMarker, 1)) {
return; //AM eod 寫入失敗
}
//讀取應答消息,如果3s超時未收到則讀取失敗
android::base::ReadFully(amfd, &eodMarker, 1);
}
debuggerd與AMS的NativeCrashListener建立socket連接後,再通過該方法發送數據,數據項包括pid、signal、dump信息。
send_signal()在此處只是向目標進程發送SIGKILL信號,用於殺掉目標進程,文章《理解殺進程的實現原理》已詳細講述過發送SIGKILL信號的處理流程。
static void monitor_worker_process(int child_pid, const debugger_request_t& request) {
struct timespec timeout = {.tv_sec = 10, .tv_nsec = 0 };
if (should_attach_gdb(request)) {
//如果使能wait_for_gdb,則將timeout設置為非常大
timeout.tv_sec = INT_MAX;
}
sigset_t signal_set;
sigemptyset(&signal_set);
sigaddset(&signal_set, SIGCHLD);
bool kill_worker = false;
bool kill_target = false;
bool kill_self = false;
int status;
siginfo_t siginfo;
int signal = TEMP_FAILURE_RETRY(sigtimedwait(&signal_set, &siginfo, &timeout));
if (signal == SIGCHLD) {
pid_t rc = waitpid(-1, &status, WNOHANG | WUNTRACED);
if (rc != child_pid) {
ALOGE("debuggerd: waitpid returned unexpected pid (%d), committing murder-suicide", rc);
if (WIFEXITED(status)) {
ALOGW("debuggerd: pid %d exited with status %d", rc, WEXITSTATUS(status));
} else if (WIFSIGNALED(status)) {
ALOGW("debuggerd: pid %d received signal %d", rc, WTERMSIG(status));
} else if (WIFSTOPPED(status)) {
ALOGW("debuggerd: pid %d stopped by signal %d", rc, WSTOPSIG(status));
} else if (WIFCONTINUED(status)) {
ALOGW("debuggerd: pid %d continued", rc);
}
kill_worker = true;
kill_target = true;
kill_self = true;
} else if (WIFSIGNALED(status)) {
ALOGE("debuggerd: worker process %d terminated due to signal %d", child_pid, WTERMSIG(status));
kill_worker = false;
kill_target = true;
} else if (WIFSTOPPED(status)) {
ALOGE("debuggerd: worker process %d stopped due to signal %d", child_pid, WSTOPSIG(status));
kill_worker = true;
kill_target = true;
}
} else {
ALOGE("debuggerd: worker process %d timed out", child_pid);
kill_worker = true;
kill_target = true;
}
該方法是運行在debuggerd父進程,用於監控子進程的執行情況。
debuggerd服務端調用鏈:
do_server
handle_request
read_request
worker_process(子進程執行)
open_tombstone
ptrace(PTRACE_ATTACH, request.tid, 0, 0)
backtrace_map
activity_manager_connect
perform_dump
activity_manager_write
ptrace(PTRACE_DETACH, request.tid, 0, 0);
send_signal
monitor_worker_process(父進程執行)
handle_request方法中通過fork機制,創建子進程來執行worker_process,由於fork返回兩次,返回到父進程則執行monitor_worker_process。
[-> SystemServer.java]
private void startOtherServices() {
...
mActivityManagerService.systemReady(new Runnable() {
@Override
public void run() {
mSystemServiceManager.startBootPhase(
SystemService.PHASE_ACTIVITY_MANAGER_READY);
try {
//【見小節3.2】
mActivityManagerService.startObservingNativeCrashes();
} catch (Throwable e) {
reportWtf("observing native crashes", e);
}
}
}
}
當開機過程中系統服務啟動到階段PHASE_ACTIVITY_MANAGER_READY
(550),即服務可以廣播自己的Intents時,便會啟動native crash的監聽線程。
[-> ActivityManagerService.java]
public void startObservingNativeCrashes() {
//【見小節3.3】
final NativeCrashListener ncl = new NativeCrashListener(this);
ncl.start();
}
NativeCrashListener繼承於Thread
,可見這是線程,通過調用start方法來啟動線程開始工作。
[-> NativeCrashListener.java]
public void run() {
final byte[] ackSignal = new byte[1];
{
//此處DEBUGGERD_SOCKET_PATH= "/data/system/ndebugsocket"
File socketFile = new File(DEBUGGERD_SOCKET_PATH);
if (socketFile.exists()) {
socketFile.delete();
}
}
try {
FileDescriptor serverFd = Os.socket(AF_UNIX, SOCK_STREAM, 0);
//創建socket服務端
final UnixSocketAddress sockAddr = UnixSocketAddress.createFileSystem(
DEBUGGERD_SOCKET_PATH);
Os.bind(serverFd, sockAddr);
Os.listen(serverFd, 1);
while (true) {
FileDescriptor peerFd = null;
try {
//等待debuggerd建立連接
peerFd = Os.accept(serverFd, null /* peerAddress */);
//獲取debuggerd的socket文件描述符
if (peerFd != null) {
//只有超級用戶才被允許通過該socket進行通信
StructUcred credentials =
Os.getsockoptUcred(peerFd, SOL_SOCKET, SO_PEERCRED);
if (credentials.uid == 0) {
//【見小節3.4】處理native crash信息
consumeNativeCrashData(peerFd);
}
}
} catch (Exception e) {
Slog.w(TAG, "Error handling connection", e);
} finally {
//應答debuggerd已經建立連接
if (peerFd != null) {
Os.write(peerFd, ackSignal, 0, 1);//寫入應答消息
Os.close(peerFd);//關閉socket
...
}
}
}
} catch (Exception e) {
Slog.e(TAG, "Unable to init native debug socket!", e);
}
}
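該方法主要功能:創建並監聽"/data/system/ndebugsocket"這個socket服務端,等待debuggerd建立連接;僅當對端uid為0時才調用consumeNativeCrashData處理native crash信息,最後向debuggerd寫入應答消息並關閉連接。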
[-> NativeCrashListener.java]
void consumeNativeCrashData(FileDescriptor fd) {
//進入該方法,標識著debuggerd已經與AMS建立連接
final byte[] buf = new byte[4096];
final ByteArrayOutputStream os = new ByteArrayOutputStream(4096);
try {
//此處SOCKET_TIMEOUT_MILLIS=2s
StructTimeval timeout = StructTimeval.fromMillis(SOCKET_TIMEOUT_MILLIS);
Os.setsockoptTimeval(fd, SOL_SOCKET, SO_RCVTIMEO, timeout);
Os.setsockoptTimeval(fd, SOL_SOCKET, SO_SNDTIMEO, timeout);
//1.讀取pid和signal number
int headerBytes = readExactly(fd, buf, 0, 8);
if (headerBytes != 8) {
return; //讀取失敗
}
int pid = unpackInt(buf, 0);
int signal = unpackInt(buf, 4);
//2.讀取dump內容
if (pid > 0) {
final ProcessRecord pr;
synchronized (mAm.mPidsSelfLocked) {
pr = mAm.mPidsSelfLocked.get(pid);
}
if (pr != null) {
//persistent應用,直接忽略
if (pr.persistent) {
return;
}
int bytes;
do {
//獲取數據
bytes = Os.read(fd, buf, 0, buf.length);
if (bytes > 0) {
if (buf[bytes-1] == 0) {
//到達文件EOD, 忽略該字節
os.write(buf, 0, bytes-1);
break;
}
os.write(buf, 0, bytes);
}
} while (bytes > 0);
synchronized (mAm) {
pr.crashing = true;
pr.forceCrashReport = true;
}
final String reportString = new String(os.toByteArray(), "UTF-8");
//異常處理native crash報告【見小節3.5】
(new NativeCrashReporter(pr, signal, reportString)).start();
}
}
} catch (Exception e) {
Slog.e(TAG, "Exception dealing with report", e);
}
}
讀取debuggerd那端發送過來的數據,再通過NativeCrashReporter來把native crash事件報告給framework層。
[-> NativeCrashListener.java]
class NativeCrashReporter extends Thread {
public void run() {
try {
CrashInfo ci = new CrashInfo();
ci.exceptionClassName = "Native crash";
ci.exceptionMessage = Os.strsignal(mSignal);
ci.throwFileName = "unknown";
ci.throwClassName = "unknown";
ci.throwMethodName = "unknown";
ci.stackTrace = mCrashReport;
//AMS真正處理crash的過程
mAm.handleApplicationCrashInner("native_crash", mApp, mApp.processName, ci);
} catch (Exception e) {
Slog.e(TAG, "Unable to report native crash", e);
}
}
}
不論是Native crash還是framework crash最終都會調用到handleApplicationCrashInner()
,該方法見文章《理解Android Crash處理流程》。
system_server進程啟動過程中,調用startOtherServices
來啟動各種其他系統Service時,也正是在這個時機創建一個用於監聽native crash事件的NativeCrashListener對象(繼承於Thread),通過socket機制來監聽,等待debuggerd與該線程建立連接,並處理相應事件。緊接著調用handleApplicationCrashInner
來處理crash流程。
NativeCrashListener的主要工作:
點擊查看大圖
Native程序通過link連接後,當發生Native Crash時,則kernel會發送相應的signal
,當進程捕獲致命的signal
,通知debuggerd
調用ptrace
來獲取有價值的信息(這是發生在crash前)。
這個流程圖只是從整體來概要介紹native crash流程,其中有兩個部分是核心方法:
perform_dump
是整個debuggerd的核心工作,該方法內部調用engrave_tombstone
,該方法的具體工作見文章《debuggerd守護進程》的功能內容,這個過程還需要與target進程通信來獲取target進程更多信息。