Android6.0系统启动流程分析

Android6.0系统启动流程分析一:init进程
阳光玻璃杯
https://blog.csdn.net/u011913612/article/details/53204253
到了Android6.0,Init进程使用c++来写了,不过没有关系,它和c写的init没有太大的区别。
Init进程的入口代码是:system\core\init\init.cpp
main函数:
int main(int argc, char** argv) {
    if (!strcmp(basename(argv[0]), "ueventd")) {
        return ueventd_main(argc, argv);
    }
    if (!strcmp(basename(argv[0]), "watchdogd")) {
        return watchdogd_main(argc, argv);
    }
    // Clear the umask.
    umask(0);
    add_environment("PATH", _PATH_DEFPATH);
    bool is_first_stage = (argc == 1) || (strcmp(argv[1], "--second-stage") != 0);
    // Get the basic filesystem setup we need put together in the initramdisk
    // on / and then we'll let the rc file figure out the rest.
    if (is_first_stage) {
        mount("tmpfs", "/dev", "tmpfs", MS_NOSUID, "mode=0755");
        mkdir("/dev/pts", 0755);
        mkdir("/dev/socket", 0755);
        mount("devpts", "/dev/pts", "devpts", 0, NULL);
        mount("proc", "/proc", "proc", 0, NULL);
        mount("sysfs", "/sys", "sysfs", 0, NULL);
    }
    // We must have some place other than / to create the device nodes for
    // kmsg and null, otherwise we won't be able to remount / read-only
    // later on. Now that tmpfs is mounted on /dev, we can actually talk
    // to the outside world.
    open_devnull_stdio();
    klog_init();
    klog_set_level(KLOG_NOTICE_LEVEL);
    NOTICE("init%s started!\n", is_first_stage ? "" : " second stage");
    if (!is_first_stage) {
        // Indicate that booting is in progress to background fw loaders, etc.
        close(open("/dev/.booting", O_WRONLY | O_CREAT | O_CLOEXEC, 0000));
        property_init();
        // If arguments are passed both on the command line and in DT,
        // properties set in DT always have priority over the command-line ones.
        process_kernel_dt();
        process_kernel_cmdline();
        // Propogate the kernel variables to internal variables
        // used by init as well as the current required properties.
        export_kernel_boot_props();
    }
    // Set up SELinux, including loading the SELinux policy if we're in the kernel domain.
    selinux_initialize(is_first_stage);
    // If we're in the kernel domain, re-exec init to transition to the init domain now
    // that the SELinux policy has been loaded.
    if (is_first_stage) {
        if (restorecon("/init") == -1) {
            ERROR("restorecon failed: %s\n", strerror(errno));
            security_failure();
        }
        char* path = argv[0];
        char* args[] = { path, const_cast<char*>("--second-stage"), nullptr };
        if (execv(path, args) == -1) {
            ERROR("execv(\"%s\") failed: %s\n", path, strerror(errno));
            security_failure();
        }
    }
    // These directories were necessarily created before initial policy load
    // and therefore need their security context restored to the proper value.
    // This must happen before /dev is populated by ueventd.
    INFO("Running restorecon...\n");
    restorecon("/dev");
    restorecon("/dev/socket");
    restorecon("/dev/__properties__");
    restorecon_recursive("/sys");
    epoll_fd = epoll_create1(EPOLL_CLOEXEC);
    if (epoll_fd == -1) {
        ERROR("epoll_create1 failed: %s\n", strerror(errno));
        exit(1);
    }
    signal_handler_init();
    property_load_boot_defaults();
    start_property_service();
    init_parse_config_file("/init.rc");
    action_for_each_trigger("early-init", action_add_queue_tail);
    // Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
    queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done");
    // ... so that we can start queuing up actions that require stuff from /dev.
    queue_builtin_action(mix_hwrng_into_linux_rng_action, "mix_hwrng_into_linux_rng");
    queue_builtin_action(keychord_init_action, "keychord_init");
    queue_builtin_action(console_init_action, "console_init");
    // Trigger all the boot actions to get us started.
    action_for_each_trigger("init", action_add_queue_tail);
    // Repeat mix_hwrng_into_linux_rng in case /dev/hw_random or /dev/random
    // wasn't ready immediately after wait_for_coldboot_done
    queue_builtin_action(mix_hwrng_into_linux_rng_action, "mix_hwrng_into_linux_rng");
    // Don't mount filesystems or start core system services in charger mode.
    char bootmode[PROP_VALUE_MAX];
    if (property_get("ro.bootmode", bootmode) > 0 && strcmp(bootmode, "charger") == 0) {
        action_for_each_trigger("charger", action_add_queue_tail);
    } else {
        action_for_each_trigger("late-init", action_add_queue_tail);
    }
    // Run all property triggers based on current state of the properties.
    queue_builtin_action(queue_property_triggers_action, "queue_property_triggers");
    while (true) {
        if (!waiting_for_exec) {
            execute_one_command();
            restart_processes();
        }
        int timeout = -1;
        if (process_needs_restart) {
            timeout = (process_needs_restart - gettime()) * 1000;
            if (timeout < 0)
                timeout = 0;
        }
        if (!action_queue_empty() || cur_action) {
            timeout = 0;
        }
        bootchart_sample(&timeout);
        epoll_event ev;
        int nr = TEMP_FAILURE_RETRY(epoll_wait(epoll_fd, &ev, 1, timeout));
        if (nr == -1) {
            ERROR("epoll_wait failed: %s\n", strerror(errno));
        } else if (nr == 1) {
            ((void (*)()) ev.data.ptr)();
        }
    }
    return 0;
}
1.这个函数是否往下执行取决于传入的参数,如果第0个参数的basename为ueventd,则执行ueventd_main(argc, argv);如果basename为watchdogd_main,则执行watchdogd_main(argc, argv);只有basename不为这二者时,才会继续往下执行。
2.如果argv[1]不为”–second-stage”或者只有一个参数的话,那么is_first_stage就为true,就会创建/dev/pts和”/dev/socket”两个设备文件节点,并挂载一个文件系统。可以看出来init进程分两个阶段,不同的阶段有不同的行为。具体的内涵鄙人还没搞明白。
3.启动属性服务。创建一个socket,并在之后的死循环中监听这个socket返回的文件描述符。
3.解析init.rc。这个过程也是我最感兴趣的,也是最重要的复杂的。
4.对各个阶段的action排序。
5.进入死循环。
6.第一次进入死循环后,action_queue里面有很多时间,因此需要不断调用execute_one_command来执行命令。此时,action_queue_empty为假,timeout 为0,init线程不会在epoll_wait方法中休眠,因为设置的timeout=0哦,这一点曾一度困扰了我。
7.所有的命令执行完后,init进程进入休眠,监听property_set_fd和signal_read_fd两个文件描述符,一点他们有事件过来,立刻被唤醒,进而做事件处理。
init.rc梳理
在我们分析init.rc的解析过程之前,我们还需要先对init.rc有个基本的认识。
先看一张我根据理解绘制的图:

从图来看,init.rc主要有section组成,section由on,import,section三个关键字标示。其中on标示的section叫做action。
import就不用说了,和c语言中的include功能有点类似。
service格式如下
service <name> <pathname> [ <argument> ]* 
   <option> 
   <option> 
   ... 
action后面会跟一个触发器,然后另起一行开始放置命令(command),格式如下:
on <trigger> 
   <command> 
   <command> 
   <command> 
跟在service后面的是option,跟在action后面的是command.command都会对应一个处理函数,定义在keywords.h中:
...
    KEYWORD(loglevel,    COMMAND, 1, do_loglevel)
    KEYWORD(mkdir,       COMMAND, 1, do_mkdir)
    KEYWORD(mount_all,   COMMAND, 1, do_mount_all)
    KEYWORD(mount,       COMMAND, 3, do_mount)
    ...
命名也是很有规则的。比如mkdir,对应的函数就是do_mkdir。我们看看do_mkdir做了什么:
int do_mkdir(int nargs, char **args)
{
    mode_t mode = 0755;
    int ret;
    /* mkdir <path> [mode] [owner] [group] */
    if (nargs >= 3) {
        mode = strtoul(args[2], 0, 8);
    }
    ret = make_dir(args[1], mode);
    /* chmod in case the directory already exists */
    if (ret == -1 && errno == EEXIST) {
        ret = fchmodat(AT_FDCWD, args[1], mode, AT_SYMLINK_NOFOLLOW);
    }
    if (ret == -1) {
        return -errno;
    }
    if (nargs >= 4) {
        uid_t uid = decode_uid(args[3]);
        gid_t gid = -1;
        if (nargs == 5) {
            gid = decode_uid(args[4]);
        }
        if (lchown(args[1], uid, gid) == -1) {
            return -errno;
        }
        /* chown may have cleared S_ISUID and S_ISGID, chmod again */
        if (mode & (S_ISUID | S_ISGID)) {
            ret = fchmodat(AT_FDCWD, args[1], mode, AT_SYMLINK_NOFOLLOW);
            if (ret == -1) {
                return -errno;
            }
        }
    }
    return e4crypt_set_directory_policy(args[1]);
}
其实就是调用了make_dir并做了一些权限等方面的操作。所以,跟在action后面的命令并不能随随便便乱加,而是要确保这个命令被定义了,不然就会出错。
init.rc的解析过程(以import为例)
因为init.rc的第一行代码就是Import语句。万事开头难,只要我们理清了第一行的解析过程,后面行的解析分析起来就不怎么费劲了。所以下面我们主要看看init.rc中第一行的解析过程。
init.tc的解析函数为:init_parse_config_file
int init_parse_config_file(const char* path) {
    INFO("Parsing %s...\n", path);
    Timer t;
    std::string data;
    if (!read_file(path, &data)) {
        return -1;
    }
    data.push_back('\n'); // TODO: fix parse_config.
    parse_config(path, data);
    dump_parser_state();
    // MStar Android Patch Begin
    INFO("(Parsing %s took %.2fs.)\n", path, t.duration());
    // MStar Android Patch End
    return 0;
}
这个函数把/init.rc中的内容读出来,并让data这个string类型的变量指向它。
把读出来的data传递给parse_config函数做真正的解析工作。parse_config函数如下:
static void parse_config(const char *fn, const std::string& data)
{
    char *args[UEVENTD_PARSER_MAXARGS];
    int nargs = 0;
    parse_state state;
    state.filename = fn;
    state.line = 1;
    state.ptr = strdup(data.c_str());  // TODO: fix this code!
    state.nexttoken = 0;
    state.parse_line = parse_line_no_op;
    for (;;) {
        int token = next_token(&state);
        switch (token) {
        case T_EOF:
            parse_line(&state, args, nargs);
            return;
        case T_NEWLINE:
            if (nargs) {
                parse_line(&state, args, nargs);
                nargs = 0;
            }
            state.line++;
            break;
        case T_TEXT:
            if (nargs < UEVENTD_PARSER_MAXARGS) {
                args[nargs++] = state.text;
            }
            break;
        }
    }
}
我看到这个函数的时候,我想起了xml解析方法之一的pull解析,感觉挺像的。每次循环都会找到一个token,token就是一个特定的符号,然后根据这个toke做不同的处理。这里使用到了parse_state结构,启动以如下:
struct parse_state
{
    char *ptr;
    char *text;
    int line;
    int nexttoken;
    void *context;
    void (*parse_line)(struct parse_state *state, int nargs, char **args);
    const char *filename;
    void *priv;
};
这个就够中:ptr执行init.rc字符流的,text后面会用到,用来保存参数,line当然就是行数了,nexttoken保存下一个token,filename保存init.rc的文件描述符,filename当然是/init.rc了.parse_line是一个函数指针。context暂时没明白…state.priv 指向Import的一个文件链表。
我们打开Init.rc看看,从头分析它的解析过程。
import /init.environ.rc
import /init.usb.rc
import /init.${ro.hardware}.rc
import /init.${ro.zygote}.rc
import /init.trace.rc
...
init.rc前面几行都是import语句,我们看看一开始的解析流程。
这个时候,parse_satate的状态为:
    state.filename = fn;
    state.line = 1;
    state.ptr = strdup(data.c_str());  // TODO: fix this code!
    state.nexttoken = 0;
    state.parse_line = parse_line_no_op;
        list_init(&import_list);
    state.priv = &import_list;
step 1.第一次循环
然后进入死循环,第一次调用next_token函数:
int next_token(struct parse_state *state)
{
    char *x = state->ptr;
    char *s;
    if (state->nexttoken) {
        int t = state->nexttoken;
        state->nexttoken = 0;
        return t;
    }
    for (;;) {
        switch (*x) {
        case 0:
            state->ptr = x;
            return T_EOF;
        case '\n':
            x++;
            state->ptr = x;
            return T_NEWLINE;
        case ' ':
        case '\t':
        case '\r':
            x++;
            continue;
        case '#':
            while (*x && (*x != '\n')) x++;
            if (*x == '\n') {
                state->ptr = x+1;
                return T_NEWLINE;
            } else {
                state->ptr = x;
                return T_EOF;
            }
        default:
            goto text;
        }
    }
textdone:
    state->ptr = x;
    *s = 0;
    return T_TEXT;
text:
    state->text = s = x;
textresume:
    for (;;) {
        switch (*x) {
        case 0:
            goto textdone;
        case ' ':
        case '\t':
        case '\r':
            x++;
            goto textdone;
        case '\n':
            state->nexttoken = T_NEWLINE;
            x++;
            goto textdone;
        case '"':
            x++;
            for (;;) {
                switch (*x) {
                case 0:
                        /* unterminated quoted thing */
                    state->ptr = x;
                    return T_EOF;
                case '"':
                    x++;
                    goto textresume;
                default:
                    *s++ = *x++;
                }
            }
            break;
        case '\\':
            x++;
            switch (*x) {
            case 0:
                goto textdone;
            case 'n':
                *s++ = '\n';
                break;
            case 'r':
                *s++ = '\r';
                break;
            case 't':
                *s++ = '\t';
                break;
            case '\\':
                *s++ = '\\';
                break;
            case '\r':
                    /* \ <cr> <lf> -> line continuation */
                if (x[1] != '\n') {
                    x++;
                    continue;
                }
            case '\n':
                    /* \ <lf> -> line continuation */
                state->line++;
                x++;
                    /* eat any extra whitespace */
                while((*x == ' ') || (*x == '\t')) x++;
                continue;
            default:
                    /* unknown escape -- just copy */
                *s++ = *x++;
            }
            continue;
        default:
            *s++ = *x++;
        }
    }
    return T_EOF;
}
这时候,init.rc中的第一个符号应该是i(impor,省去空格),所以next_token直接进入到text:标签执行,执行的结果是state->text = s = ‘i’;然后继续执行textresume:标签后面的内容:
标签后面的for死循环中,发现第一个字符是i,于是执行default分支:*s++ = *x++;这样直到import的t被检测完以后,在下一次循环变得到一个空格,于是执行x++,并goto textdone.。textdown执行完后函数返回,返回后,state->ptr 指向’/’符号 。*s = 0;意味着state.text就是字符串“import”,因为0就是字符串结束符了。注意返回值为T_TEXT。这个时候执行parse_config函数中的case T_TEXT:分支。
        case T_TEXT:
            if (nargs < INIT_PARSER_MAXARGS) {
                args[nargs++] = state.text;
            }
这个时候,nargs为0,state.text位import,于是args数组的第0项就存了”import”字符串了。然后nargs++,也就是等于1了。然后进入下次循环。
step 2.第二次循环
第二次循环再次调用next_token函数,这次state->ptr=’\’,这我们分析过了。因此next_token函数不断执行defaulty分支,最终state.text = “/init.environ.rc”,返回类型还是T_TEXT。于是和之前一样,args[1]=”/init.environ.rc”,nargs=2。
step 3.第三次循环
这个时候一行结束,parse_config函数进入case T_NEWLINE:分支。
这个分支中,首先执行lookup_keyword函数,从名字来看是查找关键字。肯定就是import了,它肯定是关键字。不信请看代码:
static int lookup_keyword(const char *s)
{
    switch (*s++) {
    case 'b':
        if (!strcmp(s, "ootchart_init")) return K_bootchart_init;
        break;
    case 'c':
        if (!strcmp(s, "opy")) return K_copy;
        if (!strcmp(s, "lass")) return K_class;
        if (!strcmp(s, "lass_start")) return K_class_start;
        if (!strcmp(s, "lass_stop")) return K_class_stop;
        if (!strcmp(s, "lass_reset")) return K_class_reset;
        if (!strcmp(s, "onsole")) return K_console;
        if (!strcmp(s, "hown")) return K_chown;
        if (!strcmp(s, "hmod")) return K_chmod;
        if (!strcmp(s, "ritical")) return K_critical;
        break;
    case 'd':
        if (!strcmp(s, "isabled")) return K_disabled;
        if (!strcmp(s, "omainname")) return K_domainname;
        break;
    case 'e':
        if (!strcmp(s, "nable")) return K_enable;
        if (!strcmp(s, "xec")) return K_exec;
        if (!strcmp(s, "xport")) return K_export;
        break;
    case 'g':
        if (!strcmp(s, "roup")) return K_group;
        break;
    case 'h':
        if (!strcmp(s, "ostname")) return K_hostname;
        break;
    case 'i':
        if (!strcmp(s, "oprio")) return K_ioprio;
        if (!strcmp(s, "fup")) return K_ifup;
        if (!strcmp(s, "nsmod")) return K_insmod;
        if (!strcmp(s, "mport")) return K_import;
        if (!strcmp(s, "nstallkey")) return K_installkey;
        break;
    case 'k':
        if (!strcmp(s, "eycodes")) return K_keycodes;
        break;
    case 'l':
        if (!strcmp(s, "oglevel")) return K_loglevel;
        if (!strcmp(s, "oad_persist_props")) return K_load_persist_props;
        if (!strcmp(s, "oad_all_props")) return K_load_all_props;
        break;
    case 'm':
        if (!strcmp(s, "kdir")) return K_mkdir;
        if (!strcmp(s, "ount_all")) return K_mount_all;
        if (!strcmp(s, "ount")) return K_mount;
        break;
    case 'o':
        if (!strcmp(s, "n")) return K_on;
        if (!strcmp(s, "neshot")) return K_oneshot;
        if (!strcmp(s, "nrestart")) return K_onrestart;
        break;
    case 'p':
        if (!strcmp(s, "owerctl")) return K_powerctl;
        break;
    case 'r':
        if (!strcmp(s, "estart")) return K_restart;
        if (!strcmp(s, "estorecon")) return K_restorecon;
        if (!strcmp(s, "estorecon_recursive")) return K_restorecon_recursive;
        if (!strcmp(s, "mdir")) return K_rmdir;
        if (!strcmp(s, "m")) return K_rm;
        break;
    case 's':
        if (!strcmp(s, "eclabel")) return K_seclabel;
        if (!strcmp(s, "ervice")) return K_service;
        if (!strcmp(s, "etenv")) return K_setenv;
        if (!strcmp(s, "etprop")) return K_setprop;
        if (!strcmp(s, "etrlimit")) return K_setrlimit;
        if (!strcmp(s, "ocket")) return K_socket;
        if (!strcmp(s, "tart")) return K_start;
        if (!strcmp(s, "top")) return K_stop;
        if (!strcmp(s, "wapon_all")) return K_swapon_all;
        if (!strcmp(s, "ymlink")) return K_symlink;
        if (!strcmp(s, "ysclktz")) return K_sysclktz;
        break;
    case 't':
        if (!strcmp(s, "rigger")) return K_trigger;
        break;
    case 'u':
        if (!strcmp(s, "ser")) return K_user;
        break;
    case 'v':
        if (!strcmp(s, "erity_load_state")) return K_verity_load_state;
        if (!strcmp(s, "erity_update_state")) return K_verity_update_state;
        break;
    case 'w':
        if (!strcmp(s, "rite")) return K_write;
        if (!strcmp(s, "ritepid")) return K_writepid;
        if (!strcmp(s, "ait")) return K_wait;
        break;
    }
    return K_UNKNOWN;
}
调用这个函数的时候,我们传入的参数args[0]=”import”.显而易见该函数返回K_import。它是一个整数。返回以后使用kw_is函数看他是不是一个Section。当然是一个section了,import也是一个section。不信看代码:
#define kw_is(kw, type) (keyword_info[kw].flags & (type))
1
keyword_info定义在system/core/init/keywords.h中:
    ...
    KEYWORD(group,       OPTION,  0, 0)
    KEYWORD(hostname,    COMMAND, 1, do_hostname)
    KEYWORD(ifup,        COMMAND, 1, do_ifup)
    KEYWORD(import,      SECTION, 1, 0)
    ...
截取含有import的一部分代码,后面SECTION已经表明它是个Section了。KEYWORD自后一个参数是这个关键字对应的处理函数。比如这其中的hostname。如果你在init.rc中使用hostname 关键字,那么最终会调用do_hostname函数来处理。
既然import是一个section。那么parce_config就会调用state.parse_line函数,这里是一个函数指针,其实调用的是parse_line_no_op,不记得回去看下state的初始就知道了。
static void parse_line_no_op(struct parse_state*, int, char**) {
}
这个函数是空的。接下来调用parse_new_section函数:
static void parse_new_section(struct parse_state *state, int kw,
                       int nargs, char **args)
{
    printf("[ %s %s ]\n", args[0],
           nargs > 1 ? args[1] : "");
    switch(kw) {
    case K_service:
        state->context = parse_service(state, nargs, args);
        if (state->context) {
            state->parse_line = parse_line_service;
            return;
        }
        break;
    case K_on:
        state->context = parse_action(state, nargs, args);
        if (state->context) {
            state->parse_line = parse_line_action;
            return;
        }
        break;
    case K_import:
        parse_import(state, nargs, args);
        break;
    }
    state->parse_line = parse_line_no_op;
}
我们当然是执行case K_import:分支了,想都不用想。所以接下来执行parse_import方法:
static void parse_import(struct parse_state *state, int nargs, char **args)
{
    struct listnode *import_list = (listnode*) state->priv;
    char conf_file[PATH_MAX];
    int ret;
    if (nargs != 2) {
        ERROR("single argument needed for import\n");
        return;
    }
    ret = expand_props(conf_file, args[1], sizeof(conf_file));
    if (ret) {
        ERROR("error while handling import on line '%d' in '%s'\n",
              state->line, state->filename);
        return;
    }
    struct import* import = (struct import*) calloc(1, sizeof(struct import));
    import->filename = strdup(conf_file);
    list_add_tail(import_list, &import->list);
    INFO("Added '%s' to import list\n", import->filename);
}
这个函数首先使用expand_props方法对args[1]也就是“/init.environ.rc”做进一步处理。这个函数如下:
int expand_props(char *dst, const char *src, int dst_size)
{
    char *dst_ptr = dst;
    const char *src_ptr = src;
    int ret = 0;
    int left = dst_size - 1;
    if (!src || !dst || dst_size == 0)
        return -1;
    /* - variables can either be $x.y or ${x.y}, in case they are only part
     *   of the string.
     * - will accept $$ as a literal $.
     * - no nested property expansion, i.e. ${foo.${bar}} is not supported,
     *   bad things will happen
     */
    while (*src_ptr && left > 0) {
        char *c;
        char prop[PROP_NAME_MAX + 1];
        char prop_val[PROP_VALUE_MAX];
        int prop_len = 0;
        int prop_val_len;
        c = strchr(src_ptr, '$');
        if (!c) {
            while (left-- > 0 && *src_ptr)
                *(dst_ptr++) = *(src_ptr++);
            break;
        }
        ...
可以看出,这个函数的作用是拓展args[1].这里不需要拓展,因为我们的args[1]=”/init.environ.rc”没有$符号,所以直接就跳出循环了。这里应该是对那些有包含变量的字符串,把变量的内容展开。
然后构建了一个import结构体。import中的filename项赋值为”/init.environ.rc”.并把它加入到import_list链表中。
import定义如下:
struct import {
    struct listnode list;
    const char *filename;
};
这样,第一行就分析完了,从而我们也彻底明白了怎么解析一个import 的section。
只要能看懂一个,其他的就简单了。因为,他们都是类似的。
service的解析与启动
service的解析
和import解析过程类似,遇到service关键字后,service关键字和后面的参数会保存在args[]数组中。然后通过对args[0]提取关键字,发现args[0]=”service”,于是开始执行parse_new_section函数。此时这个函数必然会进入 case K_service:分支执行:
    case K_service:
        state->context = parse_service(state, nargs, args);
        if (state->context) {
            state->parse_line = parse_line_service;
            return;
        }
        break;
这里做了两件事情非常重要,一件是调用parse_service解析service这个section。另一件事情是给state->parse_line赋值为parse_line_service。也就是service 关键字所在的行后面的那些options行都是使用parse_line_service函数来解析的。我们从parse_service看起:
static void *parse_service(struct parse_state *state, int nargs, char **args)
{
    if (nargs < 3) {
        parse_error(state, "services must have a name and a program\n");
        return 0;
    }
    if (!valid_name(args[1])) {
        parse_error(state, "invalid service name '%s'\n", args[1]);
        return 0;
    }
    service* svc = (service*) service_find_by_name(args[1]);
    if (svc) {
        parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]);
        return 0;
    }
    nargs -= 2;
    svc = (service*) calloc(1, sizeof(*svc) + sizeof(char*) * nargs);
    if (!svc) {
        parse_error(state, "out of memory\n");
        return 0;
    }
    svc->name = strdup(args[1]);
    svc->classname = "default";
    memcpy(svc->args, args + 2, sizeof(char*) * nargs);
    trigger* cur_trigger = (trigger*) calloc(1, sizeof(*cur_trigger));
    svc->args[nargs] = 0;
    svc->nargs = nargs;
    list_init(&svc->onrestart.triggers);
    cur_trigger->name = "onrestart";
    list_add_tail(&svc->onrestart.triggers, &cur_trigger->nlist);
    list_init(&svc->onrestart.commands);
    list_add_tail(&service_list, &svc->slist);
    return svc;
}
可以看到和import做的事情差不多。import解析的最后,会创建一个import结构体,并把它添加到import_list双向链表中。service解析从这里看,也是构建一个service结构体,然后把service结构体添加到service_list链表中。
我们看下service结构体:
struct service {
    void NotifyStateChange(const char* new_state);
        /* list of all services */
    struct listnode slist;
    char *name;
    const char *classname;
    unsigned flags;
    pid_t pid;
    time_t time_started;    /* time of last start */
    time_t time_crashed;    /* first crash within inspection window */
    int nr_crashed;         /* number of times crashed within window */
    uid_t uid;
    gid_t gid;
    gid_t supp_gids[NR_SVC_SUPP_GIDS];
    size_t nr_supp_gids;
    const char* seclabel;
    struct socketinfo *sockets;
    struct svcenvinfo *envvars;
    struct action onrestart;  /* Actions to execute on restart. */
    std::vector<std::string>* writepid_files_;
    /* keycodes for triggering this service via /dev/keychord */
    int *keycodes;
    int nkeycodes;
    int keychord_id;
    IoSchedClass ioprio_class;
    int ioprio_pri;
    int nargs;
    /* "MUST BE AT THE END OF THE STRUCT" */
    char *args[1];
}; /*    
socketinfo 用来保存socket option的相关信息。
classname 给service定义一个类名,如果多个service使用相同的类型,可以方便进行批量操作。
nargs 保存参数的个数。
很多字段不理解,没关系,我们看看parse_service函数给service做了那些初始化:
1. svc->name = strdup(args[1]);名字就是service 关键字后面的第一个参数
2. svc->classname = “default”; 类别名是default
3. memcpy(svc->args, args + 2, sizeof(char*) * nargs); svc->args[nargs] = 0;
把所有参数保存在args数组中,并把最有一个成员只为0。
4. svc->nargs = nargs; nargs保存参数个数
5. trigger* cur_trigger = (trigger*) calloc(1, sizeof(*cur_trigger));
list_init(&svc->onrestart.triggers);
cur_trigger->name = “onrestart”;
list_add_tail(&svc->onrestart.triggers, &cur_trigger->nlist);
构建一个触发器,并把它添加到service中的onrestart.triger列表中。
因此,我们可以知道么一个service都会有一个onrestart的action,这个action有一个触发器。这个action用来重启service。
解析完service后,就会解析service后面的option了,这个时候会调用parse_line_service。
static void parse_line_service(struct parse_state *state, int nargs, char **args)
{
    struct service *svc = (service*) state->context;
    struct command *cmd;
    int i, kw, kw_nargs;
    if (nargs == 0) {
        return;
    }
    svc->ioprio_class = IoSchedClass_NONE;
    kw = lookup_keyword(args[0]);
    switch (kw) {
    case K_class:
        if (nargs != 2) {
            parse_error(state, "class option requires a classname\n");
        } else {
            svc->classname = args[1];
        }
        break;
    case K_console:
        svc->flags |= SVC_CONSOLE;
        break;
    case K_disabled:
        svc->flags |= SVC_DISABLED;
        svc->flags |= SVC_RC_DISABLED;
        break;
    case K_ioprio:
        if (nargs != 3) {
            parse_error(state, "ioprio optin usage: ioprio <rt|be|idle> <ioprio 0-7>\n");
        } else {
            svc->ioprio_pri = strtoul(args[2], 0, 8);
            if (svc->ioprio_pri < 0 || svc->ioprio_pri > 7) {
                parse_error(state, "priority value must be range 0 - 7\n");
                break;
            }
            if (!strcmp(args[1], "rt")) {
                svc->ioprio_class = IoSchedClass_RT;
            } else if (!strcmp(args[1], "be")) {
                svc->ioprio_class = IoSchedClass_BE;
            } else if (!strcmp(args[1], "idle")) {
                svc->ioprio_class = IoSchedClass_IDLE;
            } else {
                parse_error(state, "ioprio option usage: ioprio <rt|be|idle> <0-7>\n");
            }
        }
        break;
    case K_group:
        if (nargs < 2) {
            parse_error(state, "group option requires a group id\n");
        } else if (nargs > NR_SVC_SUPP_GIDS + 2) {
            parse_error(state, "group option accepts at most %d supp. groups\n",
                        NR_SVC_SUPP_GIDS);
        } else {
            int n;
            svc->gid = decode_uid(args[1]);
            for (n = 2; n < nargs; n++) {
                svc->supp_gids[n-2] = decode_uid(args[n]);
            }
            svc->nr_supp_gids = n - 2;
        }
        break;
    case K_keycodes:
        if (nargs < 2) {
            parse_error(state, "keycodes option requires atleast one keycode\n");
        } else {
            svc->keycodes = (int*) malloc((nargs - 1) * sizeof(svc->keycodes[0]));
            if (!svc->keycodes) {
                parse_error(state, "could not allocate keycodes\n");
            } else {
                svc->nkeycodes = nargs - 1;
                for (i = 1; i < nargs; i++) {
                    svc->keycodes[i - 1] = atoi(args[i]);
                }
            }
        }
        break;
    case K_oneshot:
        svc->flags |= SVC_ONESHOT;
        break;
    case K_onrestart:
        nargs--;
        args++;
        kw = lookup_keyword(args[0]);
        if (!kw_is(kw, COMMAND)) {
            parse_error(state, "invalid command '%s'\n", args[0]);
            break;
        }
        kw_nargs = kw_nargs(kw);
        if (nargs < kw_nargs) {
            parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1,
                kw_nargs > 2 ? "arguments" : "argument");
            break;
        }
        cmd = (command*) malloc(sizeof(*cmd) + sizeof(char*) * nargs);
        cmd->func = kw_func(kw);
        cmd->nargs = nargs;
        memcpy(cmd->args, args, sizeof(char*) * nargs);
        list_add_tail(&svc->onrestart.commands, &cmd->clist);
        break;
    case K_critical:
        svc->flags |= SVC_CRITICAL;
        break;
    case K_setenv: { /* name value */
        if (nargs < 3) {
            parse_error(state, "setenv option requires name and value arguments\n");
            break;
        }
        svcenvinfo* ei = (svcenvinfo*) calloc(1, sizeof(*ei));
        if (!ei) {
            parse_error(state, "out of memory\n");
            break;
        }
        ei->name = args[1];
        ei->value = args[2];
        ei->next = svc->envvars;
        svc->envvars = ei;
        break;
    }
    case K_socket: {/* name type perm [ uid gid context ] */
        if (nargs < 4) {
            parse_error(state, "socket option requires name, type, perm arguments\n");
            break;
        }
        if (strcmp(args[2],"dgram") && strcmp(args[2],"stream")
                && strcmp(args[2],"seqpacket")) {
            parse_error(state, "socket type must be 'dgram', 'stream' or 'seqpacket'\n");
            break;
        }
        socketinfo* si = (socketinfo*) calloc(1, sizeof(*si));
        if (!si) {
            parse_error(state, "out of memory\n");
            break;
        }
        si->name = args[1];
        si->type = args[2];
        si->perm = strtoul(args[3], 0, 8);
        if (nargs > 4)
            si->uid = decode_uid(args[4]);
        if (nargs > 5)
            si->gid = decode_uid(args[5]);
        if (nargs > 6)
            si->socketcon = args[6];
        si->next = svc->sockets;
        svc->sockets = si;
        break;
    }
    case K_user:
        if (nargs != 2) {
            parse_error(state, "user option requires a user id\n");
        } else {
            svc->uid = decode_uid(args[1]);
        }
        break;
    case K_seclabel:
        if (nargs != 2) {
            parse_error(state, "seclabel option requires a label string\n");
        } else {
            svc->seclabel = args[1];
        }
        break;
    case K_writepid:
        if (nargs < 2) {
            parse_error(state, "writepid option requires at least one filename\n");
            break;
        }
        svc->writepid_files_ = new std::vector<std::string>;
        for (int i = 1; i < nargs; ++i) {
            svc->writepid_files_->push_back(args[i]);
        }
        break;
    default:
        parse_error(state, "invalid option '%s'\n", args[0]);
    }
}
这个函数中定义了所有的option,每一个option处理方法都不相同,大家遇到感兴趣的option可以自行分析。
service解析完成后,是时候看看service的启动了。
service的启动
service解析完成以后,有了一个service_list的链表,可以service是在是么地方启动的呢?当然是在action中了,action中有个命名叫start,它对应的处理函数是do_start,着我们在前面已经说过了,do_start函数如下:
int do_start(int nargs, char **args)
{
    struct service *svc;
    svc = service_find_by_name(args[1]);
    if (svc) {
        service_start(svc, NULL);
    }
    return 0;
}
这个函数非常见到,找到服务,启动它。
查找service的过程:
struct service *service_find_by_name(const char *name)
{
    struct listnode *node;
    struct service *svc;
    list_for_each(node, &service_list) {
        svc = node_to_item(node, struct service, slist);
        if (!strcmp(svc->name, name)) {
            return svc;
        }
    }
    return 0;
}
遍历service_list,对比名字,相同就返回。
启动service过程:
void service_start(struct service *svc, const char *dynamic_args)
{
    // Starting a service removes it from the disabled or reset state and
    // immediately takes it out of the restarting state if it was in there.
    svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
    svc->time_started = 0;
    // Running processes require no additional work --- if they're in the
    // process of exiting, we've ensured that they will immediately restart
    // on exit, unless they are ONESHOT.
    if (svc->flags & SVC_RUNNING) {
        return;
    }
    bool needs_console = (svc->flags & SVC_CONSOLE);
    if (needs_console && !have_console) {
        ERROR("service '%s' requires console\n", svc->name);
        svc->flags |= SVC_DISABLED;
        return;
    }
    struct stat s;
    if (stat(svc->args[0], &s) != 0) {
        ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name);
        svc->flags |= SVC_DISABLED;
        return;
    }
    if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) {
        ERROR("service '%s' must be one-shot to use dynamic args, disabling\n",
               svc->args[0]);
        svc->flags |= SVC_DISABLED;
        return;
    }
    char* scon = NULL;
    if (is_selinux_enabled() > 0) {
        if (svc->seclabel) {
            scon = strdup(svc->seclabel);
            if (!scon) {
                ERROR("Out of memory while starting '%s'\n", svc->name);
                return;
            }
        } else {
            char *mycon = NULL, *fcon = NULL;
            INFO("computing context for service '%s'\n", svc->args[0]);
            int rc = getcon(&mycon);
            if (rc < 0) {
                ERROR("could not get context while starting '%s'\n", svc->name);
                return;
            }
            rc = getfilecon(svc->args[0], &fcon);
            if (rc < 0) {
                ERROR("could not get context while starting '%s'\n", svc->name);
                freecon(mycon);
                return;
            }
            rc = security_compute_create(mycon, fcon, string_to_security_class("process"), &scon);
            if (rc == 0 && !strcmp(scon, mycon)) {
                ERROR("Warning!  Service %s needs a SELinux domain defined; please fix!\n", svc->name);
            }
            freecon(mycon);
            freecon(fcon);
            if (rc < 0) {
                ERROR("could not get context while starting '%s'\n", svc->name);
                return;
            }
        }
    }
    NOTICE("Starting service '%s'...\n", svc->name);
    pid_t pid = fork();
    if (pid == 0) {
        struct socketinfo *si;
        struct svcenvinfo *ei;
        char tmp[32];
        int fd, sz;
        umask(077);
        if (properties_initialized()) {
            get_property_workspace(&fd, &sz);
            snprintf(tmp, sizeof(tmp), "%d,%d", dup(fd), sz);
            add_environment("ANDROID_PROPERTY_WORKSPACE", tmp);
        }
        for (ei = svc->envvars; ei; ei = ei->next)
            add_environment(ei->name, ei->value);
        for (si = svc->sockets; si; si = si->next) {
            int socket_type = (
                    !strcmp(si->type, "stream") ? SOCK_STREAM :
                        (!strcmp(si->type, "dgram") ? SOCK_DGRAM : SOCK_SEQPACKET));
            int s = create_socket(si->name, socket_type,
                                  si->perm, si->uid, si->gid, si->socketcon ?: scon);
            if (s >= 0) {
                publish_socket(si->name, s);
            }
        }
        freecon(scon);
        scon = NULL;
        if (svc->writepid_files_) {
            std::string pid_str = android::base::StringPrintf("%d", pid);
            for (auto& file : *svc->writepid_files_) {
                if (!android::base::WriteStringToFile(pid_str, file)) {
                    ERROR("couldn't write %s to %s: %s\n",
                          pid_str.c_str(), file.c_str(), strerror(errno));
                }
            }
        }
        if (svc->ioprio_class != IoSchedClass_NONE) {
            if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) {
                ERROR("Failed to set pid %d ioprio = %d,%d: %s\n",
                      getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno));
            }
        }
        if (needs_console) {
            setsid();
            open_console();
        } else {
            zap_stdio();
        }
        if (false) {
            for (size_t n = 0; svc->args[n]; n++) {
                INFO("args[%zu] = '%s'\n", n, svc->args[n]);
            }
            for (size_t n = 0; ENV[n]; n++) {
                INFO("env[%zu] = '%s'\n", n, ENV[n]);
            }
        }
        setpgid(0, getpid());
        // As requested, set our gid, supplemental gids, and uid.
        if (svc->gid) {
            if (setgid(svc->gid) != 0) {
                ERROR("setgid failed: %s\n", strerror(errno));
                _exit(127);
            }
        }
        if (svc->nr_supp_gids) {
            if (setgroups(svc->nr_supp_gids, svc->supp_gids) != 0) {
                ERROR("setgroups failed: %s\n", strerror(errno));
                _exit(127);
            }
        }
        if (svc->uid) {
            if (setuid(svc->uid) != 0) {
                ERROR("setuid failed: %s\n", strerror(errno));
                _exit(127);
            }
        }
        if (svc->seclabel) {
            if (is_selinux_enabled() > 0 && setexeccon(svc->seclabel) < 0) {
                ERROR("cannot setexeccon('%s'): %s\n", svc->seclabel, strerror(errno));
                _exit(127);
            }
        }
        if (!dynamic_args) {
            if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) {
                ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));
            }
        } else {
            char *arg_ptrs[INIT_PARSER_MAXARGS+1];
            int arg_idx = svc->nargs;
            char *tmp = strdup(dynamic_args);
            char *next = tmp;
            char *bword;
            /* Copy the static arguments */
            memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *)));
            while((bword = strsep(&next, " "))) {
                arg_ptrs[arg_idx++] = bword;
                if (arg_idx == INIT_PARSER_MAXARGS)
                    break;
            }
            arg_ptrs[arg_idx] = NULL;
            execve(svc->args[0], (char**) arg_ptrs, (char**) ENV);
        }
        _exit(127);
    }
    freecon(scon);
    if (pid < 0) {
        ERROR("failed to start '%s'\n", svc->name);
        svc->pid = 0;
        return;
    }
    svc->time_started = gettime();
    svc->pid = pid;
    svc->flags |= SVC_RUNNING;
    if ((svc->flags & SVC_EXEC) != 0) {
        INFO("SVC_EXEC pid %d (uid %d gid %d+%zu context %s) started; waiting...\n",
             svc->pid, svc->uid, svc->gid, svc->nr_supp_gids,
             svc->seclabel ? : "default");
        waiting_for_exec = true;
    }
    svc->NotifyStateChange("running");
}
这个函数虽然长,但总结起来无非就做了这些事情:
1.解析参数
2.fork一个进程。
3.初始化子进程,主要是根据service结构体中的信息创建一个写东西。比如,根据sockets创建socket等。
4.执行execve,也就是加载可执行文件了。
也就是说,当init.rc等rc配置脚本解析完成后,开始执行action中的命令,并通过start命令来启动service。
action的解析与命令的执行
action的解析
对比service的解析来看,action的解析应该是调用parse_action函数:
static void *parse_action(struct parse_state *state, int nargs, char **args)
{
    struct trigger *cur_trigger;
    int i;
    if (nargs < 2) {
        parse_error(state, "actions must have a trigger\n");
        return 0;
    }
    action* act = (action*) calloc(1, sizeof(*act));
    list_init(&act->triggers);
    for (i = 1; i < nargs; i++) {
        if (!(i % 2)) {
            if (strcmp(args[i], "&&")) {
                struct listnode *node;
                struct listnode *node2;
                parse_error(state, "& is the only symbol allowed to concatenate actions\n");
                list_for_each_safe(node, node2, &act->triggers) {
                    struct trigger *trigger = node_to_item(node, struct trigger, nlist);
                    free(trigger);
                }
                free(act);
                return 0;
            } else
                continue;
        }
        cur_trigger = (trigger*) calloc(1, sizeof(*cur_trigger));
        cur_trigger->name = args[i];
        list_add_tail(&act->triggers, &cur_trigger->nlist);
    }
    list_init(&act->commands);
    list_init(&act->qlist);
    list_add_tail(&action_list, &act->alist);
        /* XXX add to hash */
    return act;
}
action中命令的执行
可以看到所有的section解析都是类似的,构建一个结构体并添加到对应的链表中。
这里就不继续展开分析了。我们回到init.cpp的main函数中,看看action_list中存放的action是如何被执行的。
在解析init.rc结束后,会有如下函数:
 action_for_each_trigger("early-init", action_add_queue_tail);
 action_for_each_trigger("init", action_add_queue_tail);
 action_for_each_trigger("charger", action_add_queue_tail);
 action_for_each_trigger("late-init", action_add_queue_tail);
action_for_each_trigger函数如下:
void action_for_each_trigger(const char *trigger,
                             void (*func)(struct action *act))
{
    struct listnode *node, *node2;
    struct action *act;
    struct trigger *cur_trigger;
    list_for_each(node, &action_list) {
        act = node_to_item(node, struct action, alist);
        list_for_each(node2, &act->triggers) {
            cur_trigger = node_to_item(node2, struct trigger, nlist);
            if (!strcmp(cur_trigger->name, trigger)) {
                func(act);
            }
        }
    }
}
也就是说,这个函数的作用是遍历action_list链表,找到对应名字的触发器,然后盗用传入的func函数,也就是action_add_queue_tail函数,这个函数如下:
void action_add_queue_tail(struct action *act)
{
    if (list_empty(&act->qlist)) {
        list_add_tail(&action_queue, &act->qlist);
    }
}
再把action添加到action_queuw中。这里是不是可以理解为给action排队呢?按照action的名字(early-init,init…)把action排好顺序。这个时候还是没有执行action中的命令。
继续往下看,看到execute_one_command函数,从名字来看是执行一个命令,我们看看是怎么执行的,execute_one_command函数如下:
void execute_one_command() {
    Timer t;
    char cmd_str[256] = "";
    char name_str[256] = "";
    if (!cur_action || !cur_command || is_last_command(cur_action, cur_command)) {
        cur_action = action_remove_queue_head();
        cur_command = NULL;
        if (!cur_action) {
            return;
        }
        build_triggers_string(name_str, sizeof(name_str), cur_action);
        INFO("processing action %p (%s)\n", cur_action, name_str);
        cur_command = get_first_command(cur_action);
    } else {
        cur_command = get_next_command(cur_action, cur_command);
    }
    if (!cur_command) {
        return;
    }
    int result = cur_command->func(cur_command->nargs, cur_command->args);
    if (klog_get_level() >= KLOG_INFO_LEVEL) {
        for (int i = 0; i < cur_command->nargs; i++) {
            strlcat(cmd_str, cur_command->args[i], sizeof(cmd_str));
            if (i < cur_command->nargs - 1) {
                strlcat(cmd_str, " ", sizeof(cmd_str));
            }
        }
        char source[256];
        if (cur_command->filename) {
            snprintf(source, sizeof(source), " (%s:%d)", cur_command->filename, cur_command->line);
        } else {
            *source = '\0';
        }
        INFO("Command '%s' action=%s%s returned %d took %.2fs\n",
             cmd_str, cur_action ? name_str : "", source, result, t.duration());
    }
}
这个函数就是从aciton_queue中取出头部的action,然后执行command中的函数。那这个函数是什么呢?我们在文章一开始就说过了,每一个command都对应一个do_xxxx的函数来处理该命令。是不是这样呢?
我们需要从command的解析说起。和解析service的option一样。解析command使用的是parse_line_action函数。这里不明白的可以返回去看看service的解析过程。
parse_line_action定义在system/core/init/init_parser.cpp中,该函数如下:
static void parse_line_action(struct parse_state* state, int nargs, char **args)
{
    struct action *act = (action*) state->context;
    int kw, n;
    if (nargs == 0) {
        return;
    }
    kw = lookup_keyword(args[0]);
    if (!kw_is(kw, COMMAND)) {
        parse_error(state, "invalid command '%s'\n", args[0]);
        return;
    }
    n = kw_nargs(kw);
    if (nargs < n) {
        parse_error(state, "%s requires %d %s\n", args[0], n - 1,
            n > 2 ? "arguments" : "argument");
        return;
    }
    command* cmd = (command*) malloc(sizeof(*cmd) + sizeof(char*) * nargs);
    cmd->func = kw_func(kw);
    cmd->line = state->line;
    cmd->filename = state->filename;
    cmd->nargs = nargs;
    memcpy(cmd->args, args, sizeof(char*) * nargs);
    list_add_tail(&act->commands, &cmd->clist);
}
关键的代码只有一行cmd->func = kw_func(kw),kw_func函数定义在相同文件下,是一个宏:
#define kw_func(kw) (keyword_info[kw].func)
1
这里keyword_info数组又出现了吧,这个数组也定义在system/core/init/init_parser.cpp中:
static struct {
    const char *name;
    int (*func)(int nargs, char **args);
    unsigned char nargs;
    unsigned char flags;
} keyword_info[KEYWORD_COUNT] = {
    [ K_UNKNOWN ] = { "unknown", 0, 0, 0 },
#include "keywords.h"
};
可以看到数组的初始化使用的是#include “keywords.h”。这中写法我还是第一次见到。
keywords.h:
#ifndef KEYWORD
int do_bootchart_init(int nargs, char **args);
int do_class_start(int nargs, char **args);
int do_class_stop(int nargs, char **args);
int do_class_reset(int nargs, char **args);
int do_domainname(int nargs, char **args);
int do_enable(int nargs, char **args);
int do_exec(int nargs, char **args);
int do_export(int nargs, char **args);
int do_hostname(int nargs, char **args);
int do_ifup(int nargs, char **args);
int do_insmod(int nargs, char **args);
int do_installkey(int nargs, char **args);
int do_mkdir(int nargs, char **args);
int do_mount_all(int nargs, char **args);
int do_mount(int nargs, char **args);
int do_powerctl(int nargs, char **args);
int do_restart(int nargs, char **args);
int do_restorecon(int nargs, char **args);
int do_restorecon_recursive(int nargs, char **args);
int do_rm(int nargs, char **args);
int do_rmdir(int nargs, char **args);
int do_setprop(int nargs, char **args);
int do_setrlimit(int nargs, char **args);
int do_start(int nargs, char **args);
int do_stop(int nargs, char **args);
int do_swapon_all(int nargs, char **args);
int do_trigger(int nargs, char **args);
int do_symlink(int nargs, char **args);
int do_sysclktz(int nargs, char **args);
int do_write(int nargs, char **args);
int do_copy(int nargs, char **args);
int do_chown(int nargs, char **args);
int do_chmod(int nargs, char **args);
int do_loglevel(int nargs, char **args);
int do_load_persist_props(int nargs, char **args);
int do_load_all_props(int nargs, char **args);
int do_verity_load_state(int nargs, char **args);
int do_verity_update_state(int nargs, char **args);
int do_wait(int nargs, char **args);
#define __MAKE_KEYWORD_ENUM__
#define KEYWORD(symbol, flags, nargs, func) K_##symbol,
enum {
    K_UNKNOWN,
#endif
    KEYWORD(bootchart_init,        COMMAND, 0, do_bootchart_init)
    KEYWORD(chmod,       COMMAND, 2, do_chmod)
    KEYWORD(chown,       COMMAND, 2, do_chown)
    KEYWORD(class,       OPTION,  0, 0)
    KEYWORD(class_reset, COMMAND, 1, do_class_reset)
    KEYWORD(class_start, COMMAND, 1, do_class_start)
    KEYWORD(class_stop,  COMMAND, 1, do_class_stop)
    KEYWORD(console,     OPTION,  0, 0)
    KEYWORD(copy,        COMMAND, 2, do_copy)
    KEYWORD(critical,    OPTION,  0, 0)
    KEYWORD(disabled,    OPTION,  0, 0)
    KEYWORD(domainname,  COMMAND, 1, do_domainname)
    KEYWORD(enable,      COMMAND, 1, do_enable)
    KEYWORD(exec,        COMMAND, 1, do_exec)
    KEYWORD(export,      COMMAND, 2, do_export)
    KEYWORD(group,       OPTION,  0, 0)
    KEYWORD(hostname,    COMMAND, 1, do_hostname)
    KEYWORD(ifup,        COMMAND, 1, do_ifup)
    KEYWORD(import,      SECTION, 1, 0)
    KEYWORD(insmod,      COMMAND, 1, do_insmod)
    KEYWORD(installkey,  COMMAND, 1, do_installkey)
    KEYWORD(ioprio,      OPTION,  0, 0)
    KEYWORD(keycodes,    OPTION,  0, 0)
    KEYWORD(load_all_props,        COMMAND, 0, do_load_all_props)
    KEYWORD(load_persist_props,    COMMAND, 0, do_load_persist_props)
    KEYWORD(loglevel,    COMMAND, 1, do_loglevel)
    KEYWORD(mkdir,       COMMAND, 1, do_mkdir)
    KEYWORD(mount_all,   COMMAND, 1, do_mount_all)
    KEYWORD(mount,       COMMAND, 3, do_mount)
    KEYWORD(oneshot,     OPTION,  0, 0)
    KEYWORD(onrestart,   OPTION,  0, 0)
    KEYWORD(on,          SECTION, 0, 0)
    KEYWORD(powerctl,    COMMAND, 1, do_powerctl)
    KEYWORD(restart,     COMMAND, 1, do_restart)
    KEYWORD(restorecon,  COMMAND, 1, do_restorecon)
    KEYWORD(restorecon_recursive,  COMMAND, 1, do_restorecon_recursive)
    KEYWORD(rm,          COMMAND, 1, do_rm)
    KEYWORD(rmdir,       COMMAND, 1, do_rmdir)
    KEYWORD(seclabel,    OPTION,  0, 0)
    KEYWORD(service,     SECTION, 0, 0)
    KEYWORD(setenv,      OPTION,  2, 0)
    KEYWORD(setprop,     COMMAND, 2, do_setprop)
    KEYWORD(setrlimit,   COMMAND, 3, do_setrlimit)
    KEYWORD(socket,      OPTION,  0, 0)
    KEYWORD(start,       COMMAND, 1, do_start)
    KEYWORD(stop,        COMMAND, 1, do_stop)
    KEYWORD(swapon_all,  COMMAND, 1, do_swapon_all)
    KEYWORD(symlink,     COMMAND, 1, do_symlink)
    KEYWORD(sysclktz,    COMMAND, 1, do_sysclktz)
    KEYWORD(trigger,     COMMAND, 1, do_trigger)
    KEYWORD(user,        OPTION,  0, 0)
    KEYWORD(verity_load_state,      COMMAND, 0, do_verity_load_state)
    KEYWORD(verity_update_state,    COMMAND, 0, do_verity_update_state)
    KEYWORD(wait,        COMMAND, 1, do_wait)
    KEYWORD(write,       COMMAND, 2, do_write)
    KEYWORD(writepid,    OPTION,  0, 0)
#ifdef __MAKE_KEYWORD_ENUM__
    KEYWORD_COUNT,
};
#undef __MAKE_KEYWORD_ENUM__
#undef KEYWORD
#endif
这其中定义了所有的init.rc中需要的关键字。从中可以知道哪些是命令,哪些是option,哪些是sercion。此外,也验证我们说的命令(command)都对应这一个do_xxxx的函数,执行这些命令其实就是执行这些函数。
Android6.0系统启动流程分析二:zygote进程
上一篇ndroid6.0系统启动流程分析一:init进程博客我们分析了init进程,主要分析了init.rc的解析过程和command的执行流程。我们说Init.rc中配置的service都是在init.rc中的action中使用start命令启动的,start命令对应的处理函数是do_start。我们也分析了这个函数。那么这一节,我们分析zygote进程,zygote进程在init.rc中也被配置为一个服务,那么它是不是使用start命令来启动的呢?我在Init.rc中搜索发现并没有,难道我错了?
zygote进程的启动
看下zygote在rc文件中的定义:
service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server
    class main
    socket zygote stream 660 root system
    onrestart write /sys/android_power/request_state wake
    onrestart write /sys/power/state on
    onrestart restart media
    onrestart restart netd
这个时候,大家不要忘了class main这行。我们在上片博客中不是说过class的作用吗?用于批量管理service。搜索main果然发现有如下语句:
on nonencrypted
    class_start main
    class_start late_start
on property:vold.decrypt=trigger_restart_min_framework
    class_start main
on property:vold.decrypt=trigger_restart_framework
    class_start main
    class_start late_start
但是发现有三个地方出现 class_start main,所以本菜就迷糊了,不知道是哪里启动的zygote,希望路过的大神能指点下迷津…
class_start 对应的处理方法是do_class_start,该方法定义在system\core\init\builtins.cpp中:
int do_class_start(int nargs, char **args)
{
        /* Starting a class does not start services
         * which are explicitly disabled.  They must
         * be started individually.
         */
    service_for_each_class(args[1], service_start_if_not_disabled);
    return 0;
}
调用service_for_each_class方法进一步处理:
void service_for_each_class(const char *classname,
                            void (*func)(struct service *svc))
{
    struct listnode *node;
    struct service *svc;
    list_for_each(node, &service_list) {
        svc = node_to_item(node, struct service, slist);
        if (!strcmp(svc->classname, classname)) {
            func(svc);
        }
    }
}
遍历service_list链表,没找到一个classname为main的service,就调用service_start_if_not_disabled来进一步处理,service_start_if_not_disabled方法如下:
static void service_start_if_not_disabled(struct service *svc)
{
    if (!(svc->flags & SVC_DISABLED)) {
        service_start(svc, NULL);
    } else {
        svc->flags |= SVC_DISABLED_START;
    }
}
可以看到最终还是调用了service_start方法来启动service,这个启动单个service一样了。这份函数我们在上节已经分析过了,这里就不再啰嗦了。
Zygote进程的native部分
zygote进程的可执行文件就是/system/bin/app_process,源码在\frameworks\base\cmds\app_process/app_main.cpp中。我们从它的main函数看起:
int main(int argc, char* const argv[])
{
    if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
        // Older kernels don't understand PR_SET_NO_NEW_PRIVS and return
        // EINVAL. Don't die on such kernels.
        if (errno != EINVAL) {
            LOG_ALWAYS_FATAL("PR_SET_NO_NEW_PRIVS failed: %s", strerror(errno));
            return 12;
        }
    }
    AppRuntime runtime(argv[0], computeArgBlockSize(argc, argv));
    // Process command line arguments
    // ignore argv[0]
    argc--;
    argv++;
    // Everything up to '--' or first non '-' arg goes to the vm.
    //
    // The first argument after the VM args is the "parent dir", which
    // is currently unused.
    //
    // After the parent dir, we expect one or more the following internal
    // arguments :
    //
    // --zygote : Start in zygote mode
    // --start-system-server : Start the system server.
    // --application : Start in application (stand alone, non zygote) mode.
    // --nice-name : The nice name for this process.
    //
    // For non zygote starts, these arguments will be followed by
    // the main class name. All remaining arguments are passed to
    // the main method of this class.
    //
    // For zygote starts, all remaining arguments are passed to the zygote.
    // main function.
    //
    // Note that we must copy argument string values since we will rewrite the
    // entire argument block when we apply the nice name to argv0.
    int i;
    for (i = 0; i < argc; i++) {
        if (argv[i][0] != '-') {
            break;
        }
        if (argv[i][1] == '-' && argv[i][2] == 0) {
            ++i; // Skip --.
            break;
        }
        runtime.addOption(strdup(argv[i]));
    }
    // Parse runtime arguments.  Stop at first unrecognized option.
    bool zygote = false;
    bool startSystemServer = false;
    bool application = false;
    String8 niceName;
    String8 className;
    ++i;  // Skip unused "parent dir" argument.
    while (i < argc) {
        const char* arg = argv[i++];
        if (strcmp(arg, "--zygote") == 0) {
            zygote = true;
            niceName = ZYGOTE_NICE_NAME;
        } else if (strcmp(arg, "--start-system-server") == 0) {
            startSystemServer = true;
        } else if (strcmp(arg, "--application") == 0) {
            application = true;
        } else if (strncmp(arg, "--nice-name=", 12) == 0) {
            niceName.setTo(arg + 12);
        } else if (strncmp(arg, "--", 2) != 0) {
            className.setTo(arg);
            break;
        } else {
            --i;
            break;
        }
    }
    Vector<String8> args;
    if (!className.isEmpty()) {
        // We're not in zygote mode, the only argument we need to pass
        // to RuntimeInit is the application argument.
        //
        // The Remainder of args get passed to startup class main(). Make
        // copies of them before we overwrite them with the process name.
        args.add(application ? String8("application") : String8("tool"));
        runtime.setClassNameAndArgs(className, argc - i, argv + i);
    } else {
        // We're in zygote mode.
        maybeCreateDalvikCache();
        if (startSystemServer) {
            args.add(String8("start-system-server"));
        }
        char prop[PROP_VALUE_MAX];
        if (property_get(ABI_LIST_PROPERTY, prop, NULL) == 0) {
            LOG_ALWAYS_FATAL("app_process: Unable to determine ABI list from property %s.",
                ABI_LIST_PROPERTY);
            return 11;
        }
        String8 abiFlag("--abi-list=");
        abiFlag.append(prop);
        args.add(abiFlag);
        // In zygote mode, pass all remaining arguments to the zygote
        // main() method.
        for (; i < argc; ++i) {
            args.add(String8(argv[i]));
        }
    }
    if (!niceName.isEmpty()) {
        runtime.setArgv0(niceName.string());
        set_process_name(niceName.string());
    }
    if (zygote) {
        runtime.start("com.android.internal.os.ZygoteInit", args, zygote);
    } else if (className) {
        runtime.start("com.android.internal.os.RuntimeInit", args, zygote);
    } else {
        fprintf(stderr, "Error: no class name or --zygote supplied.\n");
        app_usage();
        LOG_ALWAYS_FATAL("app_process: no class name or --zygote supplied.");
        return 10;
    }
}
因为zygote是在init进程中启动的,我们的启动它的时候传了一些参数上来,我们看看这些参数有什么:
在system/core/init/init.cpp中的service_start函数中,使用如下代码启动zygote。
            if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) {
                ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));
            }
svc->args[0]=“/system/bin/app_process”,因为在.rc中就是这么配置的。svc->args=“/system/bin/app_process -Xzygote /system/bin –zygote –start-system-server”,因为svc->args的构造是在service的解析中完成的,具体过程可以参考 Android6.0系统启动流程分析一:init进程
理清了参数,我们再来回到main函数。main函数主要做的事情有:
1. 构造一个AppRuntime实例。
2. 解析参数,通过解析参数,我们知道:
zygote = true;
startSystemServer = true;
application = false;
3. 调用AppRuntime的start方法。
前面没什么好说的,我们看看AppRuntime的start做了什么。
调用这个方法时,传入了如下参数:
参数一:”com.android.internal.os.ZygoteInit”
参数二:/system/bin/app_process -Xzygote /system/bin –zygote –start-system-server”
参数三:true
下面看看这个start方法:
/*
 * Start the Android runtime.  This involves starting the virtual machine
 * and calling the "static void main(String[] args)" method in the class
 * named by "className".
 *
 * Passes the main function two arguments, the class name and the specified
 * options string.
 */
void AndroidRuntime::start(const char* className, const Vector<String8>& options, bool zygote)
{
    ALOGD(">>>>>> START %s uid %d <<<<<<\n",
            className != NULL ? className : "(unknown)", getuid());
    static const String8 startSystemServer("start-system-server");
    /*
     * 'startSystemServer == true' means runtime is obsolete and not run from
     * init.rc anymore, so we print out the boot start event here.
     */
    for (size_t i = 0; i < options.size(); ++i) {
        if (options[i] == startSystemServer) {
           /* track our progress through the boot sequence */
           const int LOG_BOOT_PROGRESS_START = 3000;
           LOG_EVENT_LONG(LOG_BOOT_PROGRESS_START,  ns2ms(systemTime(SYSTEM_TIME_MONOTONIC)));
        }
    }
    const char* rootDir = getenv("ANDROID_ROOT");
    if (rootDir == NULL) {
        rootDir = "/system";
        if (!hasDir("/system")) {
            LOG_FATAL("No root directory specified, and /android does not exist.");
            return;
        }
        setenv("ANDROID_ROOT", rootDir, 1);
    }
    //const char* kernelHack = getenv("LD_ASSUME_KERNEL");
    //ALOGD("Found LD_ASSUME_KERNEL='%s'\n", kernelHack);
    /* start the virtual machine */
    JniInvocation jni_invocation;
    jni_invocation.Init(NULL);
    JNIEnv* env;
    if (startVm(&mJavaVM, &env, zygote) != 0) {
        return;
    }
    onVmCreated(env);
    /*
     * Register android functions.
     */
    if (startReg(env) < 0) {
        ALOGE("Unable to register all android natives\n");
        return;
    }
    /*
     * We want to call main() with a String array with arguments in it.
     * At present we have two arguments, the class name and an option string.
     * Create an array to hold them.
     */
    jclass stringClass;
    jobjectArray strArray;
    jstring classNameStr;
    stringClass = env->FindClass("java/lang/String");
    assert(stringClass != NULL);
    strArray = env->NewObjectArray(options.size() + 1, stringClass, NULL);
    assert(strArray != NULL);
    classNameStr = env->NewStringUTF(className);
    assert(classNameStr != NULL);
    env->SetObjectArrayElement(strArray, 0, classNameStr);
    for (size_t i = 0; i < options.size(); ++i) {
        jstring optionsStr = env->NewStringUTF(options.itemAt(i).string());
        assert(optionsStr != NULL);
        env->SetObjectArrayElement(strArray, i + 1, optionsStr);
    }
    /*
     * Start VM.  This thread becomes the main thread of the VM, and will
     * not return until the VM exits.
     */
    char* slashClassName = toSlashClassName(className);
    jclass startClass = env->FindClass(slashClassName);
    if (startClass == NULL) {
        ALOGE("JavaVM unable to locate class '%s'\n", slashClassName);
        /* keep going */
    } else {
        jmethodID startMeth = env->GetStaticMethodID(startClass, "main",
            "([Ljava/lang/String;)V");
        if (startMeth == NULL) {
            ALOGE("JavaVM unable to find main() in '%s'\n", className);
            /* keep going */
        } else {
            env->CallStaticVoidMethod(startClass, startMeth, strArray);
#if 0
            if (env->ExceptionCheck())
                threadExitUncaughtException(env);
#endif
        }
    }
    free(slashClassName);
    ALOGD("Shutting down VM\n");
    if (mJavaVM->DetachCurrentThread() != JNI_OK)
        ALOGW("Warning: unable to detach main thread\n");
    if (mJavaVM->DestroyJavaVM() != 0)
        ALOGW("Warning: VM did not shut down cleanly\n");
}
这份函数作如下事情:
1. 启动java虚拟机。对虚拟机感兴趣的,这里或许是一个重要的突破口。我对虚拟机了解不多,这里就不展开了。
2. 注册本地方法。注册本地方式调用startReg方法,这个方法可以看一看:
/*static*/ int AndroidRuntime::startReg(JNIEnv* env)
{
    /*
     * This hook causes all future threads created in this process to be
     * attached to the JavaVM.  (This needs to go away in favor of JNI
     * Attach calls.)
     */
    androidSetCreateThreadFunc((android_create_thread_fn) javaCreateThreadEtc);
    ALOGV("--- registering native functions ---\n");
    /*
     * Every "register" function calls one or more things that return
     * a local reference (e.g. FindClass).  Because we haven't really
     * started the VM yet, they're all getting stored in the base frame
     * and never released.  Use Push/Pop to manage the storage.
     */
    env->PushLocalFrame(200);
    if (register_jni_procs(gRegJNI, NELEM(gRegJNI), env) < 0) {
        env->PopLocalFrame(NULL);
        return -1;
    }
    env->PopLocalFrame(NULL);
    //createJavaThread("fubar", quickTest, (void*) "hello");
    return 0;
}
这个方法中,使用register_jni_procs方法完成注册,该方法如下:
static int register_jni_procs(const RegJNIRec array[], size_t count, JNIEnv* env)
{
    for (size_t i = 0; i < count; i++) {
        if (array[i].mProc(env) < 0) {
#ifndef NDEBUG
            ALOGD("----------!!! %s failed to load\n", array[i].mName);
#endif
            return -1;
        }
    }
    return 0;
}
这个方法传入的一个数组作为参数,这个方法的作用就是遍历这个数组,调用这个数组的mProc方法。这个数组就是gRegJNI:
static const RegJNIRec gRegJNI[] = {
    REG_JNI(register_com_android_internal_os_RuntimeInit),
    REG_JNI(register_android_os_SystemClock),
    REG_JNI(register_android_util_EventLog),
    REG_JNI(register_android_util_Log),
    REG_JNI(register_android_content_AssetManager),
    REG_JNI(register_android_content_StringBlock),
    REG_JNI(register_android_content_XmlBlock),
    REG_JNI(register_android_emoji_EmojiFactory),
     ...
};
也就是说这个方法会调用上述数组中的每一项REG_JNI中声明的函数来实现各个模块的jni函数的注册的。
回到start方法,接下来就是用jni规范,从c++中调用java中的静态方法了,这个方法就是ZygoteInit中的main方法。
Zygote进程的java部分
现在代码从c++转入到java部分了,我们来看ZygoteInit中的main方法:
 public static void main(String argv[]) {
        try {
            RuntimeInit.enableDdms();
            // Start profiling the zygote initialization.
            SamplingProfilerIntegration.start();
            boolean startSystemServer = false;
            String socketName = "zygote";
            String abiList = null;
            for (int i = 1; i < argv.length; i++) {
                if ("start-system-server".equals(argv[i])) {
                    startSystemServer = true;
                } else if (argv[i].startsWith(ABI_LIST_ARG)) {
                    abiList = argv[i].substring(ABI_LIST_ARG.length());
                } else if (argv[i].startsWith(SOCKET_NAME_ARG)) {
                    socketName = argv[i].substring(SOCKET_NAME_ARG.length());
                } else {
                    throw new RuntimeException("Unknown command line argument: " + argv[i]);
                }
            }
            if (abiList == null) {
                throw new RuntimeException("No ABI list supplied.");
            }
            registerZygoteSocket(socketName);
            EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_START,
                SystemClock.uptimeMillis());
            preload();
            EventLog.writeEvent(LOG_BOOT_PROGRESS_PRELOAD_END,
                SystemClock.uptimeMillis());
            // Finish profiling the zygote initialization.
            SamplingProfilerIntegration.writeZygoteSnapshot();
            // Do an initial gc to clean up after startup
            gcAndFinalize();
            // Disable tracing so that forked processes do not inherit stale tracing tags from
            // Zygote.
            Trace.setTracingEnabled(false);
            if (startSystemServer) {
                startSystemServer(abiList, socketName);
            }
            Log.i(TAG, "Accepting command socket connections");
            runSelectLoop(abiList);
            closeServerSocket();
        } catch (MethodAndArgsCaller caller) {
            caller.run();
        } catch (RuntimeException ex) {
            Log.e(TAG, "Zygote died with exception", ex);
            closeServerSocket();
            throw ex;
        }
    }
这个方法做了如下事情:
1. 注册Zytote套接字。这个套接字是在system/core/init/init.cpp中的service_start方法中创建的,不清楚的请看上一篇博客。service_start方法创建完套接字以后会发布这个套接字,使用publish_socket方法。这个方法也是定义在system/core/init/init.cpp中:
static void publish_socket(const char *name, int fd)
{
    char key[64] = ANDROID_SOCKET_ENV_PREFIX;
    char val[64];
    strlcpy(key + sizeof(ANDROID_SOCKET_ENV_PREFIX) - 1,
            name,
            sizeof(key) - sizeof(ANDROID_SOCKET_ENV_PREFIX));
    snprintf(val, sizeof(val), "%d", fd);
    add_environment(key, val);
    /* make sure we don't close-on-exec */
    fcntl(fd, F_SETFD, 0);
}
这里主要使用add_environment方法把这个套接字添加到环境变量中,添加的结果是一对键值对:(ANDROID_SOCKET_zygote,fd)。那么我们在这里是不是就可以从环境变量中获取呢?
看看registerZygoteSocket方法:
    private static void registerZygoteSocket(String socketName) {
        if (sServerSocket == null) {
            int fileDesc;
            final String fullSocketName = ANDROID_SOCKET_PREFIX + socketName;
            try {
                String env = System.getenv(fullSocketName);
                fileDesc = Integer.parseInt(env);
            } catch (RuntimeException ex) {
                throw new RuntimeException(fullSocketName + " unset or invalid", ex);
            }
            try {
                FileDescriptor fd = new FileDescriptor();
                fd.setInt$(fileDesc);
                sServerSocket = new LocalServerSocket(fd);
            } catch (IOException ex) {
                throw new RuntimeException(
                        "Error binding to local socket '" + fileDesc + "'", ex);
            }
        }
    }
果不其然吧,就是通过System.getenv方法获取到我们在service_start方法中创建的套接字,然后对其进一步封装。并把封装的结果保存在sServerSocket 变量中。
第一件事情就做完了,回到main方法。
2.初始化gc:gcAndFinalize();
3.启动SystemServer。
SystemServer是Android系统非常和核心的服务,它会在创建后启动系统中的其他服务,然后成为所有服务的管理者,向应用程序和其他服务提供服务。这一部分我们放在下一节来分析。
4.进入监听状态:runSelectLoop
Zygote监听套接字
  private static void runSelectLoop(String abiList) throws MethodAndArgsCaller {
        ArrayList<FileDescriptor> fds = new ArrayList<FileDescriptor>();
        ArrayList<ZygoteConnection> peers = new ArrayList<ZygoteConnection>();
        fds.add(sServerSocket.getFileDescriptor());
        peers.add(null);
        while (true) {
            StructPollfd[] pollFds = new StructPollfd[fds.size()];
            for (int i = 0; i < pollFds.length; ++i) {
                pollFds[i] = new StructPollfd();
                pollFds[i].fd = fds.get(i);
                pollFds[i].events = (short) POLLIN;
            }
            try {
                Os.poll(pollFds, -1);
            } catch (ErrnoException ex) {
                throw new RuntimeException("poll failed", ex);
            }
            for (int i = pollFds.length - 1; i >= 0; --i) {
                if ((pollFds[i].revents & POLLIN) == 0) {
                    continue;
                }
                if (i == 0) {
                    ZygoteConnection newPeer = acceptCommandPeer(abiList);
                    peers.add(newPeer);
                    fds.add(newPeer.getFileDesciptor());
                } else {
                    boolean done = peers.get(i).runOnce();
                    if (done) {
                        peers.remove(i);
                        fds.remove(i);
                    }
                }
            }
        }
    }
这个方法开始进入zygote套接字的监听状态了。当zygote套接字接受到写入操作,Zygote进程唤醒,执行ZygoteConnection的runOnce方法。
我们知道Zygote俗称“受精卵”,它是所有java进程的祖先进程。这里的runOnce便是创建进程的开端了。虽然暂时不知道上层在什么情况下,如何向zytoge套接字写入数据,从而开启进程创建的,但是没有关系,我们依然可以顺着代码的思路,看看从runOnce开始,是如何创建出一个进程的。
Zygote创建子进程的过程
runOnce方法如下:
  /**
     * Reads one start command from the command socket. If successful,
     * a child is forked and a {@link ZygoteInit.MethodAndArgsCaller}
     * exception is thrown in that child while in the parent process,
     * the method returns normally. On failure, the child is not
     * spawned and messages are printed to the log and stderr. Returns
     * a boolean status value indicating whether an end-of-file on the command
     * socket has been encountered.
     *
     * @return false if command socket should continue to be read from, or
     * true if an end-of-file has been encountered.
     * @throws ZygoteInit.MethodAndArgsCaller trampoline to invoke main()
     * method in child process
     */
    boolean runOnce() throws ZygoteInit.MethodAndArgsCaller {
        String args[];
        Arguments parsedArgs = null;
        FileDescriptor[] descriptors;
        try {
            args = readArgumentList();
            descriptors = mSocket.getAncillaryFileDescriptors();
        } catch (IOException ex) {
            Log.w(TAG, "IOException on command socket " + ex.getMessage());
            closeSocket();
            return true;
        }
        if (args == null) {
            // EOF reached.
            closeSocket();
            return true;
        }
        /** the stderr of the most recent request, if avail */
        PrintStream newStderr = null;
        if (descriptors != null && descriptors.length >= 3) {
            newStderr = new PrintStream(
                    new FileOutputStream(descriptors[2]));
        }
        int pid = -1;
        FileDescriptor childPipeFd = null;
        FileDescriptor serverPipeFd = null;
        try {
            parsedArgs = new Arguments(args);
            if (parsedArgs.abiListQuery) {
                return handleAbiListQuery();
            }
            if (parsedArgs.permittedCapabilities != 0 || parsedArgs.effectiveCapabilities != 0) {
                throw new ZygoteSecurityException("Client may not specify capabilities: " +
                        "permitted=0x" + Long.toHexString(parsedArgs.permittedCapabilities) +
                        ", effective=0x" + Long.toHexString(parsedArgs.effectiveCapabilities));
            }
            applyUidSecurityPolicy(parsedArgs, peer);
            applyInvokeWithSecurityPolicy(parsedArgs, peer);
            applyDebuggerSystemProperty(parsedArgs);
            applyInvokeWithSystemProperty(parsedArgs);
            int[][] rlimits = null;
            if (parsedArgs.rlimits != null) {
                rlimits = parsedArgs.rlimits.toArray(intArray2d);
            }
            if (parsedArgs.invokeWith != null) {
                FileDescriptor[] pipeFds = Os.pipe2(O_CLOEXEC);
                childPipeFd = pipeFds[1];
                serverPipeFd = pipeFds[0];
                Os.fcntlInt(childPipeFd, F_SETFD, 0);
            }
            /**
             * In order to avoid leaking descriptors to the Zygote child,
             * the native code must close the two Zygote socket descriptors
             * in the child process before it switches from Zygote-root to
             * the UID and privileges of the application being launched.
             *
             * In order to avoid "bad file descriptor" errors when the
             * two LocalSocket objects are closed, the Posix file
             * descriptors are released via a dup2() call which closes
             * the socket and substitutes an open descriptor to /dev/null.
             */
            int [] fdsToClose = { -1, -1 };
            FileDescriptor fd = mSocket.getFileDescriptor();
            if (fd != null) {
                fdsToClose[0] = fd.getInt$();
            }
            fd = ZygoteInit.getServerSocketFileDescriptor();
            if (fd != null) {
                fdsToClose[1] = fd.getInt$();
            }
            fd = null;
            pid = Zygote.forkAndSpecialize(parsedArgs.uid, parsedArgs.gid, parsedArgs.gids,
                    parsedArgs.debugFlags, rlimits, parsedArgs.mountExternal, parsedArgs.seInfo,
                    parsedArgs.niceName, fdsToClose, parsedArgs.instructionSet,
                    parsedArgs.appDataDir);
        } catch (ErrnoException ex) {
            logAndPrintError(newStderr, "Exception creating pipe", ex);
        } catch (IllegalArgumentException ex) {
            logAndPrintError(newStderr, "Invalid zygote arguments", ex);
        } catch (ZygoteSecurityException ex) {
            logAndPrintError(newStderr,
                    "Zygote security policy prevents request: ", ex);
        }
        try {
            if (pid == 0) {
                // in child
                IoUtils.closeQuietly(serverPipeFd);
                serverPipeFd = null;
                handleChildProc(parsedArgs, descriptors, childPipeFd, newStderr);
                // should never get here, the child is expected to either
                // throw ZygoteInit.MethodAndArgsCaller or exec().
                return true;
            } else {
                // in parent...pid of < 0 means failure
                IoUtils.closeQuietly(childPipeFd);
                childPipeFd = null;
                return handleParentProc(pid, descriptors, serverPipeFd, parsedArgs);
            }
        } finally {
            IoUtils.closeQuietly(childPipeFd);
            IoUtils.closeQuietly(serverPipeFd);
        }
    }
这个方法比较长,我们先看看它的注释,大概意思如下:
这个方法会从socket中读取一个命令,成功的话就会创建一个子进程出来,并且会抛出一个异常:ZygoteInit.MethodAndArgsCaller 。
也就是说即使创建成功了也会抛出异常,然后会在Zygote的main方法中接受检测到异常,进而调用caller.run()方法作进一步处理。

整个过程有很多地方看不懂,这里主要梳理的是整个流程。大神请绕道…
下面对这个过程做简要分析。
runOnce方法主要调用了 Zygote.forkAndSpecialize方法进一步处理,这个方法执行完成以后 ,子进程就已经创建好了,这个时候pid=0也就是在子进程中执行,在执行handleChildProc时,子进程会抛出异常,异常被捕获后执行MethodAndArgsCaller类中的run方法被执行,这个方法最终会调用到ActivityThread的main方法。这个过程是在子进程中实现的。pid!=0就意味着后面的代码是在Zygote进程中执行,这个时候执行的handleParentProc方法,这个方法会做一些清理工作(从注释了解到的,具体code没能理解)。
接下来看看forkAndSpecialize方法,这个方法定义下Zygote类中,看看它怎么一步步创建子进程的。
    public static int forkAndSpecialize(int uid, int gid, int[] gids, int debugFlags,
          int[][] rlimits, int mountExternal, String seInfo, String niceName, int[] fdsToClose,
          String instructionSet, String appDataDir) {
        VM_HOOKS.preFork();
        int pid = nativeForkAndSpecialize(
                  uid, gid, gids, debugFlags, rlimits, mountExternal, seInfo, niceName, fdsToClose,
                  instructionSet, appDataDir);
        // Enable tracing as soon as possible for the child process.
        if (pid == 0) {
            Trace.setTracingEnabled(true);
            // Note that this event ends at the end of handleChildProc,
            Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "PostFork");
        }
        VM_HOOKS.postForkCommon();
        return pid;
    }
三件事情:
1. VM_HOOKS.preFork(),做准备工作。
2. nativeForkAndSpecialize,创建子进程
3. VM_HOOKS.postForkCommon();启动Zygote的4个Daemon线程,java堆整理,引用队列,以及析构线程。
step 1
首先看看preFork方法,这个方法定义在libcore\dalvik\src\main\java\dalvik\system\ZygoteHooks类中。
    public void preFork() {
        Daemons.stop();
        waitUntilAllThreadsStopped();
        token = nativePreFork();
    }
Daemons.stop():
    public static void stop() {
        HeapTaskDaemon.INSTANCE.stop();
        ReferenceQueueDaemon.INSTANCE.stop();
        FinalizerDaemon.INSTANCE.stop();
        FinalizerWatchdogDaemon.INSTANCE.stop();
    }
停止四个线程:Daemon线程,java堆整理,引用队列,析构线程
也就是创建子进程的时候,不能有这几个线程搅和。
waitUntilAllThreadsStopped:
    private static void waitUntilAllThreadsStopped() {
        File tasks = new File("/proc/self/task");
        // All Java daemons are stopped already. We're just waiting for their OS counterparts to
        // finish as well. This shouldn't take much time so spinning is ok here.
        while (tasks.list().length > 1) {
          Thread.yield();
        }
    }
}
当/proc/self/task文件中记录的线程数大于1,就不断的让出cpu,直到只剩下一个线程。
nativePreFork:
这个方法定义在art\runtime\native\dalvik_system_ZygoteHooks.cc中:
static jlong ZygoteHooks_nativePreFork(JNIEnv* env, jclass) {
  Runtime* runtime = Runtime::Current();
  CHECK(runtime->IsZygote()) << "runtime instance not started with -Xzygote";
  runtime->PreZygoteFork();
  if (Trace::GetMethodTracingMode() != TracingMode::kTracingInactive) {
    // Tracing active, pause it.
    Trace::Pause();
  }
  // Grab thread before fork potentially makes Thread::pthread_key_self_ unusable.
  return reinterpret_cast<jlong>(ThreadForEnv(env));
}
调用PreZygoteFork方法,这个方法定义在art\runtime\runtime.cc中:
void Runtime::PreZygoteFork() {
  heap_->PreZygoteFork();
}
这个函数用来初始化堆。
step 2
nativeForkAndSpecialize方法定义在frameworks\base\core\jni\com_android_internal_os_Zygote.cpp中:
static jint com_android_internal_os_Zygote_nativeForkAndSpecialize(
        JNIEnv* env, jclass, jint uid, jint gid, jintArray gids,
        jint debug_flags, jobjectArray rlimits,
        jint mount_external, jstring se_info, jstring se_name,
        jintArray fdsToClose, jstring instructionSet, jstring appDataDir) {
    // Grant CAP_WAKE_ALARM to the Bluetooth process.
    jlong capabilities = 0;
    if (uid == AID_BLUETOOTH) {
        capabilities |= (1LL << CAP_WAKE_ALARM);
    }
    return ForkAndSpecializeCommon(env, uid, gid, gids, debug_flags,
            rlimits, capabilities, capabilities, mount_external, se_info,
            se_name, false, fdsToClose, instructionSet, appDataDir);
}
调用ForkAndSpecializeCommon方法进一步处理:
// Utility routine to fork zygote and specialize the child process.
static pid_t ForkAndSpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray javaGids,
                                     jint debug_flags, jobjectArray javaRlimits,
                                     jlong permittedCapabilities, jlong effectiveCapabilities,
                                     jint mount_external,
                                     jstring java_se_info, jstring java_se_name,
                                     bool is_system_server, jintArray fdsToClose,
                                     jstring instructionSet, jstring dataDir) {
  SetSigChldHandler();
  pid_t pid = fork();
  if (pid == 0) {
    // The child process.
    gMallocLeakZygoteChild = 1;
    // Clean up any descriptors which must be closed immediately
    DetachDescriptors(env, fdsToClose);
    // Keep capabilities across UID change, unless we're staying root.
    if (uid != 0) {
      EnableKeepCapabilities(env);
    }
    DropCapabilitiesBoundingSet(env);
    bool use_native_bridge = !is_system_server && (instructionSet != NULL)
        && android::NativeBridgeAvailable();
    if (use_native_bridge) {
      ScopedUtfChars isa_string(env, instructionSet);
      use_native_bridge = android::NeedsNativeBridge(isa_string.c_str());
    }
    if (use_native_bridge && dataDir == NULL) {
      // dataDir should never be null if we need to use a native bridge.
      // In general, dataDir will never be null for normal applications. It can only happen in
      // special cases (for isolated processes which are not associated with any app). These are
      // launched by the framework and should not be emulated anyway.
      use_native_bridge = false;
      ALOGW("Native bridge will not be used because dataDir == NULL.");
    }
    if (!MountEmulatedStorage(uid, mount_external, use_native_bridge)) {
      ALOGW("Failed to mount emulated storage: %s", strerror(errno));
      if (errno == ENOTCONN || errno == EROFS) {
        // When device is actively encrypting, we get ENOTCONN here
        // since FUSE was mounted before the framework restarted.
        // When encrypted device is booting, we get EROFS since
        // FUSE hasn't been created yet by init.
        // In either case, continue without external storage.
      } else {
        ALOGE("Cannot continue without emulated storage");
        RuntimeAbort(env);
      }
    }
    if (!is_system_server) {
        int rc = createProcessGroup(uid, getpid());
        if (rc != 0) {
            if (rc == -EROFS) {
                ALOGW("createProcessGroup failed, kernel missing CONFIG_CGROUP_CPUACCT?");
            } else {
                ALOGE("createProcessGroup(%d, %d) failed: %s", uid, pid, strerror(-rc));
            }
        }
    }
    SetGids(env, javaGids);
    SetRLimits(env, javaRlimits);
    if (use_native_bridge) {
      ScopedUtfChars isa_string(env, instructionSet);
      ScopedUtfChars data_dir(env, dataDir);
      android::PreInitializeNativeBridge(data_dir.c_str(), isa_string.c_str());
    }
    int rc = setresgid(gid, gid, gid);
    if (rc == -1) {
      ALOGE("setresgid(%d) failed: %s", gid, strerror(errno));
      RuntimeAbort(env);
    }
    rc = setresuid(uid, uid, uid);
    if (rc == -1) {
      ALOGE("setresuid(%d) failed: %s", uid, strerror(errno));
      RuntimeAbort(env);
    }
    if (NeedsNoRandomizeWorkaround()) {
        // Work around ARM kernel ASLR lossage (http://b/5817320).
        int old_personality = personality(0xffffffff);
        int new_personality = personality(old_personality | ADDR_NO_RANDOMIZE);
        if (new_personality == -1) {
            ALOGW("personality(%d) failed: %s", new_personality, strerror(errno));
        }
    }
    SetCapabilities(env, permittedCapabilities, effectiveCapabilities);
    SetSchedulerPolicy(env);
    const char* se_info_c_str = NULL;
    ScopedUtfChars* se_info = NULL;
    if (java_se_info != NULL) {
        se_info = new ScopedUtfChars(env, java_se_info);
        se_info_c_str = se_info->c_str();
        if (se_info_c_str == NULL) {
          ALOGE("se_info_c_str == NULL");
          RuntimeAbort(env);
        }
    }
    const char* se_name_c_str = NULL;
    ScopedUtfChars* se_name = NULL;
    if (java_se_name != NULL) {
        se_name = new ScopedUtfChars(env, java_se_name);
        se_name_c_str = se_name->c_str();
        if (se_name_c_str == NULL) {
          ALOGE("se_name_c_str == NULL");
          RuntimeAbort(env);
        }
    }
    rc = selinux_android_setcontext(uid, is_system_server, se_info_c_str, se_name_c_str);
    if (rc == -1) {
      ALOGE("selinux_android_setcontext(%d, %d, \"%s\", \"%s\") failed", uid,
            is_system_server, se_info_c_str, se_name_c_str);
      RuntimeAbort(env);
    }
    // Make it easier to debug audit logs by setting the main thread's name to the
    // nice name rather than "app_process".
    if (se_info_c_str == NULL && is_system_server) {
      se_name_c_str = "system_server";
    }
    if (se_info_c_str != NULL) {
      SetThreadName(se_name_c_str);
    }
    delete se_info;
    delete se_name;
    UnsetSigChldHandler();
    env->CallStaticVoidMethod(gZygoteClass, gCallPostForkChildHooks, debug_flags,
                              is_system_server ? NULL : instructionSet);
    if (env->ExceptionCheck()) {
      ALOGE("Error calling post fork hooks.");
      RuntimeAbort(env);
    }
  } else if (pid > 0) {
    // the parent process
  }
  return pid;
}
这个方法出现了我们非常熟悉的fork方法,然后在子进程中调用Zygote类中的callPostForkChildHooks方法:
    private static void callPostForkChildHooks(int debugFlags, String instructionSet) {
        VM_HOOKS.postForkChild(debugFlags, instructionSet);
    }
这里就不进一步追踪下去了,总之ForkAndSpecializeCommon方法调用fork系统调用创建了子进程,在进程中做了些初始化工作,然后返回了pid。这里会返回两次,子进程返回一次,父进程返回一次。
step 3
返回到nativeForkAndSpecialize方法后进一步返回,最终回到Zygote类的forkAndSpecialize方法,这个方法接下来会调用postForkCommon方法。postForkCommon方法定义在libcore\dalvik\src\main\java\dalvik\system\ZygoteHooks类中:
    /**
     * Called by the zygote in both the parent and child processes after
     * every fork. In the child process, this method is called after
     * {@code postForkChild}.
     */
    public void postForkCommon() {
        Daemons.start();
    }
注意这里子进程和父进程都会执行,也就是都会启动4个线程:Daemon线程,java堆整理,引用队列,以及析构线程。
父进程的线程在fork之前停止了,这里也要重新启动。
子进程和父进程都继续返回。
返回到ZygoteConnect的runOnce方法中,继续往下执行,则子进程执行handleChildProc方法,父进程执行handleParentProc方法。
父进程不是我们关注的,我们关注的是子进程,所以接下来看看handleChildProc方法,这个方法定义在frameworks\base\core\java\com\android\internal\os\ZygoteConnection中:
    private void handleChildProc(Arguments parsedArgs,
            FileDescriptor[] descriptors, FileDescriptor pipeFd, PrintStream newStderr)
            throws ZygoteInit.MethodAndArgsCaller {
        /**
         * By the time we get here, the native code has closed the two actual Zygote
         * socket connections, and substituted /dev/null in their place.  The LocalSocket
         * objects still need to be closed properly.
         */
        closeSocket();
        ZygoteInit.closeServerSocket();
        if (descriptors != null) {
            try {
                Os.dup2(descriptors[0], STDIN_FILENO);
                Os.dup2(descriptors[1], STDOUT_FILENO);
                Os.dup2(descriptors[2], STDERR_FILENO);
                for (FileDescriptor fd: descriptors) {
                    IoUtils.closeQuietly(fd);
                }
                newStderr = System.err;
            } catch (ErrnoException ex) {
                Log.e(TAG, "Error reopening stdio", ex);
            }
        }
        if (parsedArgs.niceName != null) {
            Process.setArgV0(parsedArgs.niceName);
        }
        // End of the postFork event.
        Trace.traceEnd(Trace.TRACE_TAG_ACTIVITY_MANAGER);
        if (parsedArgs.invokeWith != null) {
            WrapperInit.execApplication(parsedArgs.invokeWith,
                    parsedArgs.niceName, parsedArgs.targetSdkVersion,
                    VMRuntime.getCurrentInstructionSet(),
                    pipeFd, parsedArgs.remainingArgs);
        } else {
            RuntimeInit.zygoteInit(parsedArgs.targetSdkVersion,
                    parsedArgs.remainingArgs, null /* classLoader */);
        }
    }
子进程会继承父进程打开的文件描述符,所以子进程中有zygote套接字描述符,这里需要把它关掉。然后重要的是调用RuntimeInit.zygoteInit方法。这个方法定义在frameworks\base\core\java\com\android\internal\os\RuntimeInit中:
    public static final void zygoteInit(int targetSdkVersion, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        if (DEBUG) Slog.d(TAG, "RuntimeInit: Starting application from zygote");
        Trace.traceBegin(Trace.TRACE_TAG_ACTIVITY_MANAGER, "RuntimeInit");
        redirectLogStreams();
        commonInit();
        nativeZygoteInit();
        applicationInit(targetSdkVersion, argv, classLoader);
    }
从名字看也是做初始化工作。重点来看看applicationInit方法吧,这个方法定义在frameworks\base\core\java\com\android\internal\os\RuntimeInit中:
  private static void applicationInit(int targetSdkVersion, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        // If the application calls System.exit(), terminate the process
        // immediately without running any shutdown hooks.  It is not possible to
        // shutdown an Android application gracefully.  Among other things, the
        // Android runtime shutdown hooks close the Binder driver, which can cause
        // leftover running threads to crash before the process actually exits.
        nativeSetExitWithoutCleanup(true);
        // We want to be fairly aggressive about heap utilization, to avoid
        // holding on to a lot of memory that isn't needed.
        VMRuntime.getRuntime().setTargetHeapUtilization(0.75f);
        VMRuntime.getRuntime().setTargetSdkVersion(targetSdkVersion);
        final Arguments args;
        try {
            args = new Arguments(argv);
        } catch (IllegalArgumentException ex) {
            Slog.e(TAG, ex.getMessage());
            // let the process exit
            return;
        }
        // The end of of the RuntimeInit event (see #zygoteInit).
        Trace.traceEnd(Trace.TRACE_TAG_ACTIVITY_MANAGER);
        // Remaining arguments are passed to the start class's static main
        invokeStaticMain(args.startClass, args.startArgs, classLoader);
    }

调用invokeStaticMain方法进一步处理,这个方法还是在RuntimeInit中:
 private static void invokeStaticMain(String className, String[] argv, ClassLoader classLoader)
            throws ZygoteInit.MethodAndArgsCaller {
        Class<?> cl;
        try {
            cl = Class.forName(className, true, classLoader);
        } catch (ClassNotFoundException ex) {
            throw new RuntimeException(
                    "Missing class when invoking static main " + className,
                    ex);
        }
        Method m;
        try {
            m = cl.getMethod("main", new Class[] { String[].class });
        } catch (NoSuchMethodException ex) {
            throw new RuntimeException(
                    "Missing static main on " + className, ex);
        } catch (SecurityException ex) {
            throw new RuntimeException(
                    "Problem getting static main on " + className, ex);
        }
        int modifiers = m.getModifiers();
        if (! (Modifier.isStatic(modifiers) && Modifier.isPublic(modifiers))) {
            throw new RuntimeException(
                    "Main method is not public and static on " + className);
        }
        /*
         * This throw gets caught in ZygoteInit.main(), which responds
         * by invoking the exception's run() method. This arrangement
         * clears up all the stack frames that were required in setting
         * up the process.
         */
        throw new ZygoteInit.MethodAndArgsCaller(m, argv);
    }
这里会抛出异常,也就重新回到了ZygoteInit的main方法中:
            Log.i(TAG, "Accepting command socket connections");
            runSelectLoop(abiList);
            closeServerSocket();
        } catch (MethodAndArgsCaller caller) {
            caller.run();
        } catch (RuntimeException ex) {
            Log.e(TAG, "Zygote died with exception", ex);
            closeServerSocket();
            throw ex;
        }

这个时候会调用caller.run();也就是MethodAndArgsCaller中的run方法:
 public void run() {
            try {
                mMethod.invoke(null, new Object[] { mArgs });
            } catch (IllegalAccessException ex) {
                throw new RuntimeException(ex);
            } catch (InvocationTargetException ex) {
                Throwable cause = ex.getCause();
                if (cause instanceof RuntimeException) {
                    throw (RuntimeException) cause;
                } else if (cause instanceof Error) {
                    throw (Error) cause;
                }
                throw new RuntimeException(ex);
            }
        }
    }
接着调用mMethod.invoke方法,也就是Method类中的Invoke方法,这个方法是一本地方法,这个方法就展看了。根据Activity的启动流程来看,发起创建进程的请求的地方在ActivityManagerService中的startProcessLocked方法中,有如下代码:
            Process.ProcessStartResult startResult = Process.start(entryPoint,
                    app.processName, uid, uid, gids, debugFlags, mountExternal,
                    app.info.targetSdkVersion, app.info.seinfo, requiredAbi, instructionSet,
                    app.info.dataDir, entryPointArgs);

mMethod.invoke方法中的参数mArgs 是有这里传下去的,感兴趣可以追踪下,总之mMethod.invoke会调用ActivityThread中的main方法。
至此,Zygote进程创建子进程的流程便分析结束。
 

猜你喜欢

转载自blog.csdn.net/qingdaohaishanhu/article/details/87896565