do_fork浅析

Posted by Max on November 21, 2016
do_fork浅析 一般创建一个进程我们需要调用fork函数,fork其实又是调用了clone函数来实现的,而clone函数中最关键的函数就是do_fork函数。

1. do_fork()

do_fork定义在kernel/fork.c文件中,一言不合上代码——
/*
*  Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
* it and waits for it to finish using the VM if required.
*/
long _do_fork(unsigned long clone_flags,  //每比特都代表对子进程task_struct中的每种属性的设置
          unsigned long stack_start,  //子进程用户态堆栈的开始地址  
          unsigned long stack_size,   //未使用,为0
          int __user *parent_tidptr,  //父进程在用户态下pid的地址
          int __user *child_tidptr,   //子进程在用户态下pid的地址
          unsigned long tls)          //TLS(Thread Local Storage)机制下用于设置线程的本地存储区
{
    struct task_struct *p;
    int trace = 0;
    long nr;

    /*
     * Determine whether and which event to report to ptracer.  When
     * called from kernel_thread or CLONE_UNTRACED is explicitly
     * requested, no event is reported; otherwise, report if the event
     * for the type of forking is enabled.
     */
    if (!(clone_flags & CLONE_UNTRACED)) {
            if (clone_flags & CLONE_VFORK)
                    trace = PTRACE_EVENT_VFORK;
            else if ((clone_flags & CSIGNAL) != SIGCHLD)
                    trace = PTRACE_EVENT_CLONE;
            else
                    trace = PTRACE_EVENT_FORK;

            if (likely(!ptrace_event_enabled(current, trace)))
                    trace = 0;
    }

    p = copy_process(clone_flags, stack_start, stack_size,
                     child_tidptr, NULL, trace, tls, NUMA_NO_NODE);
    /*
     * Do this prior waking up the new thread - the thread pointer
     * might get invalid after that point, if the thread exits quickly.
     */
    if (!IS_ERR(p)) {
            struct completion vfork;
            struct pid *pid;

            //调整父进程和子进程的调度参数???
            trace_sched_process_fork(current, p);

            //取出 task 结构体内的 pid
            pid = get_task_pid(p, PIDTYPE_PID);
            nr = pid_vnr(pid);

            //????
            if (clone_flags & CLONE_PARENT_SETTID)
                    put_user(nr, parent_tidptr);

            //如果使用的是 vfork,初始化 vfork 完成处理信息。
            if (clone_flags & CLONE_VFORK) {
                    p->vfork_done = &vfork;
                    init_completion(&vfork);
                    get_task_struct(p);
            }

            //将子进程加入到调度器中,为其分配 CPU,准备执行
            wake_up_new_task(p);

            //fork 完成,子进程即将开始运行,告诉ptracer
            if (unlikely(trace))
                    ptrace_event_pid(trace, pid);

            //如果是 vfork,将父进程加入至等待队列,等待子进程完成
            if (clone_flags & CLONE_VFORK) {
                    if (!wait_for_vfork_done(p, &vfork))
                            ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
            }

            put_pid(pid);
    } else {
            nr = PTR_ERR(p);
    }
    return nr;
}
主要流程可以概括为: 1. 调用 copy_process 为子进程复制出一份进程信息 2. 如果是 vfork 初始化完成处理信息 3. 调用 wake_up_new_task 将子进程加入调度器,为之分配 CPU 4. 如果是 vfork,父进程等待子进程完成 exec 替换自己的地址空间

2. copy_process()

以下是copy_process()的一些实现细节。
/*
* This creates a new process as a copy of the old one,
* but does not actually start it yet.
*
* It copies the registers, and all the appropriate
* parts of the process environment (as per the clone
* flags). The actual kick-off is left to the caller.
*/
static struct task_struct *copy_process(unsigned long clone_flags,
                                    unsigned long stack_start,
                                    unsigned long stack_size,
                                    int __user *child_tidptr,
                                    struct pid *pid,
                                    int trace,
                                    unsigned long tls,
                                    int node)
{
    int retval;
    struct task_struct *p;  //创建进程描述符指针

    //检查clone_flags 所传标志的一致性
    if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
            return ERR_PTR(-EINVAL);

    if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
            return ERR_PTR(-EINVAL);

    /*
     * Thread groups must share signals as well, and detached threads
     * can only be started up within the thread group.
     */
    if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
            return ERR_PTR(-EINVAL);

    /*
     * Shared signal handlers imply shared VM. By way of the above,
     * thread groups also imply shared VM. Blocking this case allows
     * for various simplifications in other code.
     */
    if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
            return ERR_PTR(-EINVAL);

    /*
     * Siblings of global init remain as zombies on exit since they are
     * not reaped by their parent (swapper). To solve this and to avoid
     * multi-rooted process trees, prevent global and container-inits
     * from creating siblings.
     */
    if ((clone_flags & CLONE_PARENT) &&
                            current->signal->flags & SIGNAL_UNKILLABLE)
            return ERR_PTR(-EINVAL);

    /*
     * If the new process will be in a different pid or user namespace
     * do not allow it to share a thread group with the forking task.
     */
    if (clone_flags & CLONE_THREAD) {
            if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
                (task_active_pid_ns(current) !=
                            current->nsproxy->pid_ns_for_children))
                    return ERR_PTR(-EINVAL);
    }

    //安全检查
    retval = security_task_create(clone_flags);
    if (retval)
            goto fork_out;

    //复制当前的 task_struct
    retval = -ENOMEM;
    p = dup_task_struct(current, node);
    if (!p)
            goto fork_out;

    //???
    ftrace_graph_init_task(p);
    //初始化互斥变量
    rt_mutex_init_task(p);

    //检查进程数是否超过限制,由操作系统定义
    retval = -EAGAIN;
    if (atomic_read(&p->real_cred->user->processes) >=
                    task_rlimit(p, RLIMIT_NPROC)) {
            if (p->real_cred->user != INIT_USER &&
                !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
                    goto bad_fork_free;
    }
    current->flags &= ~PF_NPROC_EXCEEDED;

    retval = copy_creds(p, clone_flags);
    if (retval < 0)
            goto bad_fork_free;

    /*
     * If multiple threads are within copy_process(), then this check
     * triggers too late. This doesn't hurt, the check is only there
     * to stop root fork bombs.
     */
    //检查进程数是否超过 max_threads,由内存大小决定
    retval = -EAGAIN;
    if (nr_threads >= max_threads)
            goto bad_fork_cleanup_count;

    delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
    p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
    p->flags |= PF_FORKNOEXEC;
    INIT_LIST_HEAD(&p->children);
    INIT_LIST_HEAD(&p->sibling);
    rcu_copy_process(p);
    p->vfork_done = NULL;
    spin_lock_init(&p->alloc_lock);  //初始化自旋锁

    init_sigpending(&p->pending);    //初始化挂起信号

    p->utime = p->stime = p->gtime = 0;
    p->utimescaled = p->stimescaled = 0;
    prev_cputime_init(&p->prev_cputime);  //初始化 CPU 定时器

    p->default_timer_slack_ns = current->timer_slack_ns;

    task_io_accounting_init(&p->ioac);
    acct_clear_integrals(p);

    posix_cpu_timers_init(p);

    p->start_time = ktime_get_ns();
    p->real_start_time = ktime_get_boot_ns();
    p->io_context = NULL;
    p->audit_context = NULL;
    cgroup_fork(p);

    p->pagefault_disabled = 0;

    /* Perform scheduler related setup. Assign this task to a CPU. */
    //完成对新进程调度程序数据结构的初始化,并把新进程的状态设置为TASK_RUNNING
    retval = sched_fork(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_policy;

    //复制所有进程信息,包括文件系统、信号处理函数、信号、内存管理等
    retval = perf_event_init_task(p);
    if (retval)
            goto bad_fork_cleanup_policy;
    retval = audit_alloc(p);
    if (retval)
            goto bad_fork_cleanup_perf;
    /* copy all the process information */
    shm_init_task(p);
    retval = copy_semundo(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_audit;
    retval = copy_files(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_semundo;
    retval = copy_fs(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_files;
    retval = copy_sighand(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_fs;
    retval = copy_signal(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_sighand;
    retval = copy_mm(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_signal;
    retval = copy_namespaces(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_mm;
    retval = copy_io(clone_flags, p);
    if (retval)
            goto bad_fork_cleanup_namespaces;
    /*
     * 初始化子进程内核栈。
     * linux-4.2新增处理TLS,之前版本是retval = copy_thread(clone_flags, stack_start, stack_size, p);
     */
    retval = copy_thread_tls(clone_flags, stack_start, stack_size, p, tls);
    if (retval)
            goto bad_fork_cleanup_io;

    //为新进程分配新的 pid
    if (pid != &init_struct_pid) {
            pid = alloc_pid(p->nsproxy->pid_ns_for_children);
            if (IS_ERR(pid)) {
                    retval = PTR_ERR(pid);
                    goto bad_fork_cleanup_thread;
            }
    }

    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
     * Clear TID on mm_release()?
     */
    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;

    /*
     * sigaltstack should be cleared when sharing the same VM
     */
    if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
            sas_ss_reset(p);

    /*
     * Syscall tracing and stepping should be turned off in the
     * child regardless of CLONE_PTRACE.
     */
    user_disable_single_step(p);
    clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

    clear_all_latency_tracing(p);

    /* ok, now we should be set up.. */
    //设置子进程 pid
    p->pid = pid_nr(pid);
    if (clone_flags & CLONE_THREAD) {
            p->exit_signal = -1;
            p->group_leader = current->group_leader;
            p->tgid = current->tgid;
    } else {
            if (clone_flags & CLONE_PARENT)
                    p->exit_signal = current->group_leader->exit_signal;
            else
                    p->exit_signal = (clone_flags & CSIGNAL);
            p->group_leader = p;
            p->tgid = p->pid;
    }

    p->nr_dirtied = 0;
    p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
    p->dirty_paused_when = 0;

    p->pdeath_signal = 0;
    INIT_LIST_HEAD(&p->thread_group);
    p->task_works = NULL;

    threadgroup_change_begin(current);
    /*
     * Ensure that the cgroup subsystem policies allow the new process to be
     * forked. It should be noted the the new process's css_set can be changed
     * between here and cgroup_post_fork() if an organisation operation is in
     * progress.
     */
    retval = cgroup_can_fork(p);
    if (retval)
            goto bad_fork_free_pid;

    /*
     * Make it visible to the rest of the system, but dont wake it up yet.
     * Need tasklist lock for parent etc handling!
     */
    write_lock_irq(&tasklist_lock);

    /* CLONE_PARENT re-uses the old parent */
    //调用fork的进程为其父进程
    if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
            p->real_parent = current->real_parent;
            p->parent_exec_id = current->parent_exec_id;
    } else {
            p->real_parent = current;
            p->parent_exec_id = current->self_exec_id;
    }

    spin_lock(&current->sighand->siglock);

    /*
     * Copy seccomp details explicitly here, in case they were changed
     * before holding sighand lock.
     */
    copy_seccomp(p);

    /*
     * Process group and session signals need to be delivered to just the
     * parent before the fork or both the parent and the child after the
     * fork. Restart if a signal comes in before we add the new process to
     * it's process group.
     * A fatal signal pending means that current will exit, so the new
     * thread can't slip out of an OOM kill (or normal SIGKILL).
    */
    recalc_sigpending();
    if (signal_pending(current)) {
            spin_unlock(&current->sighand->siglock);
            write_unlock_irq(&tasklist_lock);
            retval = -ERESTARTNOINTR;
            goto bad_fork_cancel_cgroup;
    }

    if (likely(p->pid)) {
            ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);

            init_task_pid(p, PIDTYPE_PID, pid);
            if (thread_group_leader(p)) {
                    init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
                    init_task_pid(p, PIDTYPE_SID, task_session(current));

                    if (is_child_reaper(pid)) {
                            ns_of_pid(pid)->child_reaper = p;
                            p->signal->flags |= SIGNAL_UNKILLABLE;
                    }

                    p->signal->leader_pid = pid;
                    p->signal->tty = tty_kref_get(current->signal->tty);
                    list_add_tail(&p->sibling, &p->real_parent->children);
                    list_add_tail_rcu(&p->tasks, &init_task.tasks);
                    attach_pid(p, PIDTYPE_PGID);
                    attach_pid(p, PIDTYPE_SID);
                    __this_cpu_inc(process_counts);
            } else {
                    current->signal->nr_threads++;
                    atomic_inc(&current->signal->live);
                    atomic_inc(&current->signal->sigcnt);
                    list_add_tail_rcu(&p->thread_group,
                                      &p->group_leader->thread_group);
                    list_add_tail_rcu(&p->thread_node,
                                      &p->signal->thread_head);
            }
            attach_pid(p, PIDTYPE_PID);
            nr_threads++;
    }

    total_forks++;
    spin_unlock(&current->sighand->siglock);
    syscall_tracepoint_update(p);
    write_unlock_irq(&tasklist_lock);

    proc_fork_connector(p);
    cgroup_post_fork(p);
    threadgroup_change_end(current);
    perf_event_fork(p);

    trace_task_newtask(p, clone_flags);
    uprobe_copy_process(p, clone_flags);

    return p;

bad_fork_cancel_cgroup:
    cgroup_cancel_fork(p);
bad_fork_free_pid:
    threadgroup_change_end(current);
    if (pid != &init_struct_pid)
            free_pid(pid);
bad_fork_cleanup_thread:
    exit_thread(p);
bad_fork_cleanup_io:
    if (p->io_context)
            exit_io_context(p);
bad_fork_cleanup_namespaces:
    exit_task_namespaces(p);
bad_fork_cleanup_mm:
    if (p->mm)
            mmput(p->mm);
bad_fork_cleanup_signal:
    if (!(clone_flags & CLONE_THREAD))
            free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
    __cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
    exit_fs(p); /* blocking */
bad_fork_cleanup_files:
    exit_files(p); /* blocking */
bad_fork_cleanup_semundo:
    exit_sem(p);
bad_fork_cleanup_audit:
    audit_free(p);
bad_fork_cleanup_perf:
    perf_event_free_task(p);
bad_fork_cleanup_policy:
#ifdef CONFIG_NUMA
    mpol_put(p->mempolicy);
bad_fork_cleanup_threadgroup_lock:
#endif
    delayacct_tsk_free(p);
bad_fork_cleanup_count:
    atomic_dec(&p->cred->user->processes);
    exit_creds(p);
bad_fork_free:
    free_task(p);
fork_out:
    return ERR_PTR(retval);
}
主要流程可以概括为 1. 调用 dup_task_struct 复制当前的 task_struct 2. 检查进程数是否超过限制 3. 初始化自旋锁、挂起信号、CPU 定时器等 4. 调用 sched_fork 初始化进程数据结构,并把进程状态设置为 TASK_RUNNING 5. 复制所有进程信息,包括文件系统、信号处理函数、信号、内存管理等 6. 调用 copy_thread_tls 初始化子进程内核栈 7. 为新进程分配并设置新的 pid

3. dup_task_struct()

static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
    struct task_struct *tsk;
    unsigned long *stack;
    int err;

    //分配一个 task_struct 节点
    if (node == NUMA_NO_NODE)
            node = tsk_fork_get_node(orig);
    tsk = alloc_task_struct_node(node);
    if (!tsk)
            return NULL;

    //分配一个 thread_info 节点,包含进程的内核栈,stack 为栈底
    stack = alloc_thread_stack_node(tsk, node);
    if (!stack)
            goto free_tsk;

    err = arch_dup_task_struct(tsk, orig);
    if (err)
            goto free_stack;

    //将栈底的值赋给新节点的栈
    tsk->stack = stack;

    setup_thread_stack(tsk, orig);
    clear_user_return_notifier(tsk);
    clear_tsk_need_resched(tsk);
    set_task_stack_end_magic(tsk);

    /*
     * One for us, one for whoever does the "release_task()" (usually
     * parent)
     */
    atomic_set(&tsk->usage, 2);

    tsk->splice_pipe = NULL;
    tsk->task_frag.page = NULL;
    tsk->wake_q.next = NULL;

    account_kernel_stack(stack, 1);

    kcov_task_init(tsk);

    return tsk;

free_stack:
    free_thread_stack(stack);
free_tsk:
    free_task_struct(tsk);
    return NULL;
}
主要流程可以概括为 1. 调用alloc_task_struct_node分配一个 task_struct 节点 2. 调用alloc_thread_info_node分配一个 thread_info 节点,其实是分配了一个thread_union联合体,将栈底返回给 stack ``` union thread_union { struct thread_info thread_info; unsigned long stack[THREAD_SIZE/sizeof(long)]; }; `` 3. 最后将栈底的值 stack 赋值给新节点的栈 可以看到,最终执行完dup_task_struct之后,子进程除了tsk->stack指针不同之外,全部都一样!

4. sched_fork()

该函数定义见kernel/sched/core.c
/*
* fork()/clone()-time setup:
*/
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
    unsigned long flags;
    int cpu = get_cpu();

    __sched_fork(clone_flags, p);
    /*
     * We mark the process as NEW here. This guarantees that
     * nobody will actually run it, and a signal or other external
     * event cannot wake it up and insert it on the runqueue either.
     */
    // 设置子进程状态,旧版本为TASK_RUNNING
    p->state = TASK_NEW;

    /*
     * Make sure we do not leak PI boosting priority to the child.
     */
    p->prio = current->normal_prio;

    /*
     * Revert to default priority/policy on fork if requested.
     */
    if (unlikely(p->sched_reset_on_fork)) {
            if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
                    p->policy = SCHED_NORMAL;
                    p->static_prio = NICE_TO_PRIO(0);
                    p->rt_priority = 0;
            } else if (PRIO_TO_NICE(p->static_prio) < 0)
                    p->static_prio = NICE_TO_PRIO(0);

            p->prio = p->normal_prio = __normal_prio(p);
            set_load_weight(p);

            /*
             * We don't need the reset flag anymore after the fork. It has
             * fulfilled its duty:
             */
            p->sched_reset_on_fork = 0;
    }

    if (dl_prio(p->prio)) {
            put_cpu();
            return -EAGAIN;
    } else if (rt_prio(p->prio)) {
            p->sched_class = &rt_sched_class;
    } else {
            p->sched_class = &fair_sched_class;
    }

    init_entity_runnable_average(&p->se);

    /*
     * The child is not yet in the pid-hash so no cgroup attach races,
     * and the cgroup is pinned to this child due to cgroup_fork()
     * is ran before sched_fork().
     *
     * Silence PROVE_RCU.
     */
    raw_spin_lock_irqsave(&p->pi_lock, flags);
    /*
     * We're setting the cpu for the first time, we don't migrate,
     * so use __set_task_cpu().
     */
    //为子进程分配 CPU
    __set_task_cpu(p, cpu);
    if (p->sched_class->task_fork)
            p->sched_class->task_fork(p);
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);

    init_task_preempt_count(p);

    put_cpu();
    return 0;
}
sched_fork大致完成了两项重要工作 1. 将子进程状态设置为 TASK_NEW/TASK_RUNNING, 2. 为子进程分配 CPU

5. copy_thread_tls()

Linux-4.2之后增加了copy_thread_tls()和CONFIG_HAVE_COPY_THREAD_TLS宏,但是如果未定义CONFIG_HAVE_COPY_THREAD_TLS宏默认则使用copy_thread同时将定义copy_thread_tls为copy_thread。可以在include/linux/sched.h
#ifdef CONFIG_HAVE_COPY_THREAD_TLS
extern int copy_thread_tls(unsigned long, unsigned long, unsigned long,
                    struct task_struct *, unsigned long);
#else
extern int copy_thread(unsigned long, unsigned long, unsigned long,
                    struct task_struct *);

/* Architectures that haven't opted into copy_thread_tls get the tls argument
* via pt_regs, so ignore the tls argument passed via C. */
static inline int copy_thread_tls(
            unsigned long clone_flags, unsigned long sp, unsigned long arg,
            struct task_struct *p, unsigned long tls)
{
    return copy_thread(clone_flags, sp, arg, p);
}
#endif
按架构,其实现分别位于arch/x86/kernel/process_32.carch/x86/kernel/process_64.c。以64位架构为例,上代码
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
            unsigned long arg, struct task_struct *p, unsigned long tls)
{
    int err;
    struct pt_regs *childregs;
    struct task_struct *me = current;

    //获取寄存器的信息
    p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
    childregs = task_pt_regs(p);
    p->thread.sp = (unsigned long) childregs;
    set_tsk_thread_flag(p, TIF_FORK);
    p->thread.io_bitmap_ptr = NULL;

    savesegment(gs, p->thread.gsindex);
    p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
    savesegment(fs, p->thread.fsindex);
    p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
    savesegment(es, p->thread.es);
    savesegment(ds, p->thread.ds);
    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

    if (unlikely(p->flags & PF_KTHREAD)) {
            /* kernel thread */
            //内核线程的设置
            memset(childregs, 0, sizeof(struct pt_regs));
            childregs->sp = (unsigned long)childregs;
            childregs->ss = __KERNEL_DS;
            childregs->bx = sp; /* function */
            childregs->bp = arg;
            childregs->orig_ax = -1;
            childregs->cs = __KERNEL_CS | get_kernel_rpl();
            childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
            return 0;
    }
    //将当前寄存器信息复制给子进程
    *childregs = *current_pt_regs();

    //子进程 eax 置 0,因此fork 在子进程返回0
    childregs->ax = 0;
    if (sp)
            childregs->sp = sp;

    err = -ENOMEM;
    if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
            p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
                                              IO_BITMAP_BYTES, GFP_KERNEL);
            if (!p->thread.io_bitmap_ptr) {
                    p->thread.io_bitmap_max = 0;
                    return -ENOMEM;
            }
            set_tsk_thread_flag(p, TIF_IO_BITMAP);
    }

    /*
     * Set a new TLS for the child thread?
     */
    if (clone_flags & CLONE_SETTLS) {
                    err = do_arch_prctl(p, ARCH_SET_FS, tls);
            if (err)
                    goto out;
    }
    err = 0;
out:
    if (err && p->thread.io_bitmap_ptr) {
            kfree(p->thread.io_bitmap_ptr);
            p->thread.io_bitmap_max = 0;
    }

    return err;
}

6. 总结

新进程的创建可以大致分为以下几个阶段: 1. do_fork 系统内核调用 2. copy_process 复制父进程的所有信息给子进程 1. dup_task_struct中为其分配了新的堆栈 2. 调用了sched_fork,将其置为TASK_RUNNING 3. copy_thread_tls中将父进程的寄存器上下文复制给子进程,保证了父子进程的堆栈信息是一致的 4. 将ret_from_fork的地址设置为eip寄存器的值 5. 最终子进程从ret_from_fork开始执行 进程状态图示进程状态流程

7. 参考

  1. Linux下进程的创建过程分析(_do_fork/do_fork详解)--AderStep
  2. Linux内核源码