fork | Lantern's 小站

头文件

#include<unistd.h>
#include<sys/types.h>

函数原型

pid_t fork(void)

程序在内存中由多个部分组成, 执行过程中会根据需要访问这些部分, 包括文本段（text segment）、数据段（data segment）、栈（stack）和堆（heap）。文本段中存放 CPU 所执行的指令, 数据段存放进程操作的数据变量, 栈存放自动变量和函数调用数据, 堆存放动态分配的内存数据。当进程被创建时, 子进程会得到父进程的数据副本, 包括数据空间、堆、栈和进程描述符。

返回值

若成功调用一次则返回两次, 子进程返回0, 父进程返回子进程ID；否则, 出错返回-1

主要工作

为子进程分配新的 pid, 并通过父进程 PCB（task_struct）创建新的子进程 PCB
检查进程数是否达到上限（分别检查用户限制和系统限制）
拷贝所有的进程信息（打开的文件 / 信号处理 / 进程地址空间等）, 这里需要拷贝的选项由调用 do_fork() 时传入的参数 clone_flags 决定
用父进程的内核栈初始化子进程的内核栈, 设置子进程的返回值为 0（eax = 0）
设置新进程的状态（TASK_RUNNING / TASK_STOPPED）, 调整父子进程调度
父进程 fork 返回子进程的 pid

系统调用流程

Linux 源码地址: fork.c

sys_fork -> _do_fork(SIGCHLD, 0, 0, NULL, NULL, 0) -> copy_process() -> copy_mm(clone_flags, p)

copy_mm

/*
 * copy_mm (abridged kernel excerpt; "..." marks code elided by the article):
 * chooses the mm_struct that the new task `tsk` will use and stores it in
 * tsk->mm and tsk->active_mm.
 *
 * Returns 0 on the visible success path, or `retval` through fail_nomem.
 */
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
    struct mm_struct *mm, *oldmm;
    ...
    oldmm = current->mm;    /* mm of the task that is calling fork/clone */
    ...
    if (clone_flags & CLONE_VM) {
        /* CLONE_VM: the new task reuses the caller's mm instead of getting a copy. */
        mmget(oldmm);       /* presumably takes a reference on the shared mm -- confirm */
        mm = oldmm;
        goto good_mm;
    }
    /* No CLONE_VM: build a separate mm for the new task via dup_mm(). */
    retval = -ENOMEM;       /* value returned via fail_nomem; the jump itself is in the elided code */
    mm = dup_mm(tsk);       /* NOTE(review): the dup_mm excerpt in this article takes (tsk, oldmm);
                               this call probably comes from a different kernel version -- confirm */
    ...
good_mm:
    /* Publish the chosen mm (shared or duplicated) on the new task. */
    tsk->mm = mm;
    tsk->active_mm = mm;
    return 0;
fail_nomem:
    return retval;
}

copy_mm 的流程为:

创建 mm_struct* mm, oldmm 结构体指针(内存描述符)

oldmm = current->mm; // oldmm 初始化为父进程的 mm_struct

如果 clone_flags 指定了 CLONE_VM, 对应的 clone() 系统调用创建线程, 则共享父进程的 mm 结构；
mmget(oldmm);
mm = oldmm;
否则属于创建进程需要调用 dup_mm, dup_mm 进而调用 dup_mmap 函数。

dup_mm

先给子进程分配了一个新的结构体, 然后调用 dup_mmap 拷贝父进程地址空间

/*
 * dup_mm (abridged kernel excerpt; "..." marks code elided by the article):
 * obtains a new mm_struct from allocate_mm() and then calls
 * dup_mmap(mm, oldmm) on it. The return statement is in the elided part.
 */
static struct mm_struct *dup_mm(struct task_struct *tsk,                struct mm_struct *oldmm)
{
    struct mm_struct *mm;
    int err;    /* result of dup_mmap() */

    mm = allocate_mm();             /* new mm_struct for the child */
    ...
    err = dup_mmap(mm, oldmm);      /* per the surrounding article: copies the parent's address space */
    ...
}

dup_mmap

先复制父进程每个 vm_area_struct 线性区描述符, 插入到子进程的线性区链表和红黑树中, vm_area_struct 结构如下

/*
 * Simplified view of vm_area_struct: only the fields discussed in this
 * article are listed. Each instance describes one memory region
 * ("linear area") of a process address space.
 */
struct vm_area_struct
{
    struct mm_struct *vm_mm;            /* memory descriptor that owns this region */
    unsigned long vm_start;             /* start address of the region */
    unsigned long vm_end;               /* end address of the region */
    struct vm_area_struct *vm_next;     /* next region in the list */
    pgprot_t vm_page_prot;              /* access permissions of the region */
    struct rb_node vm_rb;               /* node used for red-black tree lookup */
};

用 copy_page_range() 创建新的页表, 在新的 vm_area_struct 中链接并复制父进程的页表条目

Linux 从 2.6.11 开始采用四级分页模型, 分别是 pgd、pud、pmd、pte, 所以从 copy_page_range 一直调用到 copy_pte_range 都是拷贝相应的页表条目

copy_page_range

创建新的页表
复制父进程的页表来初始化子进程的新页表；对于私有且可写的页（VM_SHARED 标志关闭 / VM_MAYWRITE 标志打开）, 其对应的权限在父子进程中都设为只读, 以便于 Copy-on-write 机制处理。
其中会调用 copy_one_pte 函数来复制单个页表项:

copy_one_pte

/*
 * copy_one_pte (abridged kernel excerpt; "..." marks code elided by the article):
 * reads one page-table entry from *src_pte and installs it (possibly
 * write-protected) at dst_pte in dst_mm for address addr.
 */
static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,        pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,        unsigned long addr, int *rss)
{
    unsigned long vm_flags = vma->vm_flags;
    pte_t pte = *src_pte;   /* local copy of the parent's entry; this is what the child receives */
    struct page *page;
    ...
    /*
     * If it's a COW mapping, write protect it both
     * in the parent and the child
     */
    if (is_cow_mapping(vm_flags)) {
        ptep_set_wrprotect(src_mm, addr, src_pte);  /* parent's entry, modified in place */
        pte = pte_wrprotect(pte);                   /* copy destined for the child */
    }
    ...

out_set_pte:
    /* Install the resulting entry into the destination (child) page table. */
    set_pte_at(dst_mm, addr, dst_pte, pte);
}

上面的代码判断: 如果父进程的页属于写时复制（COW）映射, 就将父子进程对应的页表项都置为写保护, 即清除 pte 的 _PAGE_RW 标记（对应位 _PAGE_BIT_RW）。

接着新进程的线性区和页表复制完成, 返回至 copy_process()

copy_process

调用 copy_thread() 用父进程的内核栈来初始化子进程的内核栈

copy_thread()

将 eax 的值强制设置为0 ( fork 系统调用的返回值 )
childregs->eax = 0

sched_fork()

调用 sched_fork() 完成对新进程调度程序数据结构的初始化, 将新进程状态设为 TASK_RUNNING
为了公平起见, 父子进程共享父进程的时间片

返回 do_fork()

进程创建完成, 返回至 do_fork()

如果设置 CLONE_STOPPED, 就将子进程设置 TASK_STOPPED 状态并挂起；
否则调用 wake_up_new_task() 调整父子进程的调度参数；

wake_up_new_task()

如果父子进程运行在同一个 cpu 上, 并且不能共享同一组页表 (CLONE_VM 位为 0), 就把子进程插入运行队列中的父进程之前；
这样做的好处是: 如果子进程创建之后立即调用 exec 执行新程序, 让子进程先运行就可以避免写时拷贝机制执行不必要的页面复制；
否则, 如果运行在不同的 cpu 上, 或父子共享同一组页表, 就将子进程插入运行队列的队尾。

返回至 do_fork()

返回子进程的 pid

示例

单子进程创建

代码

/* 单子进程创建 */

#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>
#include <time.h>
#include <stdarg.h>

int tPrint (const char * fmt, ...);

/*
 * Single-child demo: fork once; the child sleeps for a second and prints
 * three greetings, while the parent logs the fork, waits for the child and
 * logs its completion.
 *
 * Returns 0 on success, 1 if fork() or waitpid() fails.
 */
int main(void) {
    pid_t pid;

    printf("Hello from Parent Process, PID is %d.\n", (int)getpid());

    /*
     * Flush before forking: anything still sitting in the stdio buffer is
     * copied into the child and would be printed twice when stdout is a
     * pipe or a file.
     */
    fflush(stdout);

    pid = fork();
    if (pid == -1) {
        /* fork() failed: no child exists, report the reason and fail. */
        perror("fork");
        return 1;
    }

    if (pid == 0) {
        /* Child: fork() returned 0. */
        sleep(1);
        for (int k = 0; k < 3; ++k) {
            printf("Hello from Child Process %d. %d times\n", (int)getpid(), k + 1);
        }
        return 0;
    }

    /* Parent: fork() returned the child's PID. */
    tPrint("Parent process forked one child process--%d.\n", (int)pid);
    tPrint("Parent process is waiting for child process to exit, \n");
    if (waitpid(pid, NULL, 0) == -1) {
        perror("waitpid");
        return 1;
    }
    tPrint("Child Process has exited.\n");
    tPrint("Parent process had exited.\n");

    return 0;
}

/*
 * printf-style logger that prefixes every message with the local time
 * (HH:MM:SS) and the calling process's PID, so interleaved parent/child
 * output can be told apart.
 *
 * fmt, ...: printf-style format string and its arguments.
 * Returns the value returned by vprintf() for the formatted message only
 * (the prefix is not counted); negative on output error.
 */
int tPrint (const char *fmt, ...) {
    va_list args;
    int written;
    time_t now = time(NULL);
    const struct tm *local = localtime(&now);

    if (local != NULL) {
        printf("%02d:%02d:%02d: %5d|", local->tm_hour, local->tm_min, local->tm_sec, (int)getpid());
    } else {
        /* localtime() can fail; still emit the PID prefix instead of dereferencing NULL. */
        printf("??:??:??: %5d|", (int)getpid());
    }

    va_start(args, fmt);
    written = vprintf(fmt, args);
    va_end(args);   /* every va_start must be paired with va_end */
    return written;
}

运行结果

流程图分析

单进程.png

用循环创建两（多）个子进程

代码1——预测错误

/* 多进程创建 */
#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>
#include <time.h>
#include <stdarg.h>

int tPrint (const char * fmt, ...);

/*
 * Multi-process demo that deliberately shows the classic pitfall analysed
 * in the article: fork() is called inside a loop and the child is NOT
 * taken out of the loop, so the first child carries on iterating and forks
 * a process of its own. Three children are created in total, not two.
 *
 * Returns 0 normally, 1 if the last fork() in this process failed.
 */
int main(void) {
    pid_t pid = 0;

    printf("Hello from Parent Process, PID is %d.\n", (int)getpid());

    for (int i = 0; i < 2; ++i) {
        /*
         * Flush before each fork so buffered output is not duplicated into
         * the new process when stdout is a pipe or a file.
         */
        fflush(stdout);

        pid = fork();
        if (pid == -1) {
            /* Report the failure at once, while errno is still meaningful. */
            perror("fork");
        } else if (pid != 0) {
            tPrint("Parent process forked one child process--%d.\n", (int)pid);
            tPrint("Parent process is waiting for child process to exit, \n");
        }
        /* Intentionally no break when pid == 0: the child keeps looping. */
    }

    if (pid == 0) {
        sleep(1);
        printf("Hello from Child Process %d.\n", (int)getpid());
    } else if (pid != -1) {
        /* Only the most recently forked child is waited for here. */
        waitpid(pid, NULL, 0);
        tPrint("Child Process has exited.\n");
        tPrint("Parent Process has exited.\n");
    } else {
        /* The last fork() failed (already reported above). */
        return 1;
    }

    return 0;
}

/*
 * printf-style logger that prefixes every message with the local time
 * (HH:MM:SS) and the calling process's PID, so interleaved parent/child
 * output can be told apart.
 *
 * fmt, ...: printf-style format string and its arguments.
 * Returns the value returned by vprintf() for the formatted message only
 * (the prefix is not counted); negative on output error.
 */
int tPrint (const char *fmt, ...) {
    va_list args;
    int written;
    time_t now = time(NULL);
    const struct tm *local = localtime(&now);

    if (local != NULL) {
        printf("%02d:%02d:%02d: %5d|", local->tm_hour, local->tm_min, local->tm_sec, (int)getpid());
    } else {
        /* localtime() can fail; still emit the PID prefix instead of dereferencing NULL. */
        printf("??:??:??: %5d|", (int)getpid());
    }

    va_start(args, fmt);
    written = vprintf(fmt, args);
    va_end(args);   /* every va_start must be paired with va_end */
    return written;
}

运行结果

分析错误原因

看似只产生了两个子进程, 实际上产生了三个子进程。

fork() 函数会复制父进程的数据副本, 包括数据空间、堆、栈和进程描述符, 所以第一个子进程仍然处在循环之中, 此时 i = 0, 执行 ++i 后会再一次运行 fork(), 又创建出一个子进程:

PID
2342(Parent) -> 2343(Child1)
2343(Child1) -> 2344(Child3)
2342(Parent) -> 2345(Child2)

代码2

由于 fork 函数会返回两次, 并且在子进程中返回 0, 因此可以在循环中添加 if (pid == 0 || pid == -1) break; 这样子进程被创建后, 由于在子进程中 pid == 0, 它会立即退出循环, 不会再继续调用 fork()。


#include <unistd.h>
#include <sys/wait.h>
#include <stdio.h>
#include <time.h>
#include <stdarg.h>

int tPrint (const char * fmt, ...);

/*
 * Multi-process demo (corrected version): the parent forks CHILD_COUNT
 * children in a loop. Each child leaves the loop immediately, so it never
 * forks itself. The parent remembers every child PID and waits for all of
 * them, not just the last one.
 *
 * Returns 0 on success, 1 if a fork() failed.
 */
int main(void) {
    enum { CHILD_COUNT = 2 };
    pid_t children[CHILD_COUNT];
    int forked = 0;     /* number of children successfully created */
    int status = 0;
    pid_t pid = 0;

    printf("Hello from Parent Process, PID is %d.\n", (int)getpid());

    for (int i = 0; i < CHILD_COUNT; ++i) {
        /*
         * Flush before each fork so buffered output is not duplicated into
         * the child when stdout is a pipe or a file.
         */
        fflush(stdout);

        pid = fork();
        if (pid == -1) {
            /* Report at once, while errno is still meaningful, and stop forking. */
            perror("fork");
            status = 1;
            break;
        }
        if (pid == 0) {
            /* Child: must not continue the loop, or it would fork too. */
            break;
        }

        children[forked++] = pid;
        tPrint("Parent process forked one child process--%d.\n", (int)pid);
        tPrint("Parent process is waiting for child process to exit, \n");
    }

    if (pid == 0) {
        sleep(1);
        printf("Hello from Child Process %d.\n", (int)getpid());
        return 0;
    }

    /* Parent: reap every child that was created, even if a later fork failed. */
    for (int i = 0; i < forked; ++i) {
        if (waitpid(children[i], NULL, 0) == -1) {
            perror("waitpid");
            status = 1;
            continue;
        }
        tPrint("Child Process has exited.\n");
    }
    tPrint("Parent Process has exited.\n");

    return status;
}

/*
 * printf-style logger that prefixes every message with the local time
 * (HH:MM:SS) and the calling process's PID, so interleaved parent/child
 * output can be told apart.
 *
 * fmt, ...: printf-style format string and its arguments.
 * Returns the value returned by vprintf() for the formatted message only
 * (the prefix is not counted); negative on output error.
 */
int tPrint (const char *fmt, ...) {
    va_list args;
    int written;
    time_t now = time(NULL);
    const struct tm *local = localtime(&now);

    if (local != NULL) {
        printf("%02d:%02d:%02d: %5d|", local->tm_hour, local->tm_min, local->tm_sec, (int)getpid());
    } else {
        /* localtime() can fail; still emit the PID prefix instead of dereferencing NULL. */
        printf("??:??:??: %5d|", (int)getpid());
    }

    va_start(args, fmt);
    written = vprintf(fmt, args);
    va_end(args);   /* every va_start must be paired with va_end */
    return written;
}