【UCB操作系统CS162项目】Pintos Lab2:用户程序 User Programs(下)

在上节中,我们已经完成了 Lab 2 要求的参数传递和系统调用中的 halt, exit 以及向 stdout 输出的 write,最终停在了 wait 的实现之前。本节就先从 wait 和 exec 继续。

Syscall wait + exec:实现父子进程

讲义中 wait 的要求是这样的,相当之长:

在这里插入图片描述

不难读懂,为了实现 wait,我们要先完成父子进程的设计。父进程应该持有一个记录所有子进程信息的列表,但这个列表的成员不能是子进程本身,因为即使子进程已经结束了,父进程应该仍能查询到其信息。所以在 thread.h 中添加一种新的数据结构:

/** Information of a thread's child.
    The record is allocated with malloc() in thread_create(), separately
    from the child's struct thread, and is linked into the parent's
    child_list so the parent can still read it after the child exits. */
struct child_entry
{
  tid_t tid;                          /**< Child's tid. */
  struct thread *t;                   /**< Pointer to child thread. Set to NULL when no longer alive. */
  bool is_alive;                      /**< Whether the child is still alive (not exited). */
  int exit_code;                      /**< Child's exit code. */
  bool is_waiting_on;                 /**< Whether the parent is waiting on the child. */
  struct semaphore wait_sema;         /**< Semaphore to let parent wait on the child. */
  struct list_elem elem;              /**< List element linking this record into the parent's child_list. */
};

这些成员就是父进程需要能获取的关于子进程信息。在 thread 类中:

/* Excerpt of struct thread: only the members added for parent/child
   tracking are shown. */
struct thread
{
  ...
  struct thread *parent;              /**< Thread's parent. Assigned in thread_create(); an exiting
                                           parent resets it to NULL in thread_exit(). */
  struct list child_list;             /**< Thread's children. Member type is child_entry. */
  struct child_entry *as_child;       /**< Thread itself's child_entry. This will be added 
                                           to its parent's child_list and is heap-allocated 
                                           so that it lives after the thread dies. */
  ...
};

添加一个指向父进程的指针 parent,列表 child_list 和一个指针 as_child。一个进程作为子进程的信息在创建时(thread_create)中使用 malloc 生成,添加到父进程的 child_list 同时存储到自身的 as_child 指针。这样做一个进程就能方便地更新自己向父进程汇报的信息。初始化:

/* Excerpt of init_thread(): only the line added for parent/child
   tracking is shown. */
static void
init_thread (struct thread *t, const char *name, int priority)
{
  ...
  /* Start with an empty child list.  as_child is not initialized here;
     that is done later, in thread_create(), once the tid is known. */
  list_init(&t->child_list);
  ...
}

/* Excerpt of thread_create(): creates the new thread's child_entry and
   links the thread to its parent (the calling thread).

   Returns TID_ERROR if the child_entry cannot be allocated. */
tid_t
thread_create (const char *name, int priority,
               thread_func *function, void *aux) 
{
  ...
  /* Allocate the child entry before the thread is initialized, so an
     out-of-memory condition can be reported without dereferencing NULL
     and without leaving a half-registered thread behind.
     NOTE(review): assumes T was obtained with palloc_get_page() in the
     elided part above, as in stock Pintos -- confirm. */
  struct child_entry *entry = malloc(sizeof *entry);
  if (entry == NULL) {
    palloc_free_page(t);
    return TID_ERROR;
  }

  /* Initialize thread. */
  init_thread (t, name, priority);
  tid = t->tid = allocate_tid ();

  /* Fill in the child entry now that the tid is known. */
  entry->tid = tid;
  entry->t = t;
  entry->is_alive = true;
  entry->exit_code = 0;
  entry->is_waiting_on = false;
  sema_init(&entry->wait_sema, 0);
  t->as_child = entry;

  /* Link child and parent thread. */
  t->parent = thread_current();
  list_push_back(&t->parent->child_list, &entry->elem);
}

有了这些信息我们就能写出 process_wait() 了:查找自身的 child_list,如果不存在目标子线程,返回 -1;如果存在且子线程未结束,也没有 wait 过,用信号量卡住自身等待子线程运行结束;如果 wait 过,返回 -1;如果已经结束了,返回子线程留下来的 exit_code

/** Waits for the child with tid CHILD_TID to exit and returns its exit code.

    Returns -1 if CHILD_TID is not in the caller's child_list, or if the
    caller has already waited on that child.  A child can be waited on at
    most once, whether or not it had already exited at the time of the
    first wait.

    NOTE(review): the is_alive / is_waiting_on handshake with the exiting
    child is not atomic; whether that is safe depends on how the child's
    exit path is synchronized -- confirm. */
int
process_wait (tid_t child_tid) 
{
  struct thread *t_cur = thread_current();
  struct list_elem *e;

  for (e = list_begin (&t_cur->child_list); e != list_end (&t_cur->child_list);
       e = list_next (e))
  {
    struct child_entry *entry = list_entry(e, struct child_entry, elem);
    if (entry->tid != child_tid) {
      continue;
    }

    /* Already waited on (or currently being waited on). */
    if (entry->is_waiting_on) {
      return -1;
    }

    /* Mark the child as waited on in every case, so that a second wait
       on a child that had already exited also returns -1. */
    entry->is_waiting_on = true;
    if (entry->is_alive) {
      sema_down(&entry->wait_sema); // wait for child process to exit
    }
    return entry->exit_code;
  }

  // child_tid is not a child of current process
  return -1;
}

在一个进程结束时,要考虑好其与父子进程的关系。一方面,作为父进程,应该告诉所有它仍存活的子进程自身已退出(将 entry->t->parent 标为 NULL)。另一方面,作为子进程,如果发现父进程已经退出了,则 as_child 记录的信息不会再被访问,应释放掉。否则,应该更新 as_child->exit_code(从自身的 exit_code 属性拷贝,因为外部与线程互动时都会设置 thread 的 exit_code);如果父进程正用信号量等待该子进程,up 该信号量;设置 is_alive 为 false 并将指向自身的 t 指针设为 NULL。

这样的思路是清晰的,child_entry 的信息既被正确记录,也不会造成资源泄漏。需要注意的是,父进程退出时,child_list 中那些已经结束的子进程所对应的 child_entry 也必须由父进程释放,否则这部分内存仍会泄漏。

/* Excerpt of thread_exit(): parent/child bookkeeping done when a thread
   exits.  `e` and `t_cur` are declared in the elided part above. */
void
thread_exit (void) 
{
  ...
  /* As a parent: dispose of every child record.
     - entry->t != NULL: the child has not finished its own exit
       bookkeeping yet.  Clear its parent pointer so that it frees its
       own record when it exits.
     - entry->t == NULL: the child has finished and will not touch the
       record again (clearing `t` is the last thing it does to it), so
       the record is freed here; nobody else would free it.
     Every record is unlinked because the list head lives in this
     thread, which is going away. */
  for (e = list_begin (&t_cur->child_list); e != list_end (&t_cur->child_list); )
  {
    struct child_entry *entry = list_entry(e, struct child_entry, elem);
    e = list_remove(e); // list_remove() returns the following element
    if (entry->t != NULL) {
      entry->t->parent = NULL;
    } else {
      free(entry);
    }
  }
  // as a child, if the parent thread has exited, it's ok to free the as_child element
  if (t_cur->parent == NULL) {
    free(t_cur->as_child);
  } else {
    // otherwise, save our status as parent may visit it later (e.g., in wait())
    t_cur->as_child->exit_code = t_cur->exit_code;
    if (t_cur->as_child->is_waiting_on) {
      sema_up(&t_cur->as_child->wait_sema);
    }
    t_cur->as_child->is_alive = false;
    // must stay the last access to the record: the parent frees it once t is NULL
    t_cur->as_child->t = NULL;
  }
  ...
}

在 wait 的 syscall 函数中,调用 process_wait 即可:

/* wait system call: reads the pid argument stored one ptr_size above the
   user stack pointer and stores process_wait()'s result in eax. */
static void 
syscall_wait(struct intr_frame *f)
{
  int *pid_slot = (int *)(f->esp + ptr_size);
  f->eax = process_wait(*pid_slot);
}

有了父子进程的基础,exec 的实现也比较自然了,讲义要求:

在这里插入图片描述
显然主体就和主线程的创建一样,调用 process_execute()

/* exec system call: reads the command-line pointer stored one ptr_size
   above the user stack pointer and stores process_execute()'s result
   in eax. */
static void 
syscall_exec(struct intr_frame *f)
{
  char **cmd_slot = (char **)(f->esp + ptr_size);
  f->eax = process_execute(*cmd_slot);
}

不过这里有一个新要求是如果子进程没有正常运行起来,父进程要返回 -1。现在的代码中,process_execute() 内用 start_process 创建新线程后就返回了,没有等新线程跑起来;而子进程可能没有正常运行的原因就在 start_process 中:子进程的可执行程序可能因为找不到文件等原因而加载失败。所以要再加一个信号量,让父进程等待:

/* Excerpt of struct thread: members used to report to a parent whether
   the child it spawned managed to load its executable. */
struct thread
{
  ...
  struct semaphore sema_exec;         /**< Semaphore for executing (spawning) a new process. 
                                           Downed by the parent in process_execute() and
                                           "UPed" by the child in start_process() after knowing
                                           whether the child has loaded its executable successfully. */
  bool exec_success;                  /**< Whether new process successfully loaded its executable.
                                           Set on the parent by the child; reset by the parent
                                           in process_execute() after a successful spawn. */
  ...
};

tid_t
process_execute (const char *proc_cmd) 
{
    
    
  ...
  /* Create a new thread to execute PROC_CMD. */
  tid = thread_create (proc_name, PRI_DEFAULT, start_process, proc_cmd_copy2);
  ...

  sema_down(&thread_current()->sema_exec);
  if (!thread_current()->exec_success) {
    
    
    return -1;
  }
  thread_current()->exec_success = false; // reset the flag for next spawn

  return tid;
}

子进程加载是否成功,其实之前的代码已经告诉我们了,就是 success 标识。成功失败两种情况为 exec_success 设置相应的值并 up 信号量即可。

/* Excerpt of start_process(): thread function of a newly spawned
   process.  After load() it reports the outcome to the parent, which is
   blocked on its sema_exec in process_execute(). */
static void
start_process (void *proc_cmd_)
{
  ...
  success = load (proc_name, &if_.eip, &if_.esp);

  /* If load failed, quit. */
  if (!success) {
    ...
    /* Mark ourselves dead and record the failure status, then wake the
       parent.  The parent's exec_success is left untouched here, so the
       parent still sees whatever value it held before the spawn. */
    thread_current()->as_child->is_alive = false;
    thread_current()->exit_code = -1;
    sema_up(&thread_current()->parent->sema_exec);
    thread_exit ();
  }
  ...
  /* load success. */
  /* Set the flag before upping the semaphore so the parent reads it
     after it wakes up. */
  thread_current()->parent->exec_success = 1;
  sema_up(&thread_current()->parent->sema_exec);
  ...
}

这一步做完跑一下测试会发现 exec 和 wait 相关的部分测试已经能通过了,但是有 fail 的,比如下面这个:
在这里插入图片描述
这可真是太暴力了…那么让我们休整一下,进入下一部分的实现吧。

迟来的 Task 3: 用户访存(指针)检查

好吧其实讲义上这部分是在 System Call 之前的,但自己做的时候反正博主是愿意先做正常 case 的逻辑能够早点看到效果,再做特殊情况处理。那么现在就是做后者的时间了。

现代操作系统,理论上应该用户再怎么乱折腾也就是自己的程序崩掉,不至于连着操作系统一块带崩(蓝屏警告doge)。这就要求 OS 要有对用户行为,主要就是通过指针的内存访问的检查功能。讲义告诉我们发现非法访存时处理方式也是立刻结束用户进程并释放资源,并提供了两种可选检测方法:

在这里插入图片描述
第一种在访问用户指针的内存前先做合法性检查:地址是否属于用户内存区域(小于 PHYS_BASE)以及地址是否属于当前进程的内存区域;第二种是仅做前者的检查然后就访问,如果不合法会引发 page fault,然后再处理这个异常。后者通常速度更快一些所以更经常被应用在实际系统中,那么我们也走这条路径。

讲义很贴心地给了我们非法访存引发异常后返回错误的方法:利用以下函数,并在 exception.c 中引发异常后将 EAX 的值拷贝到 EIP 中,然后将 EAX 设为 0xffffffff(-1):

/* Reads a byte at user virtual address UADDR.
   UADDR must be below PHYS_BASE.
   Returns the byte value if successful, -1 if a segfault
   occurred.

   The asm first stores the address of local label 1 (the instruction
   after the access) in eax and then performs the read into eax.  This
   relies on the page-fault handler copying eax into eip and setting
   eax to -1 when the access faults, so that execution resumes at the
   label with -1 as the result. */
static int
get_user (const uint8_t *uaddr)
{
  int result;
  asm ("movl $1f, %0; movzbl %1, %0; 1:"
       : "=&a" (result) : "m" (*uaddr));
  return result;
}

/* Writes BYTE to user address UDST.
   UDST must be below PHYS_BASE.
   Returns true if successful, false if a segfault occurred.

   The asm stores the address of local label 1 in eax before the write.
   This relies on the page-fault handler copying eax into eip and
   setting eax to -1 when the write faults; otherwise eax still holds
   the label address, which is what the comparison against -1 below
   distinguishes. */
static bool
put_user (uint8_t *udst, uint8_t byte)
{
  int error_code;
  asm ("movl $1f, %0; movb %b2, %1; 1:"
       : "=&a" (error_code), "=m" (*udst) : "q" (byte));
  return error_code != -1;
}

讲义没有告诉我们应该如此做的具体判断条件。来看 exception.c 的代码:


/** Page fault handler.  This is a skeleton that must be filled in
   to implement virtual memory.  Some solutions to project 2 may
   also require modifying this code.

   At entry, the address that faulted is in CR2 (Control Register
   2) and information about the fault, formatted as described in
   the PF_* macros in exception.h, is in F's error_code member.  The
   example code here shows how to parse that information.  You
   can find more information about both of these in the
   description of "Interrupt 14--Page Fault Exception (#PF)" in
   [IA32-v3a] section 5.15 "Exception and Interrupt Reference".

   As written here, the handler only counts the fault, prints a
   diagnostic line and hands the frame to kill(). */
static void
page_fault (struct intr_frame *f) 
{
  bool not_present;  /**< True: not-present page, false: writing r/o page. */
  bool write;        /**< True: access was write, false: access was read. */
  bool user;         /**< True: access by user, false: access by kernel. */
  void *fault_addr;  /**< Fault address. */

  /* Obtain faulting address, the virtual address that was
     accessed to cause the fault.  It may point to code or to
     data.  It is not necessarily the address of the instruction
     that caused the fault (that's f->eip).
     See [IA32-v2a] "MOV--Move to/from Control Registers" and
     [IA32-v3a] 5.15 "Interrupt 14--Page Fault Exception
     (#PF)". */
  asm ("movl %%cr2, %0" : "=r" (fault_addr));

  /* Turn interrupts back on (they were only off so that we could
     be assured of reading CR2 before it changed). */
  intr_enable ();

  /* Count page faults. */
  page_fault_cnt++;

  /* Determine cause by decoding the PF_* bits of the error code. */
  not_present = (f->error_code & PF_P) == 0;
  write = (f->error_code & PF_W) != 0;
  user = (f->error_code & PF_U) != 0;


  /* To implement virtual memory, delete the rest of the function
     body, and replace it with code that brings in the page to
     which fault_addr refers. */
  printf ("Page fault at %p: %s error %s page in %s context.\n",
          fault_addr,
          not_present ? "not present" : "rights violation",
          write ? "writing" : "reading",
          user ? "user" : "kernel");
  kill (f);
}

显然异常处理返回应该放在 kill(f) 之前,那么如何判断呢?思考原代码中已经给出的几个 flag,不难发现关键点在于 user:syscall 引起的异常处于 kernel 态,而理论上如果 kernel 代码逻辑没有错误的话,其它情况下 kernel 态是不应该产生 page fault 的。所以,如果 user 为 false,一定是因为 syscall 非法访存导致的。这样就找到了判断条件。补全代码如下:

  // Goes right after `user` has been assigned:
  
  // The only chance that a page fault happens in kernel context is when dealing 
  // with user-provided pointer through system call, because kernel code shouldn't 
  // produce page faults (if we're writing it right...)
  // get_user()/put_user() load the address of their recovery label into eax
  // before touching user memory, so resume there and report -1 through eax.
  if (!user) {
    f->eip = (void (*) (void)) f->eax;
    f->eax = -1;
    return;
  }
  
  kill(f);

顺便读一读 kill() 函数,当异常是用户引起时(例如,直接访问 NULL),不属于 syscall,会走 SEL_UCSEG 的分支直接使线程退出。因为我将线程的 exit_code 初始化为了 0(正常退出),这里应该加一句将 exit_code 改为 -1。

/** Handler for an exception (probably) caused by a user process.
    Excerpt: only the user-code-segment case is shown. */
static void
kill (struct intr_frame *f) 
{
  /* The interrupt frame's code segment value tells us where the
     exception originated. */
  switch (f->cs)
    {
    case SEL_UCSEG:
      /* User's code segment, so it's a user exception, as we
         expected.  Kill the user process.  */
      printf ("%s: dying due to interrupt %#04x (%s).\n",
              thread_name (), f->vec_no, intr_name (f->vec_no));
      intr_dump_frame (f);
      /* Record an abnormal exit status before the thread exits. */
      thread_current()->exit_code = -1;
      thread_exit (); 
    ...
    }
}

处理好这里,让我们回到 syscall.c,首先将讲义提供的两个函数加上。这两个函数的功能是检查一个 Byte 的读写是否合法。用户提供的指针可能是指向各种大小的数据的,于是写出以下两个函数,实现任意大小数据的指针检查(如果不合法,直接调用 terminate_process,使线程以 -1 的返回值退出):

/** Check if a user-provided pointer is safe to read from. Return the pointer itself if safe, 
 * or call terminate_process() (which do not return) to kill the process with exit_code -1.
 *
 * PTR is the start of the range and SIZE its length in bytes.  Every byte
 * of the range must lie in user address space and be readable; a range
 * that starts in user space but extends past it is rejected. */
static void * 
check_read_user_ptr(const void *ptr, size_t size)
{
  const uint8_t *bytes = ptr;

  /* Reject a kernel start address even when SIZE is 0. */
  if (!is_user_vaddr(ptr)) {
    terminate_process();
  }
  for (size_t i = 0; i < size; i++) {
    /* get_user() requires its argument to be a user address, so test the
       address of each byte before probing it. */
    if (!is_user_vaddr(bytes + i) || get_user(bytes + i) == -1) {
      terminate_process();
    }
  }
  return (void *)ptr; // remove const
}

/** Check if a user-provided pointer is safe to write to. Return the pointer itself if safe, 
 * or call terminate_process() (which do not return) to kill the process with exit_code -1.
 *
 * PTR is the start of the range and SIZE its length in bytes.  Every byte
 * of the range must lie in user address space and be writable.  The check
 * leaves the contents of the range unchanged. */
static void * 
check_write_user_ptr(void *ptr, size_t size)
{
  uint8_t *bytes = ptr;

  /* Reject a kernel start address even when SIZE is 0. */
  if (!is_user_vaddr(ptr)) {
    terminate_process();
  }
  for (size_t i = 0; i < size; i++) {
    /* get_user()/put_user() require a user address, so test the address
       of each byte before probing it. */
    if (!is_user_vaddr(bytes + i)) {
      terminate_process();
    }
    /* Probe writability by writing back the byte that is already there,
       so the caller's data is not clobbered by the check. */
    int current = get_user(bytes + i);
    if (current == -1 || !put_user(bytes + i, (uint8_t) current)) {
      terminate_process();
    }
  }
  return ptr;
}

在解析系统调用参数时,这样使用即可:

/* wait system call: validates and reads the pid argument stored one
   ptr_size above the user stack pointer, then stores process_wait()'s
   result in eax. */
static void 
syscall_wait(struct intr_frame *f)
{
  int *pid_slot = check_read_user_ptr(f->esp + ptr_size, sizeof(int));
  f->eax = process_wait(*pid_slot);
}

其它类型同理。别忘了,入口的系统调用编号也是解引用一个用户指针得到的,也要检查:

/* Excerpt of syscall_handler(): entry point for system calls. */
static void
syscall_handler (struct intr_frame *f) 
{
  /* The syscall number is read through the user-supplied stack pointer,
     so that pointer is validated like any other user pointer. */
  int syscall_type = *(int *)check_read_user_ptr(f->esp, sizeof(int));
  ...
}

字符串是一种特殊情况,其大小无法通过 sizeof(char *) 中获取,而是以出现 \0 字符为结束标识。为其写一个专用的函数:

/** Check if a user-provided string is safe to read from. Return the string itself if safe, 
 * or call terminate_process() (which do not return) to kill the process with exit_code -1.
 *
 * Every byte up to and including the terminating '\0' must lie in user
 * address space and be readable; a string that runs past the end of user
 * address space before its terminator is rejected. */
static char * 
check_read_user_str(const char *str)
{
  const uint8_t *cursor = (const uint8_t *)str;

  for (;; ++cursor) {
    /* get_user() requires a user address, so test each byte's address
       before probing it (this also covers the start of the string). */
    if (!is_user_vaddr(cursor)) {
      terminate_process();
    }

    int c = get_user(cursor);
    if (c == -1) {
      terminate_process();
    }
    if (c == '\0') {
      // reached the end of str
      return (char *)str; // remove const
    }
  }
  NOT_REACHED();
}

使用时,注意先检查字符串的指针,再检查字符串本体:

/* exec system call: validates the argument slot holding the command-line
   pointer, then the string it points to, and stores process_execute()'s
   result in eax. */
static void 
syscall_exec(struct intr_frame *f)
{
  char **cmd_slot = check_read_user_ptr(f->esp + ptr_size, ptr_size);
  /* check_read_user_str() hands back the string it was given once the
     whole string has been validated. */
  char *checked_cmd = check_read_user_str(*cmd_slot);
  f->eax = process_execute(checked_cmd);
}

至此,task 3 用户访存检查任务完成。

Syscall:create, remove, open…:文件系统调用

泪目,Pintos 良心地给了我们一套文件系统的实现,没有让我们从零手写,于是这部分我们主要就是做好线程文件资源的管理,具体的文件操作就是轻松愉悦的调包了(在 filesys.h 和 file.h 中)。讲义上提到的一些特殊要求,例如打开的文件也能被删除,文件系统都帮我们做好了,不用特殊判断。

需要实现的要求之一,是文件系统暂时没有同步机制,需要自己加锁。我把锁放在了 filesys.h 中,所有文件系统相关的调用都要保护,例如:

/* create system call: validates the file-name pointer, the name string
   and the initial-size argument, then calls filesys_create() while
   holding filesys_lock.  The result is stored in eax. */
static void 
syscall_create(struct intr_frame *f)
{
  char **name_slot = check_read_user_ptr(f->esp + ptr_size, ptr_size);
  char *file_name = *name_slot;
  check_read_user_str(file_name);

  unsigned *size_slot = check_read_user_ptr(f->esp + 2 * ptr_size, sizeof(unsigned));
  unsigned file_size = *size_slot;

  lock_acquire(&filesys_lock);
  bool created = filesys_create(file_name, file_size);
  f->eax = created;
  lock_release(&filesys_lock);
}

进程需要管理好其打开的文件,每个文件由一个整型 file descriptor 和一个 file 指针描述。FD=0 标识 STDIN,FD=1 标识 STDOUT,其它文件从 2 号开始,分配规则随意,我这里就使其自然增长。定义结构体 file_entry,并在 thread 类中添加以下成员:

/** Information of a thread's opened file.
    One record is allocated per successful open and linked into the
    owning thread's file_list. */
struct file_entry
{
  int fd;                             /**< File descriptor. */
  struct file *f;                     /**< Pointer to file. */
  struct list_elem elem;              /**< List element linking this record into the thread's file_list. */
};

/* Excerpt of struct thread: members used to track the files a thread
   has open. */
struct thread
{
  ...
  struct file *exec_file;             /**< The executable file loaded by the thread. Opened in
                                           start_process() and closed in process_exit(). */
  struct list file_list;              /**< Files opened by the thread. Member type is file_entry. */
  int next_fd;                        /**< Next file descriptor to be allocated. */
  ...
};

exec_file 下面再解释。打开一个文件时,新建一个文件记录,添加到 file_list 中:

/* open system call: opens the named file and registers it in the calling
   thread's file_list under a freshly allocated descriptor.

   Stores the new descriptor in eax, or -1 if the file cannot be opened
   or the bookkeeping record cannot be allocated. */
static void 
syscall_open(struct intr_frame *f)
{
  char *file_name = *(char **)check_read_user_ptr(f->esp + ptr_size, ptr_size);
  check_read_user_str(file_name);
  
  lock_acquire(&filesys_lock);
  struct file *opened_file = filesys_open(file_name);
  lock_release(&filesys_lock);

  if (opened_file == NULL) {
    f->eax = -1;
    return;
  }

  struct file_entry *entry = malloc(sizeof *entry);
  if (entry == NULL) {
    /* Out of memory: close the file again so it is not leaked, and
       report failure instead of dereferencing NULL. */
    lock_acquire(&filesys_lock);
    file_close(opened_file);
    lock_release(&filesys_lock);
    f->eax = -1;
    return;
  }

  struct thread *t_cur = thread_current();
  entry->fd = t_cur->next_fd++;
  entry->f = opened_file;
  list_push_back(&t_cur->file_list, &entry->elem);
  f->eax = entry->fd;
}

文件相关系统调用的参数很多是 fd,这些操作都要针对已打开的文件,于是写一个由 fd 查找文件是否打开的函数:

/** Get pointer to a file entry owned by current process by its fd. 
 * Returns NULL if not found. */
static struct file_entry *
get_file(int fd)
{
    
    
  struct thread *t_cur = thread_current();
  struct list_elem *e;
  for (e = list_begin (&t_cur->file_list); e != list_end (&t_cur->file_list);
       e = list_next (e))
  {
    
    
    struct file_entry *entry = list_entry(e, struct file_entry, elem);
    if (entry->fd == fd) {
    
    
      return entry;
    }
  }
  return NULL;
}

各系统调用按照解析参数获取 fd → 根据 fd 获取 file 指针 → 调文件系统函数 → 写入返回值的流程处理即可。read 和 write 中注意 STDIN 和 STDOUT 特殊情况的处理。

调用 close 时,从 file_list 中将 file_entry 摘下,调用 file_close 后将 entry 的资源释放。线程退出时,也要把列表中仍存在(未关闭)的文件关闭并释放 entry 块,避免资源泄漏。

/* Excerpt of thread_exit(): releases the files the exiting thread still
   has open.  `t_cur` is declared in the elided part above. */
void
thread_exit (void) 
{
  ...
  struct list_elem *e;
  struct list *open_files = &t_cur->file_list;

  /* Drain the list: close each remaining file under filesys_lock and
     free its bookkeeping record. */
  while (!list_empty(open_files))
  {
    e = list_pop_front(open_files);
    struct file_entry *leftover = list_entry(e, struct file_entry, elem);

    lock_acquire(&filesys_lock);
    file_close(leftover->f);
    lock_release(&filesys_lock);

    free(leftover);
  }
  ...
}

至此文件系统相关的调用就大体 OK 了,但 Task 5 中还提出了最后一个要求:

  • Add code to deny writes to files in use as executables.
    • Many OSes do this because of the unpredictable results if a process tried to run code that was in the midst of being changed on disk.
    • This is especially important once virtual memory is implemented in project 3, but it can’t hurt even now.

一个进程自己正在跑的可执行文件不应该能被修改,非常合理。于是为每个线程添加一个特殊的文件指针 exec_file,在 start_process 中将自己的可执行文件打开存入该指针,并调用 file_deny_write() 拒绝写入,process_exit 中将其关闭(会自动允许写入):

/* Excerpt of start_process(): keeps the process's own executable open
   with writes denied for as long as the process runs.  The file is
   stored in the thread's exec_file. */
static void
start_process (void *proc_cmd_)
{
  ...
  lock_acquire(&filesys_lock);
  struct file *f = filesys_open(proc_name);
  /* filesys_open() returns NULL when the file cannot be opened (for
     example, a missing executable); file_deny_write() must not be
     called on a null file. */
  if (f != NULL) {
    file_deny_write(f);
  }
  lock_release(&filesys_lock);
  thread_current()->exec_file = f;
  ...
}

/* Excerpt of process_exit(): closes the executable file that was kept
   open with writes denied while the process was running.  `t_cur` is
   declared in the elided part above. */
void
process_exit (void)
{
  ...
  // close the executable file
  lock_acquire(&filesys_lock);
  file_close(t_cur->exec_file);
  lock_release(&filesys_lock);
  ...
}

恭喜你到达了 Lab 2 的终点! 唯一需要注意的是 multi-oom 这个测试点,会尽可能攻击你的系统直至其资源耗尽(执行也会卡一会)。如果提示执行深度不够过不去,请在整个项目搜索 malloc 和 palloc_get_page,检查你写的代码中是否存在资源泄漏的可能,包括启动进程时分配的那个命令行字符串。

在这里插入图片描述
第 100 篇博客留念~

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/Altair_alpha/article/details/127177624
今日推荐