xv6中断处理浅析

前言

　　xv6是x86处理器上用ANSI标准C重新实现的Unix第六版(Unix V6，通常直接被称为V6)，本文主要对其中断处理过程进行分析。中断也是需要硬件和内核合作完成的，我也将从这两部分进行分析。

中断

　　上文xv6内存管理与进程调度过程浅析提到，在内核初始化完毕以后，开始进入无限循环的进程调度状态，我们再回顾一下scheduler函数。

//PAGEBREAK: 42
// Per-CPU scheduling loop.
// Every CPU enters scheduler() once its own setup is finished and
// never returns from it.  Each pass of the outer loop:
//  - re-enables interrupts on this processor,
//  - walks ptable.proc under ptable.lock looking for RUNNABLE entries,
//  - swtch()es into each one it finds, and
//  - resumes here once that process swtch()es back to cpu->scheduler.
void
scheduler(void)
{
  struct proc *candidate;
  int slot;

  for(;;){
    // Interrupts must be on so a pending interrupt can be taken here.
    sti();

    acquire(&ptable.lock);
    for(slot = 0; slot < NPROC; slot++){
      candidate = &ptable.proc[slot];
      if(candidate->state == RUNNABLE){
        // Hand the CPU to the chosen process.  The process is
        // responsible for releasing ptable.lock and taking it
        // again before it switches back to this loop.
        proc = candidate;
        switchuvm(candidate);
        candidate->state = RUNNING;
        swtch(&cpu->scheduler, candidate->context);
        switchkvm();

        // Back in the scheduler: the process has stopped running for
        // now and is expected to have updated its own state already.
        proc = 0;
      }
    }
    release(&ptable.lock);
  }
}

　　通常认为，内核调用swtch以后，就进入某个进程的用户态执行阶段，作为系统的主要执行生命周期。中断，就是从这个主要执行周期中强制暂停，从用户态重新返回内核，然后由内核执行相应处理代码的过程。上面的代码之所以能从swtch返回并继续执行switchkvm，就是因为中断把控制权从用户进程重新交还给了内核。

　　中断是一个统一的称呼，实际上可以分成3种类型：

系统调用
硬件中断
异常

　　系统调用是用户进程主动发起中断，把控制权交还给内核，并指定执行某个处理逻辑。由于操作系统要提供足够的隔离性，只有内核才有足够的权限直接控制硬件，或者是执行一些权限比较高的指令。内核对外只提供了部分的接口，用户进程只能通过调用这些接口才能访问硬件。用户进程调用这些内核提供的接口，就是发起系统调用的过程。

　　由于硬件是并行运作的，当CPU在不停地执行指令的时候，像磁盘、键盘、显示器这些外部硬件也会同时执行自己的工作。当CPU向某个硬件发出操作请求之后，不会一直等待该操作完成才继续执行，而是让硬件在就绪的时候通过中断来通知CPU。这个过程叫做硬件中断。

　　软件也有可能发生一些错误，例如除0，或者访问了没有权限的内存空间，就会抛出异常。异常处理在内核中也是通过中断来实现的。

硬件处理中断

　　xv6将不同的中断进行分类，每个中断都对应了特定的中断处理函数。当中断发生的时候，硬件会做一系列的操作来准备内核的执行环境。内核初始化的时候会在内存中设置一个中断描述符表（IDT），把每个中断号对应的处理函数做好映射，这样在中断发生的时候CPU就知道该执行哪个中断处理函数。同时CPU有一个tr寄存器，指向当前的任务状态段，即TSS（task state segment），其中比较重要的字段就是内核栈的ss和esp，这样在中断的时候就知道要加载哪个内核栈了。这个TSS是在switchuvm函数中（与页表切换一起）设置的：

// Switch TSS and h/w page table to correspond to process p.
// Panics if p is null or if p has no kernel stack or no page directory.
// The whole update runs between pushcli() and popcli().
void
switchuvm(struct proc *p)
{
  if(p == 0)
    panic("switchuvm: no process");
  if(p->kstack == 0)
    panic("switchuvm: no kstack");
  if(p->pgdir == 0)
    panic("switchuvm: no pgdir");
  pushcli();
  // Install a GDT descriptor covering this CPU's task state (cpu->ts).
  cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0);
  cpu->gdt[SEG_TSS].s = 0;
  // Kernel stack selector and stack pointer recorded in the TSS:
  // esp0 is the address just past the end of p's kernel stack.
  cpu->ts.ss0 = SEG_KDATA << 3;
  cpu->ts.esp0 = (uint)p->kstack + KSTACKSIZE;
  // setting IOPL=0 in eflags *and* iomb beyond the tss segment limit
  // forbids I/O instructions (e.g., inb and outb) from user space
  cpu->ts.iomb = (ushort) 0xFFFF;
  // Load the TSS selector (presumably into the task register -- confirm
  // against the ltr() helper).
  ltr(SEG_TSS << 3);
  lcr3(V2P(p->pgdir));  // switch to process's address space
  popcli();
}

　　当中断发生的时候，如果进程正在用户态执行，那么CPU会加载TSS中内核栈的ss和esp，并把当前用户栈的ss和esp保存在新栈中。如果当前已经在内核态了，那么就不需要切换栈，也不需要保存用户态的栈上下文。然后会把eflags、cs、eip寄存器的值保存在内核栈中。对于部分异常，硬件还会再压入一个error code。最后CPU会从IDT中加载相应中断的eip和cs，此时进入IDT的中断处理函数。

内核处理中断

保护现场

　　xv6的IDT中断处理函数会判断当前中断是否硬件已经push了一个error code到栈里，否则会把0 push进去，作为占位。然后把当前中断号也push到栈中。然后跳转到相应的中断处理函数中。xv6的中断处理实际上让所有的IDT中的中断处理函数都指向同一个函数，处理一些中断需要的共同逻辑。处理完共同逻辑后，然后是通过栈中的中断号，再进入各自的中断处理逻辑中。

　　所有中断的IDT处理函数在push了中断号之后，都会执行alltraps函数。

 # vectors.S sends all traps here.
 # alltraps saves the remaining registers on the current stack, switches
 # to the kernel data and per-cpu segments, and calls trap() with a
 # pointer to the saved registers.  When trap() returns, execution falls
 # through to trapret, which undoes the pushes and executes iret.
.globl alltraps
alltraps:
  # Build trap frame.
  # Segment registers first, then the general-purpose registers (pushal).
  pushl %ds
  pushl %es
  pushl %fs
  pushl %gs
  pushal
  
  # Set up data and per-cpu segments.
  # %ds/%es get the SEG_KDATA selector, %fs/%gs the SEG_KCPU selector.
  movw $(SEG_KDATA<<3), %ax
  movw %ax, %ds
  movw %ax, %es
  movw $(SEG_KCPU<<3), %ax
  movw %ax, %fs
  movw %ax, %gs
  # Call trap(tf), where tf=%esp
  pushl %esp
  call trap
  # Drop the 4-byte tf argument pushed before the call.
  addl $4, %esp
  # Return falls through to trapret...
.globl trapret
trapret:
  # Restore registers in the reverse order of the pushes in alltraps.
  popal
  popl %gs
  popl %fs
  popl %es
  popl %ds
  addl $0x8, %esp  # trapno and errcode
  iret

　　alltraps函数就是保护现场，把所有剩下的寄存器都入栈，然后把当前esp入栈，作为参数传递给之后执行的trap函数。这个参数就是指向trapframe结构体的指针。如果是系统调用的话，那么eax保存了当前的系统调用号。所有的系统调用都是同一个中断类型，因此不能通过中断号区分，而需要通过系统调用号再进一步区分。当trap返回的时候，先把栈指针+4让trap的参数出栈，然后顺序执行到trapret，把所有寄存器都恢复，并跳过栈上的中断号和error code，最后由iret恢复eip、cs、eflags（如果是返回用户态，还有esp和ss），从之前被中断的地方重新开始执行。回顾xv6内存管理与进程调度过程浅析中init进程初始化以及fork调用，同样都是在内核栈中构造trapframe，然后通过trapret函数把控制权从内核交还给用户进程。可见，在中断处理中，trapframe的构造是硬件和内核共同完成的。

trap函数

// Common C-level trap handler; alltraps calls it with tf pointing at the
// registers it saved on the stack.
// System calls (tf->trapno == T_SYSCALL) are handed to syscall(); every
// other trap is dispatched by the switch on tf->trapno below.
void
trap(struct trapframe *tf)
{
  if(tf->trapno == T_SYSCALL){
    // Do not start a system call for a process already marked killed.
    if(proc->killed)
      exit();
    // syscall() reads the call number from, and writes the result to,
    // proc->tf->eax, so record the frame before calling it.
    proc->tf = tf;
    syscall();
    // The process may have been marked killed during the call.
    if(proc->killed)
      exit();
    return;
  }
  switch(tf->trapno){
  case T_IRQ0 + IRQ_TIMER:
    // Only CPU 0 advances the global tick counter, under tickslock,
    // and then calls wakeup(&ticks).
    if(cpunum() == 0){
      acquire(&tickslock);
      ticks++;
      wakeup(&ticks);
      release(&tickslock);
    }
    // NOTE(review): lapiceoi() presumably signals end-of-interrupt to
    // the local APIC -- confirm against its definition.
    lapiceoi();
    break;
  case T_IRQ0 + IRQ_IDE:
    ideintr();
    lapiceoi();
    break;
  case T_IRQ0 + IRQ_IDE+1:
    // Bochs generates spurious IDE1 interrupts.
    break;
  case T_IRQ0 + IRQ_KBD:
    kbdintr();
    lapiceoi();
    break;
  case T_IRQ0 + IRQ_COM1:
    uartintr();
    lapiceoi();
    break;
  case T_IRQ0 + 7:
  case T_IRQ0 + IRQ_SPURIOUS:
    // Both vectors are only logged, with the cs:eip that was interrupted.
    cprintf("cpu%d: spurious interrupt at %x:%x\n",
            cpunum(), tf->cs, tf->eip);
    lapiceoi();
    break;
  //PAGEBREAK: 13
  default:
    // Any trap number not listed above.  The low two bits of the saved
    // cs are used to tell kernel code (0) from user code (DPL_USER).
    if(proc == 0 || (tf->cs&3) == 0){
      // In kernel, it must be our mistake.
      cprintf("unexpected trap %d from cpu %d eip %x (cr2=0x%x)\n",
              tf->trapno, cpunum(), tf->eip, rcr2());
      panic("trap");
    }
    // In user space, assume process misbehaved.
    cprintf("pid %d %s: trap %d err %d on cpu %d "
            "eip 0x%x addr 0x%x--kill proc\n",
            proc->pid, proc->name, tf->trapno, tf->err, cpunum(), tf->eip,
            rcr2());
    // Only mark the process here; the checks below perform the exit.
    proc->killed = 1;
  }
  // Force process exit if it has been killed and is in user space.
  // (If it is still executing in the kernel, let it keep running
  // until it gets to the regular system call return.)
  if(proc && proc->killed && (tf->cs&3) == DPL_USER)
    exit();
  // Force process to give up CPU on clock tick.
  // If interrupts were on while locks held, would need to check nlock.
  if(proc && proc->state == RUNNING && tf->trapno == T_IRQ0+IRQ_TIMER)
    yield();
  // Check if the process has been killed since we yielded
  if(proc && proc->killed && (tf->cs&3) == DPL_USER)
    exit();
}

　　trap函数是通过tf->trapno来进行逻辑分支处理的。下面主要介绍一下系统调用的处理。

系统调用

　　当tf->trapno是T_SYSCALL的时候，内核调用syscall函数。

// System call handlers, defined elsewhere.  Each takes no C arguments
// and returns an int, which syscall() stores in proc->tf->eax.
extern int sys_chdir(void);
extern int sys_close(void);
extern int sys_dup(void);
extern int sys_exec(void);
extern int sys_exit(void);
extern int sys_fork(void);
extern int sys_fstat(void);
extern int sys_getpid(void);
extern int sys_kill(void);
extern int sys_link(void);
extern int sys_mkdir(void);
extern int sys_mknod(void);
extern int sys_open(void);
extern int sys_pipe(void);
extern int sys_read(void);
extern int sys_sbrk(void);
extern int sys_sleep(void);
extern int sys_unlink(void);
extern int sys_wait(void);
extern int sys_write(void);
extern int sys_uptime(void);

// Dispatch table indexed by system call number (SYS_*).
// Written with standard C99 designated initializers ("[index] = value")
// instead of the obsolete GNU "[index] value" form, so it does not
// depend on a compiler extension.  Indices that are not listed are
// zero-initialized (null), which syscall() treats as an unknown call.
static int (*syscalls[])(void) = {
[SYS_fork]    = sys_fork,
[SYS_exit]    = sys_exit,
[SYS_wait]    = sys_wait,
[SYS_pipe]    = sys_pipe,
[SYS_read]    = sys_read,
[SYS_kill]    = sys_kill,
[SYS_exec]    = sys_exec,
[SYS_fstat]   = sys_fstat,
[SYS_chdir]   = sys_chdir,
[SYS_dup]     = sys_dup,
[SYS_getpid]  = sys_getpid,
[SYS_sbrk]    = sys_sbrk,
[SYS_sleep]   = sys_sleep,
[SYS_uptime]  = sys_uptime,
[SYS_open]    = sys_open,
[SYS_write]   = sys_write,
[SYS_mknod]   = sys_mknod,
[SYS_unlink]  = sys_unlink,
[SYS_link]    = sys_link,
[SYS_mkdir]   = sys_mkdir,
[SYS_close]   = sys_close,
};
// System call numbers.
// Each value is the index of the matching handler in syscalls[], and is
// the number syscall() reads from proc->tf->eax.  Numbering starts at 1;
// syscall() rejects 0 and anything outside the table.
#define SYS_fork    1
#define SYS_exit    2
#define SYS_wait    3
#define SYS_pipe    4
#define SYS_read    5
#define SYS_kill    6
#define SYS_exec    7
#define SYS_fstat   8
#define SYS_chdir   9
#define SYS_dup    10
#define SYS_getpid 11
#define SYS_sbrk   12
#define SYS_sleep  13
#define SYS_uptime 14
#define SYS_open   15
#define SYS_write  16
#define SYS_mknod  17
#define SYS_unlink 18
#define SYS_link   19
#define SYS_mkdir  20
#define SYS_close  21
// Dispatch the system call requested by the current process.
// The call number is taken from the saved eax in proc->tf.  The
// handler's return value, or -1 when the number is not a known call,
// is written back to that same saved eax.
void
syscall(void)
{
  int callno = proc->tf->eax;

  // Reject numbers outside the table as well as empty table slots.
  if(callno <= 0 || callno >= NELEM(syscalls) || !syscalls[callno]){
    cprintf("%d %s: unknown sys call %d\n",
            proc->pid, proc->name, callno);
    proc->tf->eax = -1;
    return;
  }

  proc->tf->eax = syscalls[callno]();
}

　　根据tf->eax决定执行哪个系统调用函数，并把返回值赋给tf->eax。按照x86（32位）的C调用约定，函数的整型返回值放在eax寄存器中，因此trapret恢复寄存器之后，用户进程就能从eax中拿到系统调用的返回值。

后续

　　有关中断还有一些内容，例如用作上下文切换的时钟中断以及磁盘控制器的中断，之后在进一步介绍xv6的时候再作介绍。