diff --git a/.gitignore b/.gitignore index d3048285f1..1c182c9a3c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*~ _* *.o *.d diff --git a/Makefile b/Makefile index 3ddc1c1f5b..f67c88c271 100644 --- a/Makefile +++ b/Makefile @@ -26,19 +26,53 @@ OBJS = \ uart.o\ vectors.o\ vm.o\ + log.o\ # Cross-compiling (e.g., on Mac OS X) -TOOLPREFIX = i386-jos-elf- +#TOOLPREFIX = i386-jos-elf- # Using native tools (e.g., on X86 Linux) #TOOLPREFIX = +# Try to infer the correct TOOLPREFIX if not set +ifndef TOOLPREFIX +TOOLPREFIX := $(shell if i386-jos-elf-objdump -i 2>&1 | grep '^elf32-i386$$' >/dev/null 2>&1; \ + then echo 'i386-jos-elf-'; \ + elif objdump -i 2>&1 | grep 'elf32-i386' >/dev/null 2>&1; \ + then echo ''; \ + else echo "***" 1>&2; \ + echo "*** Error: Couldn't find an i386-*-elf version of GCC/binutils." 1>&2; \ + echo "*** Is the directory with i386-jos-elf-gcc in your PATH?" 1>&2; \ + echo "*** If your i386-*-elf toolchain is installed with a command" 1>&2; \ + echo "*** prefix other than 'i386-jos-elf-', set your TOOLPREFIX" 1>&2; \ + echo "*** environment variable to that prefix and run 'make' again." 1>&2; \ + echo "*** To turn off this error, run 'gmake TOOLPREFIX= ...'." 1>&2; \ + echo "***" 1>&2; exit 1; fi) +endif + +# If the makefile can't find QEMU, specify its path here +#QEMU = + +# Try to infer the correct QEMU +ifndef QEMU +QEMU = $(shell if which qemu > /dev/null; \ + then echo qemu; exit; \ + else \ + qemu=/Applications/Q.app/Contents/MacOS/i386-softmmu.app/Contents/MacOS/i386-softmmu; \ + if test -x $$qemu; then echo $$qemu; exit; fi; fi; \ + echo "***" 1>&2; \ + echo "*** Error: Couldn't find a working QEMU executable." 1>&2; \ + echo "*** Is the directory containing the qemu binary in your PATH" 1>&2; \ + echo "*** or have you tried setting the QEMU variable in Makefile?" 
1>&2; \ + echo "***" 1>&2; exit 1) +endif + CC = $(TOOLPREFIX)gcc AS = $(TOOLPREFIX)gas LD = $(TOOLPREFIX)ld OBJCOPY = $(TOOLPREFIX)objcopy OBJDUMP = $(TOOLPREFIX)objdump -CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror +CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -O2 -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector) ASFLAGS = -m32 -gdwarf-2 # FreeBSD ld wants ``elf_i386_fbsd'' @@ -49,6 +83,11 @@ xv6.img: bootblock kernel fs.img dd if=bootblock of=xv6.img conv=notrunc dd if=kernel of=xv6.img seek=1 conv=notrunc +xv6memfs.img: bootblock kernelmemfs + dd if=/dev/zero of=xv6memfs.img count=10000 + dd if=bootblock of=xv6memfs.img conv=notrunc + dd if=kernelmemfs of=xv6memfs.img seek=1 conv=notrunc + bootblock: bootasm.S bootmain.c $(CC) $(CFLAGS) -fno-pic -O -nostdinc -I. -c bootmain.c $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S @@ -69,11 +108,23 @@ initcode: initcode.S $(OBJCOPY) -S -O binary initcode.out initcode $(OBJDUMP) -S initcode.o > initcode.asm -kernel: $(OBJS) bootother initcode - $(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernel $(OBJS) -b binary initcode bootother +kernel: $(OBJS) multiboot.o data.o bootother initcode + $(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernel multiboot.o data.o $(OBJS) -b binary initcode bootother $(OBJDUMP) -S kernel > kernel.asm $(OBJDUMP) -t kernel | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernel.sym +# kernelmemfs is a copy of kernel that maintains the +# disk image in memory instead of writing to a disk. +# This is not so useful for testing persistent storage or +# exploring disk buffering implementations, but it is +# great for testing the kernel on real hardware without +# needing a scratch disk. 
+MEMFSOBJS = $(filter-out ide.o,$(OBJS)) memide.o +kernelmemfs: $(MEMFSOBJS) multiboot.o data.o bootother initcode fs.img + $(LD) $(LDFLAGS) -Ttext 0x100000 -e main -o kernelmemfs multiboot.o data.o $(MEMFSOBJS) -b binary initcode bootother fs.img + $(OBJDUMP) -S kernelmemfs > kernelmemfs.asm + $(OBJDUMP) -t kernelmemfs | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > kernelmemfs.sym + tags: $(OBJS) bootother.S _init etags *.S *.c @@ -94,7 +145,7 @@ _forktest: forktest.o $(ULIB) $(OBJDUMP) -S _forktest > forktest.asm mkfs: mkfs.c fs.h - gcc -Wall -o mkfs mkfs.c + gcc -m32 -Werror -Wall -o mkfs mkfs.c UPROGS=\ _cat\ @@ -126,7 +177,7 @@ clean: # make a printout FILES = $(shell grep -v '^\#' runoff.list) -PRINT = runoff.list $(FILES) +PRINT = runoff.list runoff.spec $(FILES) xv6.pdf: $(PRINT) ./runoff @@ -143,27 +194,33 @@ bochs : fs.img xv6.img # try to generate a unique GDB port GDBPORT = $(shell expr `id -u` % 5000 + 25000) # QEMU's gdb stub command line changed in 0.11 -QEMUGDB = $(shell if qemu -help | grep -q '^-gdb'; \ +QEMUGDB = $(shell if $(QEMU) -help | grep -q '^-gdb'; \ then echo "-gdb tcp::$(GDBPORT)"; \ else echo "-s -p $(GDBPORT)"; fi) -QEMUOPTS = -smp 2 -hdb fs.img xv6.img +ifndef CPUS +CPUS := 2 +endif +QEMUOPTS = -hdb fs.img xv6.img -smp $(CPUS) qemu: fs.img xv6.img - qemu -serial mon:stdio $(QEMUOPTS) + $(QEMU) -serial mon:stdio $(QEMUOPTS) + +qemu-memfs: xv6memfs.img + $(QEMU) xv6memfs.img -smp $(CPUS) qemu-nox: fs.img xv6.img - qemu -nographic $(QEMUOPTS) + $(QEMU) -nographic $(QEMUOPTS) .gdbinit: .gdbinit.tmpl sed "s/localhost:1234/localhost:$(GDBPORT)/" < $^ > $@ qemu-gdb: fs.img xv6.img .gdbinit @echo "*** Now run 'gdb'." 1>&2 - qemu -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) + $(QEMU) -serial mon:stdio $(QEMUOPTS) -S $(QEMUGDB) qemu-nox-gdb: fs.img xv6.img .gdbinit @echo "*** Now run 'gdb'." 
1>&2 - qemu -nographic $(QEMUOPTS) -S $(QEMUGDB) + $(QEMU) -nographic $(QEMUOPTS) -S $(QEMUGDB) # CUT HERE # prepare dist for students @@ -195,14 +252,16 @@ dist-test: rm -rf dist-test mkdir dist-test cp dist/* dist-test - cd dist-test; ../m print - cd dist-test; ../m bochs || true - cd dist-test; ../m qemu + cd dist-test; $(MAKE) print + cd dist-test; $(MAKE) bochs || true + cd dist-test; $(MAKE) qemu -# update this rule (change rev1) when it is time to +# update this rule (change rev#) when it is time to # make a new revision. tar: rm -rf /tmp/xv6 mkdir -p /tmp/xv6 cp dist/* dist/.gdbinit.tmpl /tmp/xv6 - (cd /tmp; tar cf - xv6) | gzip >xv6-rev3.tar.gz + (cd /tmp; tar cf - xv6) | gzip >xv6-rev5.tar.gz + +.PHONY: dist-test dist diff --git a/README b/README index 286d336d2b..22f5c4e6e3 100644 --- a/README +++ b/README @@ -19,6 +19,11 @@ The following people made contributions: Russ Cox (context switching, locking) Cliff Frey (MP) Xiao Yu (MP) + Nickolai Zeldovich + Austin Clements + +In addition, we are grateful for the patches contributed by Greg +Price, Yandong Mao, and Hitoshi Mitake. The code in the files that constitute xv6 is Copyright 2006-2007 Frans Kaashoek, Robert Morris, and Russ Cox. @@ -39,9 +44,7 @@ Then run "make TOOLPREFIX=i386-jos-elf-". To run xv6, you can use Bochs or QEMU, both PC simulators. Bochs makes debugging easier, but QEMU is much faster. To run in Bochs, run "make bochs" and then type "c" at the bochs prompt. -To run in QEMU, run "make qemu". Both log the xv6 screen output to -standard output. +To run in QEMU, run "make qemu". -To create a typeset version of the code, run "make xv6.pdf". -This requires the "mpage" text formatting utility. -See http://www.mesa.nl/pub/mpage/. +To create a typeset version of the code, run "make xv6.pdf". This +requires the "mpage" utility. See http://www.mesa.nl/pub/mpage/. 
diff --git a/bootasm.S b/bootasm.S index 56175cefbf..3cc23e7d5c 100644 --- a/bootasm.S +++ b/bootasm.S @@ -13,7 +13,7 @@ .code16 # Assemble for 16-bit mode .globl start start: - cli # Disable interrupts + cli # BIOS enabled interrupts; disable # Set up the important data segment registers (DS, ES, SS). xorw %ax,%ax # Segment number zero @@ -21,10 +21,8 @@ start: movw %ax,%es # -> Extra Segment movw %ax,%ss # -> Stack Segment - # Enable A20: - # For backwards compatibility with the earliest PCs, physical - # address line 20 is tied low, so that addresses higher than - # 1MB wrap around to zero by default. This code undoes this. + # Physical address line A20 is tied to zero so that the first PCs + # with 2 MB would run software that assumed 1 MB. Undo that. seta20.1: inb $0x64,%al # Wait for not busy testb $0x2,%al @@ -41,23 +39,21 @@ seta20.2: movb $0xdf,%al # 0xdf -> port 0x60 outb %al,$0x60 -//PAGEBREAK! - # Switch from real to protected mode, using a bootstrap GDT - # and segment translation that makes virtual addresses - # identical to physical addresses, so that the - # effective memory map does not change during the switch. + # Switch from real to protected mode. Use a bootstrap GDT that makes + # virtual addresses map directly to physical addresses so that the + # effective memory map doesn't change during the transition. lgdt gdtdesc movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 - - # This ljmp is how you load the CS (Code Segment) register. - # SEG_ASM produces segment descriptors with the 32-bit mode - # flag set (the D flag), so addresses and word operands will - # default to 32 bits after this jump. + +//PAGEBREAK! + # Complete transition to 32-bit protected mode by using long jmp + # to reload %cs and %eip. The segment registers are set up with no + # translation, so that the mapping is still the identity mapping. ljmp $(SEG_KCODE<<3), $start32 -.code32 # Assemble for 32-bit mode +.code32 # Tell assembler to generate 32-bit code now.
start32: # Set up the protected-mode data segment registers movw $(SEG_KDATA<<3), %ax # Our data segment selector diff --git a/bootmain.c b/bootmain.c index 14f4ff3e3b..7cd469fee6 100644 --- a/bootmain.c +++ b/bootmain.c @@ -33,8 +33,8 @@ bootmain(void) // Load each program segment (ignores ph flags). ph = (struct proghdr*)((uchar*)elf + elf->phoff); eph = ph + elf->phnum; - for(; ph < eph; ph++) { - va = (uchar*)(ph->va & 0xFFFFFF); + for(; ph < eph; ph++){ + va = (uchar*)ph->va; readseg(va, ph->filesz, ph->offset); if(ph->memsz > ph->filesz) stosb(va + ph->filesz, 0, ph->memsz - ph->filesz); @@ -42,7 +42,7 @@ bootmain(void) // Call the entry point from the ELF header. // Does not return! - entry = (void(*)(void))(elf->entry & 0xFFFFFF); + entry = (void(*)(void))(elf->entry); entry(); } diff --git a/bootother.S b/bootother.S index 899669ad9f..37b899b909 100644 --- a/bootother.S +++ b/bootother.S @@ -9,80 +9,69 @@ # Because this code sets DS to zero, it must sit # at an address in the low 2^16 bytes. # -# Bootothers (in main.c) sends the STARTUPs, one at a time. -# It puts this code (start) at 0x7000. -# It puts the correct %esp in start-4, -# and the place to jump to in start-8. +# Bootothers (in main.c) sends the STARTUPs one at a time. +# It copies this code (start) at 0x7000. +# It puts the address of a newly allocated per-core stack in start-4, +# and the address of the place to jump to (mpmain) in start-8. # # This code is identical to bootasm.S except: # - it does not need to enable A20 # - it uses the address at start-4 for the %esp # - it jumps to the address at start-8 instead of calling bootmain -#define SEG_KCODE 1 // kernel code -#define SEG_KDATA 2 // kernel data+stack +#define SEG_KCODE 1 +#define SEG_KDATA 2 -#define CR0_PE 1 // protected mode enable bit +#define CR0_PE 1 -.code16 # Assemble for 16-bit mode +.code16 .globl start start: - cli # Disable interrupts + cli - # Set up the important data segment registers (DS, ES, SS). 
- xorw %ax,%ax # Segment number zero - movw %ax,%ds # -> Data Segment - movw %ax,%es # -> Extra Segment - movw %ax,%ss # -> Stack Segment + xorw %ax,%ax + movw %ax,%ds + movw %ax,%es + movw %ax,%ss -//PAGEBREAK! - # Switch from real to protected mode, using a bootstrap GDT - # and segment translation that makes virtual addresses - # identical to physical addresses, so that the - # effective memory map does not change during the switch. lgdt gdtdesc movl %cr0, %eax orl $CR0_PE, %eax movl %eax, %cr0 - # This ljmp is how you load the CS (Code Segment) register. - # SEG_ASM produces segment descriptors with the 32-bit mode - # flag set (the D flag), so addresses and word operands will - # default to 32 bits after this jump. +//PAGEBREAK! ljmp $(SEG_KCODE<<3), $start32 -.code32 # Assemble for 32-bit mode +.code32 start32: - # Set up the protected-mode data segment registers - movw $(SEG_KDATA<<3), %ax # Our data segment selector - movw %ax, %ds # -> DS: Data Segment - movw %ax, %es # -> ES: Extra Segment - movw %ax, %ss # -> SS: Stack Segment - movw $0, %ax # Zero segments not ready for use - movw %ax, %fs # -> FS - movw %ax, %gs # -> GS + movw $(SEG_KDATA<<3), %ax + movw %ax, %ds + movw %ax, %es + movw %ax, %ss + movw $0, %ax + movw %ax, %fs + movw %ax, %gs - # Set up the stack pointer and call into C. + # switch to the stack allocated by bootothers() movl start-4, %esp + + # call mpmain() call *(start-8) - # If the call returns (it shouldn't), trigger a Bochs - # breakpoint if running under Bochs, then loop. 
- movw $0x8a00, %ax # 0x8a00 -> port 0x8a00 + movw $0x8a00, %ax movw %ax, %dx outw %ax, %dx - movw $0x8ae0, %ax # 0x8ae0 -> port 0x8a00 + movw $0x8ae0, %ax outw %ax, %dx spin: jmp spin -# Bootstrap GDT -.p2align 2 # force 4 byte alignment +.p2align 2 gdt: - SEG_NULLASM # null seg - SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg - SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg + SEG_NULLASM + SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) + SEG_ASM(STA_W, 0x0, 0xffffffff) gdtdesc: - .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 - .long gdt # address gdt + .word (gdtdesc - gdt - 1) + .long gdt diff --git a/console.c b/console.c index 16d0e7a1b0..27649e68ed 100644 --- a/console.c +++ b/console.c @@ -18,28 +18,29 @@ static void consputc(int); static int panicked = 0; static struct { - struct spinlock lock; - int locking; + struct spinlock lock; + int locking; } cons; static void -printint(int xx, int base, int sgn) +printint(int xx, int base, int sign) { static char digits[] = "0123456789abcdef"; char buf[16]; - int i = 0, neg = 0; + int i; uint x; - if(sgn && xx < 0){ - neg = 1; + if(sign && (sign = xx < 0)) x = -xx; - } else + else x = xx; + i = 0; do{ buf[i++] = digits[x % base]; }while((x /= base) != 0); - if(neg) + + if(sign) buf[i++] = '-'; while(--i >= 0) @@ -136,8 +137,7 @@ cgaputc(int c) if(c == '\n') pos += 80 - pos%80; else if(c == BACKSPACE){ - if(pos > 0) - crt[--pos] = ' ' | 0x0700; + if(pos > 0) --pos; } else crt[pos++] = (c&0xff) | 0x0700; // black on white @@ -163,16 +163,13 @@ consputc(int c) ; } - if (c == BACKSPACE) { - uartputc('\b'); - uartputc(' '); - uartputc('\b'); + if(c == BACKSPACE){ + uartputc('\b'); uartputc(' '); uartputc('\b'); } else uartputc(c); cgaputc(c); } -//PAGEBREAK: 50 #define INPUT_BUF 128 struct { struct spinlock lock; @@ -202,8 +199,7 @@ consoleintr(int (*getc)(void)) consputc(BACKSPACE); } break; - case C('H'): // Backspace - case '\x7f': + case C('H'): case '\x7f': // Backspace if(input.e != input.w){ input.e--; consputc(BACKSPACE); @@ 
-211,9 +207,7 @@ consoleintr(int (*getc)(void)) break; default: if(c != 0 && input.e-input.r < INPUT_BUF){ - // The serial port produces 0x13, not 0x10 - if(c == '\r') - c = '\n'; + c = (c == '\r') ? '\n' : c; input.buf[input.e++ % INPUT_BUF] = c; consputc(c); if(c == '\n' || c == C('D') || input.e == input.r+INPUT_BUF){ diff --git a/data.S b/data.S new file mode 100644 index 0000000000..8b651b4de8 --- /dev/null +++ b/data.S @@ -0,0 +1,26 @@ +// The kernel layout is: +// +// text +// rodata +// data +// bss +// +// Conventionally, Unix linkers provide pseudo-symbols +// etext, edata, and end, at the end of the text, data, and bss. +// For the kernel mapping, we need the address at the beginning +// of the data section, but that's not one of the conventional +// symbols, because the convention started before there was a +// read-only rodata section between text and data. +// +// To get the address of the data section, we define a symbol +// named data and make sure this is the first object passed to +// the linker, so that it will be the first symbol in the data section. +// +// Alternative approaches would be to parse our own ELF header +// or to write a linker script, but this is simplest. 
+ +.data +.align 4096 +.globl data +data: + .word 1 diff --git a/defs.h b/defs.h index ca7367d248..bbe4ae4f58 100644 --- a/defs.h +++ b/defs.h @@ -6,6 +6,7 @@ struct pipe; struct proc; struct spinlock; struct stat; +struct superblock; // bio.c void binit(void); @@ -32,6 +33,7 @@ int filestat(struct file*, struct stat*); int filewrite(struct file*, char*, int n); // fs.c +void readsb(int dev, struct superblock *sb); int dirlink(struct inode*, char*, uint); struct inode* dirlookup(struct inode*, char*, uint*); struct inode* ialloc(uint, short); @@ -62,7 +64,7 @@ void ioapicinit(void); // kalloc.c char* kalloc(void); void kfree(char*); -void kinit(char*,uint); +void kinit(void); // kbd.c void kbdintr(void); @@ -75,6 +77,12 @@ void lapicinit(int); void lapicstartap(uchar, uint); void microdelay(int); +// log.c +void initlog(void); +void log_write(struct buf*); +void begin_trans(); +void commit_trans(); + // mp.c extern int ismp; int mpbcpu(void); @@ -101,6 +109,7 @@ int kill(int); void pinit(void); void procdump(void); void scheduler(void) __attribute__((noreturn)); +void sched(void); void sleep(void*, struct spinlock*); void userinit(void); int wait(void); @@ -116,8 +125,8 @@ void getcallerpcs(void*, uint*); int holding(struct spinlock*); void initlock(struct spinlock*, char*); void release(struct spinlock*); -void pushcli(); -void popcli(); +void pushcli(void); +void popcli(void); // string.c int memcmp(const void*, const void*, uint); @@ -151,20 +160,20 @@ void uartintr(void); void uartputc(int); // vm.c -void pminit(void); -void ksegment(void); +void seginit(void); void kvmalloc(void); void vmenable(void); pde_t* setupkvm(void); char* uva2ka(pde_t*, char*); -int allocuvm(pde_t*, char*, uint); -int deallocuvm(pde_t *pgdir, char *addr, uint sz); +int allocuvm(pde_t*, uint, uint); +int deallocuvm(pde_t*, uint, uint); void freevm(pde_t*); -void inituvm(pde_t*, char*, char*, uint); -int loaduvm(pde_t*, char*, struct inode *ip, uint, uint); -pde_t* copyuvm(pde_t*,uint); 
+void inituvm(pde_t*, char*, uint); +int loaduvm(pde_t*, char*, struct inode*, uint, uint); +pde_t* copyuvm(pde_t*, uint); void switchuvm(struct proc*); -void switchkvm(); +void switchkvm(void); +int copyout(pde_t*, uint, void*, uint); // number of elements in fixed-size array #define NELEM(x) (sizeof(x)/sizeof((x)[0])) diff --git a/exec.c b/exec.c index 4f11695531..05f80f8f3f 100644 --- a/exec.c +++ b/exec.c @@ -9,20 +9,18 @@ int exec(char *path, char **argv) { - char *mem, *s, *last; - int i, argc, arglen, len, off; - uint sz, sp, spoffset, argp; + char *s, *last; + int i, off; + uint argc, sz, sp, ustack[3+MAXARG+1]; struct elfhdr elf; struct inode *ip; struct proghdr ph; pde_t *pgdir, *oldpgdir; - pgdir = 0; - sz = 0; - if((ip = namei(path)) == 0) return -1; ilock(ip); + pgdir = 0; // Check ELF header if(readi(ip, (char*)&elf, 0, sizeof(elf)) < sizeof(elf)) @@ -30,10 +28,11 @@ exec(char *path, char **argv) if(elf.magic != ELF_MAGIC) goto bad; - if (!(pgdir = setupkvm())) + if((pgdir = setupkvm()) == 0) goto bad; // Load program into memory. + sz = 0; for(i=0, off=elf.phoff; i sz) - sz = ph.va + ph.memsz; - if (!loaduvm(pgdir, (char *)ph.va, ip, ph.offset, ph.filesz)) + if(loaduvm(pgdir, (char*)ph.va, ip, ph.offset, ph.filesz) < 0) goto bad; } iunlockput(ip); + ip = 0; - // Allocate and initialize stack at sz + // Allocate a one-page stack at the next page boundary sz = PGROUNDUP(sz); - sz += PGSIZE; // leave an invalid page - if (!allocuvm(pgdir, (char *)sz, PGSIZE)) + if((sz = allocuvm(pgdir, sz, sz + PGSIZE)) == 0) goto bad; - mem = uva2ka(pgdir, (char *)sz); - spoffset = sz; - sz += PGSIZE; - - arglen = 0; - for(argc=0; argv[argc]; argc++) - arglen += strlen(argv[argc]) + 1; - arglen = (arglen+3) & ~3; + // Push argument strings, prepare rest of stack in ustack. sp = sz; - argp = sz - arglen - 4*(argc+1); - - // Copy argv strings and pointers to stack. 
- *(uint*)(mem+argp-spoffset + 4*argc) = 0; // argv[argc] - for(i=argc-1; i>=0; i--){ - len = strlen(argv[i]) + 1; - sp -= len; - memmove(mem+sp-spoffset, argv[i], len); - *(uint*)(mem+argp-spoffset + 4*i) = sp; // argv[i] + for(argc = 0; argv[argc]; argc++) { + if(argc >= MAXARG) + goto bad; + sp -= strlen(argv[argc]) + 1; + sp &= ~3; + if(copyout(pgdir, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[3+argc] = sp; } + ustack[3+argc] = 0; - // Stack frame for main(argc, argv), below arguments. - sp = argp; - sp -= 4; - *(uint*)(mem+sp-spoffset) = argp; - sp -= 4; - *(uint*)(mem+sp-spoffset) = argc; - sp -= 4; - *(uint*)(mem+sp-spoffset) = 0xffffffff; // fake return pc + ustack[0] = 0xffffffff; // fake return PC + ustack[1] = argc; + ustack[2] = sp - (argc+1)*4; // argv pointer + + sp -= (3+argc+1) * 4; + if(copyout(pgdir, sp, ustack, (3+argc+1)*4) < 0) + goto bad; // Save program name for debugging. for(last=s=path; *s; s++) @@ -97,15 +86,15 @@ exec(char *path, char **argv) proc->sz = sz; proc->tf->eip = elf.entry; // main proc->tf->esp = sp; - - switchuvm(proc); - + switchuvm(proc); freevm(oldpgdir); return 0; bad: - if (pgdir) freevm(pgdir); - iunlockput(ip); + if(pgdir) + freevm(pgdir); + if(ip) + iunlockput(ip); return -1; } diff --git a/fs.c b/fs.c index 15a4d35ff5..a414b6547e 100644 --- a/fs.c +++ b/fs.c @@ -25,7 +25,7 @@ static void itrunc(struct inode*); // Read the super block. -static void +void readsb(int dev, struct superblock *sb) { struct buf *bp; @@ -61,11 +61,11 @@ balloc(uint dev) readsb(dev, &sb); for(b = 0; b < sb.size; b += BPB){ bp = bread(dev, BBLOCK(b, sb.ninodes)); - for(bi = 0; bi < BPB; bi++){ + for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){ m = 1 << (bi % 8); if((bp->data[bi/8] & m) == 0){ // Is block free? bp->data[bi/8] |= m; // Mark block in use on disk. 
- bwrite(bp); + log_write(bp); brelse(bp); return b + bi; } @@ -92,7 +92,7 @@ bfree(int dev, uint b) if((bp->data[bi/8] & m) == 0) panic("freeing free block"); bp->data[bi/8] &= ~m; // Mark block free on disk. - bwrite(bp); + log_write(bp); brelse(bp); } @@ -159,7 +159,7 @@ ialloc(uint dev, short type) if(dip->type == 0){ // a free inode memset(dip, 0, sizeof(*dip)); dip->type = type; - bwrite(bp); // mark it allocated on the disk + log_write(bp); // mark it allocated on the disk brelse(bp); return iget(dev, inum); } @@ -183,7 +183,7 @@ iupdate(struct inode *ip) dip->nlink = ip->nlink; dip->size = ip->size; memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); - bwrite(bp); + log_write(bp); brelse(bp); } @@ -339,7 +339,7 @@ bmap(struct inode *ip, uint bn) a = (uint*)bp->data; if((addr = a[bn]) == 0){ a[bn] = addr = balloc(ip->dev); - bwrite(bp); + log_write(bp); } brelse(bp); return addr; diff --git a/fs.h b/fs.h index 6f92592dd8..c9e34bf536 100644 --- a/fs.h +++ b/fs.h @@ -13,6 +13,7 @@ struct superblock { uint size; // Size of file system image (blocks) uint nblocks; // Number of data blocks uint ninodes; // Number of inodes. + uint nlog; // Number of log blocks }; #define NDIRECT 12 @@ -41,7 +42,6 @@ struct dinode { // Block containing bit for block b #define BBLOCK(b, ninodes) (b/BPB + (ninodes)/IPB + 3) -// PAGEBREAK: 10 // Directory is a file containing a sequence of dirent structures. 
#define DIRSIZ 14 diff --git a/ide.c b/ide.c index 7b12aa0ef5..53293a7207 100644 --- a/ide.c +++ b/ide.c @@ -96,7 +96,7 @@ ideintr(void) acquire(&idelock); if((b = idequeue) == 0){ release(&idelock); - cprintf("Spurious IDE interrupt.\n"); + // cprintf("spurious IDE interrupt\n"); return; } idequeue = b->qnext; @@ -131,7 +131,7 @@ iderw(struct buf *b) if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) panic("iderw: nothing to do"); if(b->dev != 0 && !havedisk1) - panic("idrw: ide disk 1 not present"); + panic("iderw: ide disk 1 not present"); acquire(&idelock); @@ -147,7 +147,7 @@ iderw(struct buf *b) // Wait for request to finish. // Assuming will not sleep too long: ignore proc->killed. - while((b->flags & (B_VALID|B_DIRTY)) != B_VALID) { + while((b->flags & (B_VALID|B_DIRTY)) != B_VALID){ sleep(b, &idelock); } diff --git a/initcode.S b/initcode.S index 41e84f419f..d86660a475 100644 --- a/initcode.S +++ b/initcode.S @@ -3,9 +3,12 @@ #include "syscall.h" #include "traps.h" + # exec(init, argv) .globl start start: + movl $SYS_init, %eax + int $T_SYSCALL pushl $argv pushl $init pushl $0 // where caller pc would be diff --git a/kalloc.c b/kalloc.c index 65de759b99..bf1616afc0 100644 --- a/kalloc.c +++ b/kalloc.c @@ -17,17 +17,21 @@ struct { struct run *freelist; } kmem; +extern char end[]; // first address after kernel loaded from ELF file + // Initialize free list of physical pages. void -kinit(char *p, uint len) +kinit(void) { + char *p; + initlock(&kmem.lock, "kmem"); - char *p1 = (char*)PGROUNDUP((uint)p); - char *p2 = PGROUNDDOWN(p + len); - for( ; p1 < p2; p1 += 4096) - kfree(p1); + p = (char*)PGROUNDUP((uint)end); + for(; p + PGSIZE <= (char*)PHYSTOP; p += PGSIZE) + kfree(p); } +//PAGEBREAK: 21 // Free the page of physical memory pointed at by v, // which normally should have been returned by a // call to kalloc(). 
(The exception is when @@ -37,14 +41,14 @@ kfree(char *v) { struct run *r; - if(((uint) v) % PGSIZE || (uint)v < 1024*1024 || (uint)v >= PHYSTOP) + if((uint)v % PGSIZE || v < end || (uint)v >= PHYSTOP) panic("kfree"); // Fill with junk to catch dangling refs. memset(v, 1, PGSIZE); acquire(&kmem.lock); - r = (struct run *) v; + r = (struct run*)v; r->next = kmem.freelist; kmem.freelist = r; release(&kmem.lock); @@ -54,7 +58,7 @@ kfree(char *v) // Returns a pointer that the kernel can use. // Returns 0 if the memory cannot be allocated. char* -kalloc() +kalloc(void) { struct run *r; @@ -63,6 +67,6 @@ kalloc() if(r) kmem.freelist = r->next; release(&kmem.lock); - return (char*) r; + return (char*)r; } diff --git a/log.c b/log.c new file mode 100644 index 0000000000..72a0367be6 --- /dev/null +++ b/log.c @@ -0,0 +1,164 @@ +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "spinlock.h" +#include "fs.h" +#include "buf.h" + +// Dirt simple "logging" supporting only one transaction. All file system calls +// that potentially write a block should be wrapped in begin_trans and commit_trans, +// so that there is never more than one transaction. This serializes all file system +// operations that potentially write, but simplifies recovery (only the last +// one transaction to recover) and concurrency (don't have to worry about reading a modified +// block from a transaction that hasn't committed yet). + +// The header of the log. If head == 0, there are no log entries. All entries till head +// are committed. sector[] records the home sector for each block in the log +// (i.e., physical logging). 
+struct logheader { + int head; + int sector[LOGSIZE]; +}; + +struct { + struct spinlock lock; + int start; + int size; + int intrans; + int dev; + struct logheader lh; +} log; + +static void recover_from_log(void); + +void +initlog(void) +{ + if (sizeof(struct logheader) >= BSIZE) + panic("initlog: too big logheader"); + + struct superblock sb; + initlock(&log.lock, "log"); + readsb(ROOTDEV, &sb); + log.start = sb.size - sb.nlog; + log.size = sb.nlog; + log.dev = ROOTDEV; + recover_from_log(); +} + +// Copy committed blocks from log to their home location +static void +install_trans(void) +{ + int tail; + + if (log.lh.head > 0) + cprintf("install_trans %d\n", log.lh.head); + for (tail = 0; tail < log.lh.head; tail++) { + cprintf("put entry %d to disk block %d\n", tail, log.lh.sector[tail]); + struct buf *lbuf = bread(log.dev, log.start+tail+1); // read i'th block from log + struct buf *dbuf = bread(log.dev, log.lh.sector[tail]); // read dst block + memmove(dbuf->data, lbuf->data, BSIZE); + bwrite(dbuf); + brelse(lbuf); + brelse(dbuf); + } +} + +// Read the log header from disk into the in-memory log header +static void +read_head(void) +{ + struct buf *buf = bread(log.dev, log.start); + struct logheader *lh = (struct logheader *) (buf->data); + int i; + log.lh.head = lh->head; + for (i = 0; i < log.lh.head; i++) { + log.lh.sector[i] = lh->sector[i]; + } + brelse(buf); + if (log.lh.head > 0) + cprintf("read_head: %d\n", log.lh.head); +} + +// Write the in-memory log header to disk, committing log entries till head +static void +write_head(void) +{ + if (log.lh.head > 0) + cprintf("write_head: %d\n", log.lh.head); + + struct buf *buf = bread(log.dev, log.start); + struct logheader *hb = (struct logheader *) (buf->data); + int i; + hb->head = log.lh.head; + for (i = 0; i < log.lh.head; i++) { + hb->sector[i] = log.lh.sector[i]; + } + bwrite(buf); + brelse(buf); +} + +static void +recover_from_log(void) +{ + read_head(); + install_trans(); // Install all transactions 
till head + log.lh.head = 0; + write_head(); // Reclaim log +} + +void +begin_trans(void) +{ + acquire(&log.lock); + while (log.intrans) { + sleep(&log, &log.lock); + } + log.intrans = 1; + release(&log.lock); +} + +void +commit_trans(void) +{ + write_head(); // This causes all blocks till log.lh.head to be committed + install_trans(); // Install all the transactions till head + log.lh.head = 0; + write_head(); // Reclaim log + + acquire(&log.lock); + log.intrans = 0; + wakeup(&log); + release(&log.lock); +} + +// Write buffer into the log at log.lh.head and record the block number in log.lh.sector, but +// don't write the log header (which would commit the write). +void +log_write(struct buf *b) +{ + int i; + + if (log.lh.head >= LOGSIZE) + panic("too big a transaction"); + if (!log.intrans) + panic("write outside of trans"); + + cprintf("log_write: %d %d\n", b->sector, log.lh.head); + + for (i = 0; i < log.lh.head; i++) { + if (log.lh.sector[i] == b->sector) // log absorption? + break; + } + log.lh.sector[i] = b->sector; + struct buf *lbuf = bread(b->dev, log.start+i+1); + memmove(lbuf->data, b->data, BSIZE); + bwrite(lbuf); + brelse(lbuf); + if (i == log.lh.head) + log.lh.head++; +} diff --git a/main.c b/main.c index 878ea36b97..a27c4ffa2e 100644 --- a/main.c +++ b/main.c @@ -7,40 +7,45 @@ static void bootothers(void); static void mpmain(void); -void jkstack(void) __attribute__((noreturn)); +void jmpkstack(void) __attribute__((noreturn)); void mainc(void); // Bootstrap processor starts running C code here. +// Allocate a real stack and switch to it, first +// doing some setup required for memory allocator to work.
int main(void) { mpinit(); // collect info about this machine lapicinit(mpbcpu()); - ksegment(); // set up segments - picinit(); // interrupt controller - ioapicinit(); // another interrupt controller - consoleinit(); // I/O devices & their interrupts - uartinit(); // serial port - pminit(); // discover how much memory there is - jkstack(); // call mainc() on a properly-allocated stack + seginit(); // set up segments + kinit(); // initialize memory allocator + jmpkstack(); // call mainc() on a properly-allocated stack } void -jkstack(void) +jmpkstack(void) { - char *kstack = kalloc(); - if (!kstack) - panic("jkstack\n"); - char *top = kstack + PGSIZE; - asm volatile("movl %0,%%esp" : : "r" (top)); - asm volatile("call mainc"); - panic("jkstack"); + char *kstack, *top; + + kstack = kalloc(); + if(kstack == 0) + panic("jmpkstack kalloc"); + top = kstack + PGSIZE; + asm volatile("movl %0,%%esp; call mainc" : : "r" (top)); + panic("jmpkstack"); } +// Set up hardware and software. +// Runs only on the boostrap processor. void mainc(void) { cprintf("\ncpu%d: starting xv6\n\n", cpu->id); + picinit(); // interrupt controller + ioapicinit(); // another interrupt controller + consoleinit(); // I/O devices & their interrupts + uartinit(); // serial port kvmalloc(); // initialize the kernel page table pinit(); // process table tvinit(); // trap vectors @@ -63,17 +68,18 @@ mainc(void) static void mpmain(void) { - if(cpunum() != mpbcpu()) { - ksegment(); + if(cpunum() != mpbcpu()){ + seginit(); lapicinit(cpunum()); } vmenable(); // turn on paging cprintf("cpu%d: starting\n", cpu->id); idtinit(); // load idt register - xchg(&cpu->booted, 1); + xchg(&cpu->booted, 1); // tell bootothers() we're up scheduler(); // start running processes } +// Start the non-boot processors. static void bootothers(void) { @@ -82,19 +88,23 @@ bootothers(void) struct cpu *c; char *stack; - // Write bootstrap code to unused memory at 0x7000. The linker has - // placed the start of bootother.S there. 
- code = (uchar *) 0x7000; + // Write bootstrap code to unused memory at 0x7000. + // The linker has placed the image of bootother.S in + // _binary_bootother_start. + code = (uchar*)0x7000; memmove(code, _binary_bootother_start, (uint)_binary_bootother_size); for(c = cpus; c < cpus+ncpu; c++){ if(c == cpus+cpunum()) // We've started already. continue; - // Fill in %esp, %eip and start code on cpu. + // Tell bootother.S what stack to use and the address of mpmain; + // it expects to find these two addresses stored just before + // its first instruction. stack = kalloc(); *(void**)(code-4) = stack + KSTACKSIZE; *(void**)(code-8) = mpmain; + lapicstartap(c->id, (uint)code); // Wait for cpu to finish mpmain() @@ -103,3 +113,6 @@ bootothers(void) } } +//PAGEBREAK! +// Blank page. + diff --git a/memide.c b/memide.c new file mode 100644 index 0000000000..d2c5bb7e16 --- /dev/null +++ b/memide.c @@ -0,0 +1,58 @@ +// Fake IDE disk; stores blocks in memory. +// Useful for running kernel without scratch disk. + +#include "types.h" +#include "defs.h" +#include "param.h" +#include "mmu.h" +#include "proc.h" +#include "x86.h" +#include "traps.h" +#include "spinlock.h" +#include "buf.h" + +extern uchar _binary_fs_img_start[], _binary_fs_img_size[]; + +static int disksize; +static uchar *memdisk; + +void +ideinit(void) +{ + memdisk = _binary_fs_img_start; + disksize = (uint)_binary_fs_img_size/512; +} + +// Interrupt handler. +void +ideintr(void) +{ + // no-op +} + +// Sync buf with disk. +// If B_DIRTY is set, write buf to disk, clear B_DIRTY, set B_VALID. +// Else if B_VALID is not set, read buf from disk, set B_VALID. 
+void +iderw(struct buf *b) +{ + uchar *p; + + if(!(b->flags & B_BUSY)) + panic("iderw: buf not busy"); + if((b->flags & (B_VALID|B_DIRTY)) == B_VALID) + panic("iderw: nothing to do"); + if(b->dev != 1) + panic("iderw: request not for disk 1"); + if(b->sector >= disksize) + panic("iderw: sector out of range"); + + p = memdisk + b->sector*512; + + if(b->flags & B_DIRTY){ + b->flags &= ~B_DIRTY; + memmove(p, b->data, 512); + } else + memmove(b->data, p, 512); + b->flags |= B_VALID; +} diff --git a/mkfs.c b/mkfs.c index 3a3c62a616..f015edd628 100644 --- a/mkfs.c +++ b/mkfs.c @@ -4,11 +4,15 @@ #include #include #include + +#define stat xv6_stat // avoid clash with host struct stat #include "types.h" #include "fs.h" #include "stat.h" +#include "param.h" -int nblocks = 995; +int nblocks = 985; +int nlog = LOGSIZE; int ninodes = 200; int size = 1024; @@ -33,7 +37,7 @@ ushort xshort(ushort x) { ushort y; - uchar *a = (uchar*) &y; + uchar *a = (uchar*)&y; a[0] = x; a[1] = x >> 8; return y; @@ -43,7 +47,7 @@ uint xint(uint x) { uint y; - uchar *a = (uchar*) &y; + uchar *a = (uchar*)&y; a[0] = x; a[1] = x >> 8; a[2] = x >> 16; @@ -77,20 +81,23 @@ main(int argc, char *argv[]) sb.size = xint(size); sb.nblocks = xint(nblocks); // so whole disk is size sectors sb.ninodes = xint(ninodes); + sb.nlog = xint(nlog); bitblocks = size/(512*8) + 1; usedblocks = ninodes / IPB + 3 + bitblocks; freeblock = usedblocks; - printf("used %d (bit %d ninode %lu) free %u total %d\n", usedblocks, - bitblocks, ninodes/IPB + 1, freeblock, nblocks+usedblocks); + printf("used %d (bit %d ninode %zu) free %u log %u total %d\n", usedblocks, + bitblocks, ninodes/IPB + 1, freeblock, nlog, nblocks+usedblocks+nlog); - assert(nblocks + usedblocks == size); + assert(nblocks + usedblocks + nlog == size); - for(i = 0; i < nblocks + usedblocks; i++) + for(i = 0; i < nblocks + usedblocks + nlog; i++) wsect(i, zeroes); - wsect(1, &sb); + memset(buf, 0, sizeof(buf)); + memmove(buf, &sb, sizeof(sb)); + wsect(1, buf); 
rootino = ialloc(T_DIR); assert(rootino == ROOTINO); @@ -173,7 +180,7 @@ winode(uint inum, struct dinode *ip) bn = i2b(inum); rsect(bn, buf); - dip = ((struct dinode*) buf) + (inum % IPB); + dip = ((struct dinode*)buf) + (inum % IPB); *dip = *ip; wsect(bn, buf); } @@ -187,7 +194,7 @@ rinode(uint inum, struct dinode *ip) bn = i2b(inum); rsect(bn, buf); - dip = ((struct dinode*) buf) + (inum % IPB); + dip = ((struct dinode*)buf) + (inum % IPB); *ip = *dip; } @@ -225,12 +232,12 @@ balloc(int used) int i; printf("balloc: first %d blocks have been allocated\n", used); - assert(used < 512); + assert(used < 512*8); bzero(buf, 512); - for(i = 0; i < used; i++) { + for(i = 0; i < used; i++){ buf[i/8] = buf[i/8] | (0x1 << (i%8)); } - printf("balloc: write bitmap block at sector %lu\n", ninodes/IPB + 3); + printf("balloc: write bitmap block at sector %zu\n", ninodes/IPB + 3); wsect(ninodes / IPB + 3, buf); } @@ -239,7 +246,7 @@ balloc(int used) void iappend(uint inum, void *xp, int n) { - char *p = (char*) xp; + char *p = (char*)xp; uint fbn, off, n1; struct dinode din; char buf[512]; @@ -252,24 +259,24 @@ iappend(uint inum, void *xp, int n) while(n > 0){ fbn = off / 512; assert(fbn < MAXFILE); - if(fbn < NDIRECT) { - if(xint(din.addrs[fbn]) == 0) { + if(fbn < NDIRECT){ + if(xint(din.addrs[fbn]) == 0){ din.addrs[fbn] = xint(freeblock++); usedblocks++; } x = xint(din.addrs[fbn]); } else { - if(xint(din.addrs[NDIRECT]) == 0) { + if(xint(din.addrs[NDIRECT]) == 0){ // printf("allocate indirect block\n"); din.addrs[NDIRECT] = xint(freeblock++); usedblocks++; } // printf("read indirect block\n"); - rsect(xint(din.addrs[NDIRECT]), (char*) indirect); - if(indirect[fbn - NDIRECT] == 0) { + rsect(xint(din.addrs[NDIRECT]), (char*)indirect); + if(indirect[fbn - NDIRECT] == 0){ indirect[fbn - NDIRECT] = xint(freeblock++); usedblocks++; - wsect(xint(din.addrs[NDIRECT]), (char*) indirect); + wsect(xint(din.addrs[NDIRECT]), (char*)indirect); } x = xint(indirect[fbn-NDIRECT]); } diff --git 
a/mmu.h b/mmu.h index db40f2588f..2d88a52128 100644 --- a/mmu.h +++ b/mmu.h @@ -24,6 +24,20 @@ #define FL_VIP 0x00100000 // Virtual Interrupt Pending #define FL_ID 0x00200000 // ID flag +// Control Register flags +#define CR0_PE 0x00000001 // Protection Enable +#define CR0_MP 0x00000002 // Monitor coProcessor +#define CR0_EM 0x00000004 // Emulation +#define CR0_TS 0x00000008 // Task Switched +#define CR0_ET 0x00000010 // Extension Type +#define CR0_NE 0x00000020 // Numeric Errror +#define CR0_WP 0x00010000 // Write Protect +#define CR0_AM 0x00040000 // Alignment Mask +#define CR0_NW 0x20000000 // Not Writethrough +#define CR0_CD 0x40000000 // Cache Disable +#define CR0_PG 0x80000000 // Paging + +//PAGEBREAK! // Segment Descriptor struct segdesc { uint lim_15_0 : 16; // Low bits of segment limit @@ -46,7 +60,6 @@ struct segdesc { { ((lim) >> 12) & 0xffff, (uint)(base) & 0xffff, \ ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ (uint)(lim) >> 28, 0, 0, 1, 1, (uint)(base) >> 24 } - #define SEG16(type, base, lim, dpl) (struct segdesc) \ { (lim) & 0xffff, (uint)(base) & 0xffff, \ ((uint)(base) >> 16) & 0xff, type, 1, dpl, 1, \ @@ -62,8 +75,6 @@ struct segdesc { #define STA_R 0x2 // Readable (executable segments) #define STA_A 0x1 // Accessed -// - // System segment type bits #define STS_T16A 0x1 // Available 16-bit TSS #define STS_LDT 0x2 // Local Descriptor Table @@ -78,7 +89,6 @@ struct segdesc { #define STS_IG32 0xE // 32-bit Interrupt Gate #define STS_TG32 0xF // 32-bit Trap Gate - // A linear address 'la' has a three-part structure as follows: // // +--------10------+-------10-------+---------12----------+ @@ -88,18 +98,18 @@ struct segdesc { // \--- PDX(la) --/ \--- PTX(la) --/ // page directory index -#define PDX(la) ((((uint) (la)) >> PDXSHIFT) & 0x3FF) +#define PDX(la) (((uint)(la) >> PDXSHIFT) & 0x3FF) // page table index -#define PTX(la) ((((uint) (la)) >> PTXSHIFT) & 0x3FF) +#define PTX(la) (((uint)(la) >> PTXSHIFT) & 0x3FF) // construct linear address from 
indexes and offset -#define PGADDR(d, t, o) ((uint) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) +#define PGADDR(d, t, o) ((uint)((d) << PDXSHIFT | (t) << PTXSHIFT | (o))) // turn a kernel linear address into a physical address. // all of the kernel data structures have linear and // physical addresses that are equal. -#define PADDR(a) ((uint) a) +#define PADDR(a) ((uint)(a)) // Page directory and page table constants. #define NPDENTRIES 1024 // page directory entries per page directory @@ -126,25 +136,10 @@ struct segdesc { #define PTE_MBZ 0x180 // Bits must be zero // Address in page table or page directory entry -#define PTE_ADDR(pte) ((uint) (pte) & ~0xFFF) +#define PTE_ADDR(pte) ((uint)(pte) & ~0xFFF) typedef uint pte_t; -// Control Register flags -#define CR0_PE 0x00000001 // Protection Enable -#define CR0_MP 0x00000002 // Monitor coProcessor -#define CR0_EM 0x00000004 // Emulation -#define CR0_TS 0x00000008 // Task Switched -#define CR0_ET 0x00000010 // Extension Type -#define CR0_NE 0x00000020 // Numeric Errror -#define CR0_WP 0x00010000 // Write Protect -#define CR0_AM 0x00040000 // Alignment Mask -#define CR0_NW 0x20000000 // Not Writethrough -#define CR0_CD 0x40000000 // Cache Disable -#define CR0_PG 0x80000000 // Paging - - -// PAGEBREAK: 40 // Task state segment format struct taskstate { uint link; // Old ts selector @@ -210,7 +205,7 @@ struct gatedesc { // this interrupt/trap gate explicitly using an int instruction. 
#define SETGATE(gate, istrap, sel, off, d) \ { \ - (gate).off_15_0 = (uint) (off) & 0xffff; \ + (gate).off_15_0 = (uint)(off) & 0xffff; \ (gate).cs = (sel); \ (gate).args = 0; \ (gate).rsv1 = 0; \ @@ -218,6 +213,6 @@ struct gatedesc { (gate).s = 0; \ (gate).dpl = (d); \ (gate).p = 1; \ - (gate).off_31_16 = (uint) (off) >> 16; \ + (gate).off_31_16 = (uint)(off) >> 16; \ } diff --git a/mp.c b/mp.c index d2f828adab..5ab348ef9e 100644 --- a/mp.c +++ b/mp.c @@ -39,7 +39,6 @@ mpsearch1(uchar *addr, int len) { uchar *e, *p; - cprintf("mpsearch1 0x%x %d\n", addr, len); e = addr+len; for(p = addr; p < e; p += sizeof(struct mp)) if(memcmp(p, "_MP_", 4) == 0 && sum(p, sizeof(struct mp)) == 0) @@ -113,9 +112,9 @@ mpinit(void) switch(*p){ case MPPROC: proc = (struct mpproc*)p; - if(ncpu != proc->apicid) { - cprintf("mpinit: ncpu=%d apicpid=%d", ncpu, proc->apicid); - panic("mpinit"); + if(ncpu != proc->apicid){ + cprintf("mpinit: ncpu=%d apicid=%d\n", ncpu, proc->apicid); + ismp = 0; } if(proc->flags & MPBOOT) bcpu = &cpus[ncpu]; @@ -135,9 +134,17 @@ mpinit(void) continue; default: cprintf("mpinit: unknown config type %x\n", *p); - panic("mpinit"); + ismp = 0; } } + if(!ismp){ + // Didn't like what we found; fall back to no MP. + ncpu = 1; + lapic = 0; + ioapicid = 0; + return; + } + if(mp->imcrp){ // Bochs doesn't support IMCR, so this doesn't run on Bochs. // But it would on real hardware. diff --git a/multiboot.S b/multiboot.S new file mode 100644 index 0000000000..2579b6d9ae --- /dev/null +++ b/multiboot.S @@ -0,0 +1,75 @@ +# Multiboot header, for multiboot boot loaders like GNU Grub. +# http://www.gnu.org/software/grub/manual/multiboot/multiboot.html +# +# Using GRUB 2, you can boot xv6 from a file stored in a +# Linux file system by copying kernel or kernelmemfs to /boot +# and then adding this menu entry: +# +# menuentry "xv6" { +# insmod ext2 +# set root='(hd0,msdos1)' +# set kernel='/boot/kernel' +# echo "Loading ${kernel}..." 
+# multiboot ${kernel} ${kernel} +# boot +# } + +#include "asm.h" + +#define STACK 4096 + +#define SEG_KCODE 1 // kernel code +#define SEG_KDATA 2 // kernel data+stack + +# Multiboot header. Data to direct multiboot loader. +.p2align 2 +.text +.globl multiboot_header +multiboot_header: + #define magic 0x1badb002 + #define flags (1<<16 | 1<<0) + .long magic + .long flags + .long (-magic-flags) + .long multiboot_header # beginning of image + .long multiboot_header + .long edata + .long end + .long multiboot_entry + +# Multiboot entry point. Machine is mostly set up. +# Configure the GDT to match the environment that our usual +# boot loader - bootasm.S - sets up. +.globl multiboot_entry +multiboot_entry: + lgdt gdtdesc + ljmp $(SEG_KCODE<<3), $mbstart32 + +mbstart32: + # Set up the protected-mode data segment registers + movw $(SEG_KDATA<<3), %ax # Our data segment selector + movw %ax, %ds # -> DS: Data Segment + movw %ax, %es # -> ES: Extra Segment + movw %ax, %ss # -> SS: Stack Segment + movw $0, %ax # Zero segments not ready for use + movw %ax, %fs # -> FS + movw %ax, %gs # -> GS + + # Set up the stack pointer and call into C. 
+ movl $(stack + STACK), %esp + call main +spin: + jmp spin + +# Bootstrap GDT +.p2align 2 # force 4 byte alignment +gdt: + SEG_NULLASM # null seg + SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg + SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg + +gdtdesc: + .word (gdtdesc - gdt - 1) # sizeof(gdt) - 1 + .long gdt # address gdt + +.comm stack, STACK diff --git a/param.h b/param.h index 48c3352c5d..ab1b9fe921 100644 --- a/param.h +++ b/param.h @@ -7,4 +7,8 @@ #define NINODE 50 // maximum number of active i-nodes #define NDEV 10 // maximum major device number #define ROOTDEV 1 // device number of file system root disk +#define USERTOP 0xA0000 // end of user address space #define PHYSTOP 0x1000000 // use phys mem up to here as free pool +#define MAXARG 32 // max exec arguments +#define LOGSIZE 10 // size of log + diff --git a/picirq.c b/picirq.c index 1230c13187..ff86831311 100644 --- a/picirq.c +++ b/picirq.c @@ -82,32 +82,3 @@ picinit(void) if(irqmask != 0xFFFF) picsetmask(irqmask); } - - - - - - - - - - - - - - - - - - - - - - - - - - - - -// Blank page. 
diff --git a/pipe.c b/pipe.c index bc847b9e8a..f76ed5c557 100644 --- a/pipe.c +++ b/pipe.c @@ -66,7 +66,7 @@ pipeclose(struct pipe *p, int writable) p->readopen = 0; wakeup(&p->nwrite); } - if(p->readopen == 0 && p->writeopen == 0) { + if(p->readopen == 0 && p->writeopen == 0){ release(&p->lock); kfree((char*)p); } else @@ -81,7 +81,7 @@ pipewrite(struct pipe *p, char *addr, int n) acquire(&p->lock); for(i = 0; i < n; i++){ - while(p->nwrite == p->nread + PIPESIZE) { //DOC: pipewrite-full + while(p->nwrite == p->nread + PIPESIZE){ //DOC: pipewrite-full if(p->readopen == 0 || proc->killed){ release(&p->lock); return -1; diff --git a/proc.c b/proc.c index 5ac27800ed..eb334d0086 100644 --- a/proc.c +++ b/proc.c @@ -17,53 +17,18 @@ int nextpid = 1; extern void forkret(void); extern void trapret(void); +static void wakeup1(void *chan); + void pinit(void) { initlock(&ptable.lock, "ptable"); } -//PAGEBREAK: 36 -// Print a process listing to console. For debugging. -// Runs when user types ^P on console. -// No lock to avoid wedging a stuck machine further. -void -procdump(void) -{ - static char *states[] = { - [UNUSED] "unused", - [EMBRYO] "embryo", - [SLEEPING] "sleep ", - [RUNNABLE] "runble", - [RUNNING] "run ", - [ZOMBIE] "zombie" - }; - int i; - struct proc *p; - char *state; - uint pc[10]; - - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->state == UNUSED) - continue; - if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) - state = states[p->state]; - else - state = "???"; - cprintf("%d %s %s", p->pid, state, p->name); - if(p->state == SLEEPING){ - getcallerpcs((uint*)p->context->ebp+2, pc); - for(i=0; i<10 && pc[i] != 0; i++) - cprintf(" %p", pc[i]); - } - cprintf("\n"); - } -} - - //PAGEBREAK: 32 // Look in the process table for an UNUSED proc. -// If found, change state to EMBRYO and return it. +// If found, change state to EMBRYO and initialize +// state required to run in the kernel. // Otherwise return 0. 
static struct proc* allocproc(void) @@ -95,7 +60,7 @@ allocproc(void) p->tf = (struct trapframe*)sp; // Set up new context to start executing at forkret, - // which returns to trapret (see below). + // which returns to trapret. sp -= 4; *(uint*)sp = (uint)trapret; @@ -103,6 +68,7 @@ allocproc(void) p->context = (struct context*)sp; memset(p->context, 0, sizeof *p->context); p->context->eip = (uint)forkret; + return p; } @@ -116,12 +82,10 @@ userinit(void) p = allocproc(); initproc = p; - if (!(p->pgdir = setupkvm())) + if((p->pgdir = setupkvm()) == 0) panic("userinit: out of memory?"); - if (!allocuvm(p->pgdir, 0x0, (int)_binary_initcode_size)) - panic("userinit: out of memory?"); - inituvm(p->pgdir, 0x0, _binary_initcode_start, (int)_binary_initcode_size); - p->sz = PGROUNDUP((int)_binary_initcode_size); + inituvm(p->pgdir, _binary_initcode_start, (int)_binary_initcode_size); + p->sz = PGSIZE; memset(p->tf, 0, sizeof(*p->tf)); p->tf->cs = (SEG_UCODE << 3) | DPL_USER; p->tf->ds = (SEG_UDATA << 3) | DPL_USER; @@ -142,14 +106,17 @@ userinit(void) int growproc(int n) { + uint sz; + + sz = proc->sz; if(n > 0){ - if (!allocuvm(proc->pgdir, (char *)proc->sz, n)) + if((sz = allocuvm(proc->pgdir, sz, sz + n)) == 0) return -1; } else if(n < 0){ - if (!deallocuvm(proc->pgdir, (char *)(proc->sz + n), 0 - n)) + if((sz = deallocuvm(proc->pgdir, sz, sz + n)) == 0) return -1; } - proc->sz += n; + proc->sz = sz; switchuvm(proc); return 0; } @@ -168,7 +135,7 @@ fork(void) return -1; // Copy process state from p. - if (!(np->pgdir = copyuvm(proc->pgdir, proc->sz))) { + if((np->pgdir = copyuvm(proc->pgdir, proc->sz)) == 0){ kfree(np->kstack); np->kstack = 0; np->state = UNUSED; @@ -192,6 +159,92 @@ fork(void) return pid; } +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait() to find out it exited. 
+void +exit(void) +{ + struct proc *p; + int fd; + + if(proc == initproc) + panic("init exiting"); + + // Close all open files. + for(fd = 0; fd < NOFILE; fd++){ + if(proc->ofile[fd]){ + fileclose(proc->ofile[fd]); + proc->ofile[fd] = 0; + } + } + + iput(proc->cwd); + proc->cwd = 0; + + acquire(&ptable.lock); + + // Parent might be sleeping in wait(). + wakeup1(proc->parent); + + // Pass abandoned children to init. + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->parent == proc){ + p->parent = initproc; + if(p->state == ZOMBIE) + wakeup1(initproc); + } + } + + // Jump into the scheduler, never to return. + proc->state = ZOMBIE; + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. +int +wait(void) +{ + struct proc *p; + int havekids, pid; + + acquire(&ptable.lock); + for(;;){ + // Scan through table looking for zombie children. + havekids = 0; + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ + if(p->parent != proc) + continue; + havekids = 1; + if(p->state == ZOMBIE){ + // Found one. + pid = p->pid; + kfree(p->kstack); + p->kstack = 0; + freevm(p->pgdir); + p->state = UNUSED; + p->pid = 0; + p->parent = 0; + p->name[0] = 0; + p->killed = 0; + release(&ptable.lock); + return pid; + } + } + + // No point waiting if we don't have any children. + if(!havekids || proc->killed){ + release(&ptable.lock); + return -1; + } + + // Wait for children to exit. (See wakeup1 call in proc_exit.) + sleep(proc, &ptable.lock); //DOC: wait-sleep + } +} + //PAGEBREAK: 42 // Per-CPU process scheduler. // Each CPU calls scheduler() after setting itself up. @@ -356,89 +409,41 @@ kill(int pid) return -1; } -// Exit the current process. Does not return. -// An exited process remains in the zombie state -// until its parent calls wait() to find out it exited. +//PAGEBREAK: 36 +// Print a process listing to console. For debugging. +// Runs when user types ^P on console. 
+// No lock to avoid wedging a stuck machine further. void -exit(void) +procdump(void) { + static char *states[] = { + [UNUSED] "unused", + [EMBRYO] "embryo", + [SLEEPING] "sleep ", + [RUNNABLE] "runble", + [RUNNING] "run ", + [ZOMBIE] "zombie" + }; + int i; struct proc *p; - int fd; - - if(proc == initproc) - panic("init exiting"); - - // Close all open files. - for(fd = 0; fd < NOFILE; fd++){ - if(proc->ofile[fd]){ - fileclose(proc->ofile[fd]); - proc->ofile[fd] = 0; - } - } - - iput(proc->cwd); - proc->cwd = 0; - - acquire(&ptable.lock); - - // Parent might be sleeping in wait(). - wakeup1(proc->parent); - - // Pass abandoned children to init. + char *state; + uint pc[10]; + for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent == proc){ - p->parent = initproc; - if(p->state == ZOMBIE) - wakeup1(initproc); + if(p->state == UNUSED) + continue; + if(p->state >= 0 && p->state < NELEM(states) && states[p->state]) + state = states[p->state]; + else + state = "???"; + cprintf("%d %s %s", p->pid, state, p->name); + if(p->state == SLEEPING){ + getcallerpcs((uint*)p->context->ebp+2, pc); + for(i=0; i<10 && pc[i] != 0; i++) + cprintf(" %p", pc[i]); } + cprintf("\n"); } - - // Jump into the scheduler, never to return. - proc->state = ZOMBIE; - sched(); - panic("zombie exit"); } -// Wait for a child process to exit and return its pid. -// Return -1 if this process has no children. -int -wait(void) -{ - struct proc *p; - int havekids, pid; - - acquire(&ptable.lock); - for(;;){ - // Scan through table looking for zombie children. - havekids = 0; - for(p = ptable.proc; p < &ptable.proc[NPROC]; p++){ - if(p->parent != proc) - continue; - havekids = 1; - if(p->state == ZOMBIE){ - // Found one. - pid = p->pid; - kfree(p->kstack); - p->kstack = 0; - freevm(p->pgdir); - p->state = UNUSED; - p->pid = 0; - p->parent = 0; - p->name[0] = 0; - p->killed = 0; - release(&ptable.lock); - return pid; - } - } - - // No point waiting if we don't have any children. 
- if(!havekids || proc->killed){ - release(&ptable.lock); - return -1; - } - - // Wait for children to exit. (See wakeup1 call in proc_exit.) - sleep(proc, &ptable.lock); //DOC: wait-sleep - } -} diff --git a/proc.h b/proc.h index 7d97dfa6b7..7ffaffb6f0 100644 --- a/proc.h +++ b/proc.h @@ -8,6 +8,36 @@ #define SEG_TSS 6 // this process's task state #define NSEGS 7 +// Per-CPU state +struct cpu { + uchar id; // Local APIC ID; index into cpus[] below + struct context *scheduler; // swtch() here to enter scheduler + struct taskstate ts; // Used by x86 to find stack for interrupt + struct segdesc gdt[NSEGS]; // x86 global descriptor table + volatile uint booted; // Has the CPU started? + int ncli; // Depth of pushcli nesting. + int intena; // Were interrupts enabled before pushcli? + + // Cpu-local storage variables; see below + struct cpu *cpu; + struct proc *proc; // The currently-running process. +}; + +extern struct cpu cpus[NCPU]; +extern int ncpu; + +// Per-CPU variables, holding pointers to the +// current cpu and to the current process. +// The asm suffix tells gcc to use "%gs:0" to refer to cpu +// and "%gs:4" to refer to proc. seginit sets up the +// %gs segment register so that %gs refers to the memory +// holding those two variables in the local cpu's struct cpu. +// This is similar to how thread-local variables are implemented +// in thread libraries such as Linux pthreads. +extern struct cpu *cpu asm("%gs:0"); // &cpus[cpunum()] +extern struct proc *proc asm("%gs:4"); // cpus[cpunum()].proc + +//PAGEBREAK: 17 // Saved registers for kernel context switches. // Don't need to save all the segment registers (%cs, etc), // because they are constant across kernel contexts. 
@@ -31,13 +61,13 @@ enum procstate { UNUSED, EMBRYO, SLEEPING, RUNNABLE, RUNNING, ZOMBIE }; // Per-process state struct proc { uint sz; // Size of process memory (bytes) - pde_t* pgdir; // Linear address of proc's pgdir + pde_t* pgdir; // Page table char *kstack; // Bottom of kernel stack for this process enum procstate state; // Process state volatile int pid; // Process ID struct proc *parent; // Parent process struct trapframe *tf; // Trap frame for current syscall - struct context *context; // Switch here to run process + struct context *context; // swtch() here to run process void *chan; // If non-zero, sleeping on chan int killed; // If non-zero, have been killed struct file *ofile[NOFILE]; // Open files @@ -48,35 +78,5 @@ struct proc { // Process memory is laid out contiguously, low addresses first: // text // original data and bss -// invalid page // fixed-size stack // expandable heap - -// Per-CPU state -struct cpu { - uchar id; // Local APIC ID; index into cpus[] below - struct context *scheduler; // Switch here to enter scheduler - struct taskstate ts; // Used by x86 to find stack for interrupt - struct segdesc gdt[NSEGS]; // x86 global descriptor table - volatile uint booted; // Has the CPU started? - int ncli; // Depth of pushcli nesting. - int intena; // Were interrupts enabled before pushcli? - - // Cpu-local storage variables; see below - struct cpu *cpu; - struct proc *proc; -}; - -extern struct cpu cpus[NCPU]; -extern int ncpu; - -// Per-CPU variables, holding pointers to the -// current cpu and to the current process. -// The asm suffix tells gcc to use "%gs:0" to refer to cpu -// and "%gs:4" to refer to proc. ksegment sets up the -// %gs segment register so that %gs refers to the memory -// holding those two variables in the local cpu's struct cpu. -// This is similar to how thread-local variables are implemented -// in thread libraries such as Linux pthreads. -extern struct cpu *cpu asm("%gs:0"); // This cpu. 
-extern struct proc *proc asm("%gs:4"); // Current proc on this cpu. diff --git a/runoff b/runoff index 21ee8ed5eb..2b48cd50b2 100755 --- a/runoff +++ b/runoff @@ -58,6 +58,13 @@ perl -e ' next; } + if(/sheet1: (left|right)$/){ + print STDERR "assuming that sheet 1 is a $1 page. double-check!\n"; + $left = $1 eq "left" ? "13579" : "02468"; + $right = $1 eq "left" ? "02468" : "13579"; + next; + } + if(/even: (.*)/){ $file = $1; if(!defined($toc{$file})){ @@ -89,18 +96,13 @@ perl -e ' print STDERR "Have no toc for $file\n"; next; } - # this assumes that sheet 1 of code is a left page - # double-check the PDF - if(!$leftwarn++) { - print STDERR "assuming that sheet 1 is a left page. double-check!\n"; - } - if($what eq "left" && !($toc{$file} =~ /^\d[13579]0/)){ - print STDERR "$file does not start on a fresh left page [$toc{$file}]\n"; + if($what eq "left" && !($toc{$file} =~ /^\d[$left][05]/)){ + print STDERR "$file does not start on a left page [$toc{$file}]\n"; } # why does this not work if I inline $x in the if? 
- $x = ($toc{$file} =~ /^\d[02468]0/); + $x = ($toc{$file} =~ /^\d[$right][05]/); if($what eq "right" && !$x){ - print STDERR "$file does not start on a fresh right page [$toc{$file}] [$x]\n"; + print STDERR "$file does not start on a right page [$toc{$file}] [$x]\n"; } next; } @@ -189,7 +191,9 @@ do uses=`egrep -h '([^a-zA-Z_0-9])'$i'($|[^a-zA-Z_0-9])' alltext | awk '{print $1}'` if [ "x$defs" != "x$uses" ]; then echo $i $defs - echo $uses |fmt -24 | sed 's/^/ /' + echo $uses |fmt -29 | sed 's/^/ /' +# else +# echo $i defined but not used >&2 fi done ) >refs diff --git a/runoff.list b/runoff.list index 3258398868..f0edaf0c61 100644 --- a/runoff.list +++ b/runoff.list @@ -22,8 +22,8 @@ proc.h proc.c swtch.S kalloc.c +data.S vm.c - # system calls traps.h vectors.pl @@ -46,11 +46,10 @@ file.c sysfile.c exec.c - - # pipes pipe.c + # string operations string.c @@ -65,6 +64,7 @@ kbd.c console.c timer.c uart.c +multiboot.S # user-level initcode.S @@ -73,3 +73,6 @@ init.c sh.c + + + diff --git a/runoff.spec b/runoff.spec index e4cfd426bd..4d00038a25 100644 --- a/runoff.spec +++ b/runoff.spec @@ -1,3 +1,16 @@ +sheet1: left + +# "left" and "right" specify which page of a two-page spread a file +# must start on. "left" means that a file must start on the first of +# the two pages. "right" means it must start on the second of the two +# pages. The file may start in either column. +# +# "even" and "odd" specify which column a file must start on. "even" +# means it must start in the left of the two columns (00). "odd" means it +# must start in the right of the two columns (50). +# +# You'd think these would be the other way around. 
+ # types.h either # param.h either # defs.h either @@ -9,25 +22,36 @@ even: bootasm.S # mild preference even: bootother.S # mild preference -# bootmain.c either +even: bootmain.c # mild preference even: main.c # mp.c don't care at all # even: initcode.S # odd: init.c # spinlock.h either -# spinlock.c either -even: proc.h # mild preference +left: spinlock.h # mild preference +even: spinlock.h # mild preference + +# This gets struct proc and allocproc on the same spread +left: proc.h +even: proc.h # goal is to have two action-packed 2-page spreads, # one with -# ksegment usegment allocproc userinit growproc fork +# userinit growproc fork exit wait # and another with # scheduler sched yield forkret sleep wakeup1 wakeup right: proc.c # VERY important +even: proc.c # VERY important + +# A few more action packed spreads +# page table creation and process loading +# walkpgdir mappages setupkvm vmenable switch[ku]vm inituvm loaduvm +# process memory management +# allocuvm deallocuvm freevm +left: vm.c +odd: vm.c -# setjmp.S either -# vm.c either # kalloc.c either # syscall.h either @@ -45,15 +69,25 @@ right: proc.c # VERY important # file.h either # fs.h either # fsvar.h either -left: ide.c +# left: ide.c # mild preference +even: ide.c # odd: bio.c + +# with fs.c starting on 2nd column of a left page, we get these 2-page spreads: +# ialloc iupdate iget idup ilock iunlock iput iunlockput +# bmap itrunc stati readi writei +# namecmp dirlookup dirlink skipelem namex namei +# fielinit filealloc filedup fileclose filestat fileread filewrite +# starting on 2nd column of a right page is not terrible either odd: fs.c # VERY important +left: fs.c # mild preference # file.c either # exec.c either # sysfile.c either # even: pipe.c # mild preference # string.c either -left: kbd.h +# left: kbd.h # mild preference +even: kbd.h even: console.c odd: sh.c diff --git a/runoff1 b/runoff1 index ba42e8f6e4..532f844231 100755 --- a/runoff1 +++ b/runoff1 @@ -33,7 +33,7 @@ for($i=0; $i<@lines; 
){ last if $i>=@lines; # If the rest of the file fits, use the whole thing. - if(@lines <= $i+50){ + if(@lines <= $i+50 && !grep { /PAGEBREAK/ } @lines){ $breakbefore = @lines; }else{ # Find a good next page break; diff --git a/spinlock.c b/spinlock.c index 68cfbe94af..e668598aa8 100644 --- a/spinlock.c +++ b/spinlock.c @@ -23,7 +23,7 @@ initlock(struct spinlock *lk, char *name) void acquire(struct spinlock *lk) { - pushcli(); + pushcli(); // disable interrupts to avoid deadlock. if(holding(lk)) panic("acquire"); @@ -71,7 +71,7 @@ getcallerpcs(void *v, uint pcs[]) ebp = (uint*)v - 2; for(i = 0; i < 10; i++){ - if(ebp == 0 || ebp < (uint *) 0x100000 || ebp == (uint*)0xffffffff) + if(ebp == 0 || ebp < (uint*)0x100000 || ebp == (uint*)0xffffffff) break; pcs[i] = ebp[1]; // saved %eip ebp = (uint*)ebp[0]; // saved %ebp diff --git a/stressfs.c b/stressfs.c index 21a5d165a7..5d4fee2112 100644 --- a/stressfs.c +++ b/stressfs.c @@ -14,21 +14,20 @@ int main(int argc, char *argv[]) { - int i; + int fd, i; + char path[] = "stressfs0"; + printf(1, "stressfs starting\n"); - for (i = 0; i < 4; i++) { - if (fork() > 0) { + for(i = 0; i < 4; i++) + if(fork() > 0) break; - } - } printf(1, "%d\n", i); - char path[] = "stressfs0"; path[8] += i; - int fd = open(path, O_CREATE | O_RDWR); - for (i = 0; i < 100; i++) + fd = open(path, O_CREATE | O_RDWR); + for(i = 0; i < 100; i++) printf(fd, "%d\n", i); close(fd); diff --git a/syscall.c b/syscall.c index 9296cffd5c..ce50a59b2d 100644 --- a/syscall.c +++ b/syscall.c @@ -22,8 +22,6 @@ fetchint(struct proc *p, uint addr, int *ip) return 0; } -// XXX should we copy the string? - // Fetch the nul-terminated string at addr from process p. // Doesn't actually copy the string - just sets *pp to point at it. // Returns length of string, not including nul. 
@@ -34,8 +32,8 @@ fetchstr(struct proc *p, uint addr, char **pp) if(addr >= p->sz) return -1; - *pp = (char *) addr; - ep = (char *) p->sz; + *pp = (char*)addr; + ep = (char*)p->sz; for(s = *pp; s < ep; s++) if(*s == 0) return s - *pp; @@ -46,8 +44,7 @@ fetchstr(struct proc *p, uint addr, char **pp) int argint(int n, int *ip) { - int x = fetchint(proc, proc->tf->esp + 4 + 4*n, ip); - return x; + return fetchint(proc, proc->tf->esp + 4 + 4*n, ip); } // Fetch the nth word-sized system call argument as a pointer @@ -60,10 +57,9 @@ argptr(int n, char **pp, int size) if(argint(n, &i) < 0) return -1; - if((uint)i >= proc->sz || (uint)i+size >= proc->sz) + if((uint)i >= proc->sz || (uint)i+size > proc->sz) return -1; - // *pp = proc->mem + i; // XXXXX - *pp = (char *) i; // XXXXX + *pp = (char*)i; return 0; } @@ -102,39 +98,52 @@ extern int sys_wait(void); extern int sys_write(void); extern int sys_uptime(void); +int +sys_init(void) +{ + initlog(); + return 0; +} + static int (*syscalls[])(void) = { -[SYS_chdir] sys_chdir, -[SYS_close] sys_close, -[SYS_dup] sys_dup, -[SYS_exec] sys_exec, -[SYS_exit] sys_exit, +[SYS_init] sys_init, [SYS_fork] sys_fork, -[SYS_fstat] sys_fstat, -[SYS_getpid] sys_getpid, -[SYS_kill] sys_kill, -[SYS_link] sys_link, -[SYS_mkdir] sys_mkdir, -[SYS_mknod] sys_mknod, -[SYS_open] sys_open, +[SYS_exit] sys_exit, +[SYS_wait] sys_wait, [SYS_pipe] sys_pipe, [SYS_read] sys_read, +[SYS_kill] sys_kill, +[SYS_exec] sys_exec, +[SYS_fstat] sys_fstat, +[SYS_chdir] sys_chdir, +[SYS_dup] sys_dup, +[SYS_getpid] sys_getpid, [SYS_sbrk] sys_sbrk, [SYS_sleep] sys_sleep, -[SYS_unlink] sys_unlink, -[SYS_wait] sys_wait, -[SYS_write] sys_write, [SYS_uptime] sys_uptime, +// File system calls that are run in a transaction: +[SYS_open] sys_open, +[SYS_write] sys_write, +[SYS_mknod] sys_mknod, +[SYS_unlink] sys_unlink, +[SYS_link] sys_link, +[SYS_mkdir] sys_mkdir, +[SYS_close] sys_close, }; void syscall(void) { int num; - + num = proc->tf->eax; - if(num >= 0 && num < 
NELEM(syscalls) && syscalls[num]) + if(num >= 0 && num < SYS_open && syscalls[num]) { + proc->tf->eax = syscalls[num](); + } else if (num >= SYS_open && num < NELEM(syscalls) && syscalls[num]) { + begin_trans(); proc->tf->eax = syscalls[num](); - else { + commit_trans(); + } else { cprintf("%d %s: unknown sys call %d\n", proc->pid, proc->name, num); proc->tf->eax = -1; diff --git a/syscall.h b/syscall.h index 3a0fbcad93..e9e43a21e4 100644 --- a/syscall.h +++ b/syscall.h @@ -1,22 +1,24 @@ // System call numbers +#define SYS_init 0 #define SYS_fork 1 #define SYS_exit 2 #define SYS_wait 3 #define SYS_pipe 4 -#define SYS_write 5 -#define SYS_read 6 -#define SYS_close 7 -#define SYS_kill 8 -#define SYS_exec 9 -#define SYS_open 10 -#define SYS_mknod 11 -#define SYS_unlink 12 -#define SYS_fstat 13 -#define SYS_link 14 -#define SYS_mkdir 15 -#define SYS_chdir 16 -#define SYS_dup 17 -#define SYS_getpid 18 -#define SYS_sbrk 19 -#define SYS_sleep 20 -#define SYS_uptime 21 +#define SYS_read 5 +#define SYS_kill 6 +#define SYS_exec 7 +#define SYS_fstat 8 +#define SYS_chdir 9 +#define SYS_dup 10 +#define SYS_getpid 11 +#define SYS_sbrk 12 +#define SYS_sleep 13 +#define SYS_uptime 14 + +#define SYS_open 15 +#define SYS_write 16 +#define SYS_mknod 17 +#define SYS_unlink 18 +#define SYS_link 19 +#define SYS_mkdir 20 +#define SYS_close 21 diff --git a/sysfile.c b/sysfile.c index 6b8eef4ba4..4235660fa5 100644 --- a/sysfile.c +++ b/sysfile.c @@ -344,11 +344,11 @@ sys_chdir(void) int sys_exec(void) { - char *path, *argv[20]; + char *path, *argv[MAXARG]; int i; uint uargv, uarg; - if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0) { + if(argstr(0, &path) < 0 || argint(1, (int*)&uargv) < 0){ return -1; } memset(argv, 0, sizeof(argv)); diff --git a/toc.ftr b/toc.ftr index 6ed7fe009b..5e159117d4 100644 --- a/toc.ftr +++ b/toc.ftr @@ -6,9 +6,8 @@ on the same line as the name, the line number (or, in a few cases, numbers) where the name is defined. 
Successive lines in an entry list the line numbers where the name is used. For example, this entry: - swtch 2208 - 0318 1928 1967 2207 - 2208 + swtch 2358 + 0317 2128 2166 2357 2358 -indicates that swtch is defined on line 2208 and is mentioned on five lines -on sheets 03, 19, and 22. +indicates that swtch is defined on line 2358 and is mentioned on five lines +on sheets 03, 21, and 23. diff --git a/trap.c b/trap.c index daee22f6e8..6651f8e5f3 100644 --- a/trap.c +++ b/trap.c @@ -59,6 +59,9 @@ trap(struct trapframe *tf) ideintr(); lapiceoi(); break; + case T_IRQ0 + IRQ_IDE+1: + // Bochs generates spurious IDE1 interrupts. + break; case T_IRQ0 + IRQ_KBD: kbdintr(); lapiceoi(); @@ -83,9 +86,10 @@ trap(struct trapframe *tf) panic("trap"); } // In user space, assume process misbehaved. - cprintf("pid %d %s: trap %d err %d on cpu %d eip 0x%x addr 0x%x--kill proc\n", + cprintf("pid %d %s: trap %d err %d on cpu %d " + "eip 0x%x addr 0x%x--kill proc\n", proc->pid, proc->name, tf->trapno, tf->err, cpu->id, tf->eip, - rcr2()); + rcr2()); proc->killed = 1; } diff --git a/ulib.c b/ulib.c index 0268c262e8..dbbcfcfb65 100644 --- a/ulib.c +++ b/ulib.c @@ -45,7 +45,7 @@ strchr(const char *s, char c) { for(; *s; s++) if(*s == c) - return (char*) s; + return (char*)s; return 0; } diff --git a/umalloc.c b/umalloc.c index 4984591218..a7e7d2cea8 100644 --- a/umalloc.c +++ b/umalloc.c @@ -26,7 +26,7 @@ free(void *ap) { Header *bp, *p; - bp = (Header*) ap - 1; + bp = (Header*)ap - 1; for(p = freep; !(bp > p && bp < p->s.ptr); p = p->s.ptr) if(p >= p->s.ptr && (bp > p || bp < p->s.ptr)) break; @@ -52,7 +52,7 @@ morecore(uint nu) if(nu < 4096) nu = 4096; p = sbrk(nu * sizeof(Header)); - if(p == (char*) -1) + if(p == (char*)-1) return 0; hp = (Header*)p; hp->s.size = nu; @@ -81,7 +81,7 @@ malloc(uint nbytes) p->s.size = nunits; } freep = prevp; - return (void*) (p + 1); + return (void*)(p + 1); } if(p == freep) if((p = morecore(nunits)) == 0) diff --git a/user.h b/user.h index 
431428c17b..9e26cf13a7 100644 --- a/user.h +++ b/user.h @@ -18,10 +18,10 @@ int link(char*, char*); int mkdir(char*); int chdir(char*); int dup(int); -int getpid(); +int getpid(void); char* sbrk(int); int sleep(int); -int uptime(); +int uptime(void); // ulib.c int stat(char*, struct stat*); diff --git a/usertests.c b/usertests.c index 670a4a8e3d..296731ae48 100644 --- a/usertests.c +++ b/usertests.c @@ -3,6 +3,8 @@ #include "user.h" #include "fs.h" #include "fcntl.h" +#include "syscall.h" +#include "traps.h" char buf[2048]; char name[3]; @@ -45,12 +47,12 @@ writetest(void) printf(stdout, "error: creat small failed!\n"); exit(); } - for(i = 0; i < 100; i++) { - if(write(fd, "aaaaaaaaaa", 10) != 10) { + for(i = 0; i < 100; i++){ + if(write(fd, "aaaaaaaaaa", 10) != 10){ printf(stdout, "error: write aa %d new file failed\n", i); exit(); } - if(write(fd, "bbbbbbbbbb", 10) != 10) { + if(write(fd, "bbbbbbbbbb", 10) != 10){ printf(stdout, "error: write bb %d new file failed\n", i); exit(); } @@ -65,7 +67,7 @@ writetest(void) exit(); } i = read(fd, buf, 2000); - if(i == 2000) { + if(i == 2000){ printf(stdout, "read succeeded ok\n"); } else { printf(stdout, "read failed\n"); @@ -73,7 +75,7 @@ writetest(void) } close(fd); - if(unlink("small") < 0) { + if(unlink("small") < 0){ printf(stdout, "unlink small failed\n"); exit(); } @@ -93,9 +95,9 @@ writetest1(void) exit(); } - for(i = 0; i < MAXFILE; i++) { - ((int*) buf)[0] = i; - if(write(fd, buf, 512) != 512) { + for(i = 0; i < MAXFILE; i++){ + ((int*)buf)[0] = i; + if(write(fd, buf, 512) != 512){ printf(stdout, "error: write big file failed\n", i); exit(); } @@ -110,19 +112,19 @@ writetest1(void) } n = 0; - for(;;) { + for(;;){ i = read(fd, buf, 512); - if(i == 0) { - if(n == MAXFILE - 1) { + if(i == 0){ + if(n == MAXFILE - 1){ printf(stdout, "read only %d blocks from big", n); exit(); } break; - } else if(i != 512) { + } else if(i != 512){ printf(stdout, "read failed %d\n", i); exit(); } - if(((int*)buf)[0] != n) { + 
if(((int*)buf)[0] != n){ printf(stdout, "read content of block %d is %d\n", n, ((int*)buf)[0]); exit(); @@ -130,7 +132,7 @@ writetest1(void) n++; } close(fd); - if(unlink("big") < 0) { + if(unlink("big") < 0){ printf(stdout, "unlink big failed\n"); exit(); } @@ -146,14 +148,14 @@ createtest(void) name[0] = 'a'; name[2] = '\0'; - for(i = 0; i < 52; i++) { + for(i = 0; i < 52; i++){ name[1] = '0' + i; fd = open(name, O_CREATE|O_RDWR); close(fd); } name[0] = 'a'; name[2] = '\0'; - for(i = 0; i < 52; i++) { + for(i = 0; i < 52; i++){ name[1] = '0' + i; unlink(name); } @@ -164,22 +166,22 @@ void dirtest(void) { printf(stdout, "mkdir test\n"); - if(mkdir("dir0") < 0) { + if(mkdir("dir0") < 0){ printf(stdout, "mkdir failed\n"); exit(); } - if(chdir("dir0") < 0) { + if(chdir("dir0") < 0){ printf(stdout, "chdir dir0 failed\n"); exit(); } - if(chdir("..") < 0) { + if(chdir("..") < 0){ printf(stdout, "chdir .. failed\n"); exit(); } - if(unlink("dir0") < 0) { + if(unlink("dir0") < 0){ printf(stdout, "unlink dir0 failed\n"); exit(); } @@ -190,7 +192,7 @@ void exectest(void) { printf(stdout, "exec test\n"); - if(exec("echo", echoargv) < 0) { + if(exec("echo", echoargv) < 0){ printf(stdout, "exec echo failed\n"); exit(); } @@ -324,20 +326,21 @@ mem(void) void *m1, *m2; int pid, ppid; + printf(1, "mem test\n"); ppid = getpid(); if((pid = fork()) == 0){ m1 = 0; - while((m2 = malloc(10001)) != 0) { - *(char**) m2 = m1; + while((m2 = malloc(10001)) != 0){ + *(char**)m2 = m1; m1 = m2; } - while(m1) { + while(m1){ m2 = *(char**)m1; free(m1); m1 = m2; } m1 = malloc(1024*20); - if(m1 == 0) { + if(m1 == 0){ printf(1, "couldn't allocate mem?!!\n"); kill(ppid); exit(); @@ -1234,16 +1237,18 @@ forktest(void) void sbrktest(void) { - int pid; - char *oldbrk = sbrk(0); + int fds[2], pid, pids[32], ppid; + char *a, *b, *c, *lastaddr, *oldbrk, *p, scratch; + uint amt; printf(stdout, "sbrk test\n"); + oldbrk = sbrk(0); // can one sbrk() less than a page? 
- char *a = sbrk(0); + a = sbrk(0); int i; for(i = 0; i < 5000; i++){ - char *b = sbrk(1); + b = sbrk(1); if(b != a){ printf(stdout, "sbrk test failed %d %x %x\n", i, a, b); exit(); @@ -1256,7 +1261,7 @@ sbrktest(void) printf(stdout, "sbrk test fork failed\n"); exit(); } - char *c = sbrk(1); + c = sbrk(1); c = sbrk(1); if(c != a + 1){ printf(stdout, "sbrk test failed post-fork\n"); @@ -1268,18 +1273,18 @@ sbrktest(void) // can one allocate the full 640K? a = sbrk(0); - uint amt = (640 * 1024) - (uint) a; - char *p = sbrk(amt); + amt = (640 * 1024) - (uint)a; + p = sbrk(amt); if(p != a){ printf(stdout, "sbrk test failed 640K test, p %x a %x\n", p, a); exit(); } - char *lastaddr = (char *)(640 * 1024 - 1); + lastaddr = (char*)(640 * 1024 - 1); *lastaddr = 99; // is one forbidden from allocating more than 640K? c = sbrk(4096); - if(c != (char *) 0xffffffff){ + if(c != (char*)0xffffffff){ printf(stdout, "sbrk allocated more than 640K, c %x\n", c); exit(); } @@ -1287,7 +1292,7 @@ sbrktest(void) // can one de-allocate? a = sbrk(0); c = sbrk(-4096); - if(c == (char *) 0xffffffff){ + if(c == (char*)0xffffffff){ printf(stdout, "sbrk could not deallocate\n"); exit(); } @@ -1311,15 +1316,15 @@ sbrktest(void) } c = sbrk(4096); - if(c != (char *) 0xffffffff){ + if(c != (char*)0xffffffff){ printf(stdout, "sbrk was able to re-allocate beyond 640K, c %x\n", c); exit(); } // can we read the kernel's memory? - for(a = (char*)(640*1024); a < (char *)2000000; a += 50000){ - int ppid = getpid(); - int pid = fork(); + for(a = (char*)(640*1024); a < (char*)2000000; a += 50000){ + ppid = getpid(); + pid = fork(); if(pid < 0){ printf(stdout, "fork failed\n"); exit(); @@ -1332,6 +1337,38 @@ sbrktest(void) wait(); } + // if we run the system out of memory, does it clean up the last + // failed allocation? 
+ sbrk(-(sbrk(0) - oldbrk)); + if(pipe(fds) != 0){ + printf(1, "pipe() failed\n"); + exit(); + } + for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){ + if((pids[i] = fork()) == 0){ + // allocate the full 640K + sbrk((640 * 1024) - (uint)sbrk(0)); + write(fds[1], "x", 1); + // sit around until killed + for(;;) sleep(1000); + } + if(pids[i] != -1) + read(fds[0], &scratch, 1); + } + // if those failed allocations freed up the pages they did allocate, + // we'll be able to allocate here + c = sbrk(4096); + for(i = 0; i < sizeof(pids)/sizeof(pids[0]); i++){ + if(pids[i] == -1) + continue; + kill(pids[i]); + wait(); + } + if(c == (char*)0xffffffff){ + printf(stdout, "failed sbrk leaked memory\n"); + exit(); + } + if(sbrk(0) > oldbrk) sbrk(-(sbrk(0) - oldbrk)); @@ -1339,26 +1376,89 @@ sbrktest(void) } void -stacktest(void) +validateint(int *p) { - printf(stdout, "stack test\n"); - char dummy = 1; - char *p = &dummy; - int ppid = getpid(); - int pid = fork(); - if(pid < 0){ - printf(stdout, "fork failed\n"); - exit(); + int res; + asm("mov %%esp, %%ebx\n\t" + "mov %3, %%esp\n\t" + "int %2\n\t" + "mov %%ebx, %%esp" : + "=a" (res) : + "a" (SYS_sleep), "n" (T_SYSCALL), "c" (p) : + "ebx"); +} + +void +validatetest(void) +{ + int hi, pid; + uint p; + + printf(stdout, "validate test\n"); + hi = 1100*1024; + + for(p = 0; p <= (uint)hi; p += 4096){ + if((pid = fork()) == 0){ + // try to crash the kernel by passing in a badly placed integer + validateint((int*)p); + exit(); + } + sleep(0); + sleep(0); + kill(pid); + wait(); + + // try to crash the kernel by passing in a bad string pointer + if(link("nosuchfile", (char*)p) != -1){ + printf(stdout, "link should not succeed\n"); + exit(); + } } + + printf(stdout, "validate ok\n"); +} + +// does uninitialized data start out zero? 
+char uninit[10000]; +void +bsstest(void) +{ + int i; + + printf(stdout, "bss test\n"); + for(i = 0; i < sizeof(uninit); i++){ + if(uninit[i] != '\0'){ + printf(stdout, "bss test failed\n"); + exit(); + } + } + printf(stdout, "bss test ok\n"); +} + +// does exec do something sensible if the arguments +// are larger than a page? +void +bigargtest(void) +{ + int pid, ppid; + + ppid = getpid(); + pid = fork(); if(pid == 0){ - // should cause a trap: - p[-4096] = 'z'; - kill(ppid); - printf(stdout, "stack test failed: page before stack was writeable\n"); + char *args[32+1]; + int i; + for(i = 0; i < 32; i++) + args[i] = "bigargs test: failed\n "; + args[32] = 0; + printf(stdout, "bigarg test\n"); + exec("echo", args); + printf(stdout, "bigarg test ok\n"); + exit(); + } else if(pid < 0){ + printf(stdout, "bigargtest: fork failed\n"); exit(); } wait(); - printf(stdout, "stack test OK\n"); } int @@ -1372,8 +1472,10 @@ main(int argc, char *argv[]) } close(open("usertests.ran", O_CREATE)); - stacktest(); + bigargtest(); + bsstest(); sbrktest(); + validatetest(); opentest(); writetest(); diff --git a/vm.c b/vm.c index 46d18fcd73..1fe64d24fa 100644 --- a/vm.c +++ b/vm.c @@ -6,81 +6,86 @@ #include "proc.h" #include "elf.h" -// The mappings from logical to linear are one to one (i.e., -// segmentation doesn't do anything). -// There is one page table per process, plus one that's used -// when a CPU is not running any process (kpgdir). -// A user process uses the same page table as the kernel; the -// page protection bits prevent it from using anything other -// than its memory. 
-// -// setupkvm() and exec() set up every page table like this: -// 0..640K : user memory (text, data, stack, heap) -// 640K..1M : mapped direct (for IO space) -// 1M..kernend : mapped direct (for the kernel's text and data) -// kernend..PHYSTOP : mapped direct (kernel heap and user pages) -// 0xfe000000..0 : mapped direct (devices such as ioapic) -// -// The kernel allocates memory for its heap and for user memory -// between kernend and the end of physical memory (PHYSTOP). -// The virtual address space of each user program includes the kernel -// (which is inaccessible in user mode). The user program addresses -// range from 0 till 640KB (USERTOP), which where the I/O hole starts -// (both in physical memory and in the kernel's virtual address -// space). - -#define USERTOP 0xA0000 +extern char data[]; // defined in data.S -static uint kerntext; // Linker starts kernel at 1MB -static uint kerntsz; -static uint kerndata; -static uint kerndsz; -static uint kernend; -static uint freesz; static pde_t *kpgdir; // for use in scheduler() -// return the address of the PTE in page table pgdir -// that corresponds to linear address va. if create!=0, +// Allocate one page table for the machine for the kernel address +// space for scheduler processes. +void +kvmalloc(void) +{ + kpgdir = setupkvm(); +} + +// Set up CPU's kernel segment descriptors. +// Run once at boot time on each CPU. +void +seginit(void) +{ + struct cpu *c; + + // Map virtual addresses to linear addresses using identity map. + // Cannot share a CODE descriptor for both kernel and user + // because it would have to have DPL_USR, but the CPU forbids + // an interrupt from CPL=0 to DPL=3. 
+ c = &cpus[cpunum()]; + c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); + c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); + c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); + c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + + // Map cpu, and curproc + c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); + + lgdt(c->gdt, sizeof(c->gdt)); + loadgs(SEG_KCPU << 3); + + // Initialize cpu-local storage. + cpu = c; + proc = 0; +} + +// Return the address of the PTE in page table pgdir +// that corresponds to linear address va. If create!=0, // create any required page table pages. static pte_t * walkpgdir(pde_t *pgdir, const void *va, int create) { - uint r; pde_t *pde; pte_t *pgtab; pde = &pgdir[PDX(va)]; - if (*pde & PTE_P) { - pgtab = (pte_t*) PTE_ADDR(*pde); - } else if (!create || !(r = (uint) kalloc())) - return 0; - else { - pgtab = (pte_t*) r; - + if(*pde & PTE_P){ + pgtab = (pte_t*)PTE_ADDR(*pde); + } else { + if(!create || (pgtab = (pte_t*)kalloc()) == 0) + return 0; // Make sure all those PTE_P bits are zero. memset(pgtab, 0, PGSIZE); - // The permissions here are overly generous, but they can // be further restricted by the permissions in the page table // entries, if necessary. - *pde = PADDR(r) | PTE_P | PTE_W | PTE_U; + *pde = PADDR(pgtab) | PTE_P | PTE_W | PTE_U; } return &pgtab[PTX(va)]; } -// create PTEs for linear addresses starting at la that refer to +// Create PTEs for linear addresses starting at la that refer to // physical addresses starting at pa. la and size might not // be page-aligned. 
static int mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) { - char *first = PGROUNDDOWN(la); - char *last = PGROUNDDOWN(la + size - 1); - char *a = first; - while(1){ - pte_t *pte = walkpgdir(pgdir, a, 1); + char *a, *last; + pte_t *pte; + + a = PGROUNDDOWN(la); + last = PGROUNDDOWN(la + size - 1); + for(;;){ + pte = walkpgdir(pgdir, a, 1); if(pte == 0) - return 0; + return -1; if(*pte & PTE_P) panic("remap"); *pte = pa | perm | PTE_P; @@ -89,292 +94,274 @@ mappages(pde_t *pgdir, void *la, uint size, uint pa, int perm) a += PGSIZE; pa += PGSIZE; } - return 1; + return 0; } -// Set up CPU's kernel segment descriptors. -// Run once at boot time on each CPU. -void -ksegment(void) +// The mappings from logical to linear are one to one (i.e., +// segmentation doesn't do anything). +// There is one page table per process, plus one that's used +// when a CPU is not running any process (kpgdir). +// A user process uses the same page table as the kernel; the +// page protection bits prevent it from using anything other +// than its memory. +// +// setupkvm() and exec() set up every page table like this: +// 0..640K : user memory (text, data, stack, heap) +// 640K..1M : mapped direct (for IO space) +// 1M..end : mapped direct (for the kernel's text and data) +// end..PHYSTOP : mapped direct (kernel heap and user pages) +// 0xfe000000..0 : mapped direct (devices such as ioapic) +// +// The kernel allocates memory for its heap and for user memory +// between kernend and the end of physical memory (PHYSTOP). +// The virtual address space of each user program includes the kernel +// (which is inaccessible in user mode). The user program addresses +// range from 0 till 640KB (USERTOP), which is where the I/O hole starts +// (both in physical memory and in the kernel's virtual address +// space). 
+static struct kmap { + void *p; + void *e; + int perm; +} kmap[] = { + {(void*)USERTOP, (void*)0x100000, PTE_W}, // I/O space + {(void*)0x100000, data, 0 }, // kernel text, rodata + {data, (void*)PHYSTOP, PTE_W}, // kernel data, memory + {(void*)0xFE000000, 0, PTE_W}, // device mappings +}; + +// Set up kernel part of a page table. +pde_t* +setupkvm(void) { - struct cpu *c; + pde_t *pgdir; + struct kmap *k; - // Map virtual addresses to linear addresses using identity map. - // Cannot share a CODE descriptor for both kernel and user - // because it would have to have DPL_USR, but the CPU forbids - // an interrupt from CPL=0 to DPL=3. - c = &cpus[cpunum()]; - c->gdt[SEG_KCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, 0); - c->gdt[SEG_KDATA] = SEG(STA_W, 0, 0xffffffff, 0); - c->gdt[SEG_UCODE] = SEG(STA_X|STA_R, 0, 0xffffffff, DPL_USER); - c->gdt[SEG_UDATA] = SEG(STA_W, 0, 0xffffffff, DPL_USER); + if((pgdir = (pde_t*)kalloc()) == 0) + return 0; + memset(pgdir, 0, PGSIZE); + k = kmap; + for(k = kmap; k < &kmap[NELEM(kmap)]; k++) + if(mappages(pgdir, k->p, k->e - k->p, (uint)k->p, k->perm) < 0) + return 0; - // map cpu, and curproc - c->gdt[SEG_KCPU] = SEG(STA_W, &c->cpu, 8, 0); + return pgdir; +} - lgdt(c->gdt, sizeof(c->gdt)); - loadgs(SEG_KCPU << 3); - - // Initialize cpu-local storage. - cpu = c; - proc = 0; +// Turn on paging. +void +vmenable(void) +{ + uint cr0; + + switchkvm(); // load kpgdir into cr3 + cr0 = rcr0(); + cr0 |= CR0_PG; + lcr0(cr0); } -// Switch h/w page table and TSS registers to point to process p. +// Switch h/w page table register to the kernel-only page table, +// for when no process is running. +void +switchkvm(void) +{ + lcr3(PADDR(kpgdir)); // switch to the kernel page table +} + +// Switch TSS and h/w page table to correspond to process p. 
void switchuvm(struct proc *p) { pushcli(); - - // Setup TSS cpu->gdt[SEG_TSS] = SEG16(STS_T32A, &cpu->ts, sizeof(cpu->ts)-1, 0); cpu->gdt[SEG_TSS].s = 0; cpu->ts.ss0 = SEG_KDATA << 3; cpu->ts.esp0 = (uint)proc->kstack + KSTACKSIZE; ltr(SEG_TSS << 3); - - if (p->pgdir == 0) - panic("switchuvm: no pgdir\n"); - + if(p->pgdir == 0) + panic("switchuvm: no pgdir"); lcr3(PADDR(p->pgdir)); // switch to new address space popcli(); } -// Switch h/w page table register to the kernel-only page table, for when -// no process is running. +// Load the initcode into address 0 of pgdir. +// sz must be less than a page. void -switchkvm() +inituvm(pde_t *pgdir, char *init, uint sz) { - lcr3(PADDR(kpgdir)); // Switch to the kernel page table + char *mem; + + if(sz >= PGSIZE) + panic("inituvm: more than a page"); + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pgdir, 0, PGSIZE, PADDR(mem), PTE_W|PTE_U); + memmove(mem, init, sz); } -// Set up kernel part of a page table. -pde_t* -setupkvm(void) +// Load a program segment into pgdir. addr must be page-aligned +// and the pages from addr to addr+sz must already be mapped. +int +loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) { - pde_t *pgdir; - - // Allocate page directory - if (!(pgdir = (pde_t *) kalloc())) - return 0; - memset(pgdir, 0, PGSIZE); - // Map IO space from 640K to 1Mbyte - if (!mappages(pgdir, (void *)USERTOP, 0x60000, USERTOP, PTE_W)) - return 0; - // Map kernel text read-only - if (!mappages(pgdir, (void *) kerntext, kerntsz, kerntext, 0)) - return 0; - // Map kernel data read/write - if (!mappages(pgdir, (void *) kerndata, kerndsz, kerndata, PTE_W)) - return 0; - // Map dynamically-allocated memory read/write (kernel stacks, user mem) - if (!mappages(pgdir, (void *) kernend, freesz, PADDR(kernend), PTE_W)) - return 0; - // Map devices such as ioapic, lapic, ... 
- if (!mappages(pgdir, (void *)0xFE000000, 0x2000000, 0xFE000000, PTE_W)) - return 0; - return pgdir; -} + uint i, pa, n; + pte_t *pte; -// return the physical address that a given user address -// maps to. the result is also a kernel logical address, -// since the kernel maps the physical memory allocated to user -// processes directly. -char* -uva2ka(pde_t *pgdir, char *uva) -{ - pte_t *pte = walkpgdir(pgdir, uva, 0); - if (pte == 0) return 0; - uint pa = PTE_ADDR(*pte); - return (char *)pa; + if((uint)addr % PGSIZE != 0) + panic("loaduvm: addr must be page aligned"); + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, addr+i, 0)) == 0) + panic("loaduvm: address should exist"); + pa = PTE_ADDR(*pte); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(readi(ip, (char*)pa, offset+i, n) != n) + return -1; + } + return 0; } -// allocate sz bytes more memory for a process starting at the -// given user address; allocates physical memory and page -// table entries. addr and sz need not be page-aligned. -// it is a no-op for any parts of the requested memory -// that are already allocated. +// Allocate page tables and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. int -allocuvm(pde_t *pgdir, char *addr, uint sz) +allocuvm(pde_t *pgdir, uint oldsz, uint newsz) { - if (addr + sz > (char*)USERTOP) + char *mem; + uint a; + + if(newsz > USERTOP) return 0; - char *first = PGROUNDDOWN(addr); - char *last = PGROUNDDOWN(addr + sz - 1); - char *a; - for(a = first; a <= last; a += PGSIZE){ - pte_t *pte = walkpgdir(pgdir, a, 0); - if(pte == 0 || (*pte & PTE_P) == 0){ - char *mem = kalloc(); - if(mem == 0){ - // XXX clean up? 
- return 0; - } - memset(mem, 0, PGSIZE); - mappages(pgdir, a, PGSIZE, PADDR(mem), PTE_W|PTE_U); + if(newsz < oldsz) + return oldsz; + + a = PGROUNDUP(oldsz); + for(; a < newsz; a += PGSIZE){ + mem = kalloc(); + if(mem == 0){ + cprintf("allocuvm out of memory\n"); + deallocuvm(pgdir, newsz, oldsz); + return 0; } + memset(mem, 0, PGSIZE); + mappages(pgdir, (char*)a, PGSIZE, PADDR(mem), PTE_W|PTE_U); } - return 1; + return newsz; } -// deallocate some of the user pages, in response to sbrk() -// with a negative argument. if addr is not page-aligned, -// then only deallocates starting at the next page boundary. +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. int -deallocuvm(pde_t *pgdir, char *addr, uint sz) +deallocuvm(pde_t *pgdir, uint oldsz, uint newsz) { - if (addr + sz > (char*)USERTOP) - return 0; - char *first = (char*) PGROUNDUP((uint)addr); - char *last = PGROUNDDOWN(addr + sz - 1); - char *a; - for(a = first; a <= last; a += PGSIZE){ - pte_t *pte = walkpgdir(pgdir, a, 0); + pte_t *pte; + uint a, pa; + + if(newsz >= oldsz) + return oldsz; + + a = PGROUNDUP(newsz); + for(; a < oldsz; a += PGSIZE){ + pte = walkpgdir(pgdir, (char*)a, 0); if(pte && (*pte & PTE_P) != 0){ - uint pa = PTE_ADDR(*pte); + pa = PTE_ADDR(*pte); if(pa == 0) - panic("deallocuvm"); - kfree((void *) pa); + panic("kfree"); + kfree((char*)pa); *pte = 0; } } - return 1; + return newsz; } -// free a page table and all the physical memory pages +// Free a page table and all the physical memory pages // in the user part. 
void freevm(pde_t *pgdir) { - uint i, j, da; - - if (!pgdir) - panic("freevm: no pgdir\n"); - for (i = 0; i < NPDENTRIES; i++) { - da = PTE_ADDR(pgdir[i]); - if (da != 0) { - pte_t *pgtab = (pte_t*) da; - for (j = 0; j < NPTENTRIES; j++) { - if (pgtab[j] != 0) { - uint pa = PTE_ADDR(pgtab[j]); - uint va = PGADDR(i, j, 0); - if (va < USERTOP) // user memory - kfree((void *) pa); - pgtab[j] = 0; - } - } - kfree((void *) da); - pgdir[i] = 0; - } - } - kfree((void *) pgdir); -} - -int -loaduvm(pde_t *pgdir, char *addr, struct inode *ip, uint offset, uint sz) -{ - uint i, pa, n; - pte_t *pte; - - if ((uint)addr % PGSIZE != 0) - panic("loaduvm: addr must be page aligned\n"); - for (i = 0; i < sz; i += PGSIZE) { - if (!(pte = walkpgdir(pgdir, addr+i, 0))) - panic("loaduvm: address should exist\n"); - pa = PTE_ADDR(*pte); - if (sz - i < PGSIZE) n = sz - i; - else n = PGSIZE; - if(readi(ip, (char *)pa, offset+i, n) != n) - return 0; + uint i; + + if(pgdir == 0) + panic("freevm: no pgdir"); + deallocuvm(pgdir, USERTOP, 0); + for(i = 0; i < NPDENTRIES; i++){ + if(pgdir[i] & PTE_P) + kfree((char*)PTE_ADDR(pgdir[i])); } - return 1; + kfree((char*)pgdir); } -void -inituvm(pde_t *pgdir, char *addr, char *init, uint sz) -{ - uint i, pa, n, off; - pte_t *pte; - - for (i = 0; i < sz; i += PGSIZE) { - if (!(pte = walkpgdir(pgdir, (void *)(i+addr), 0))) - panic("inituvm: pte should exist\n"); - off = (i+(uint)addr) % PGSIZE; - pa = PTE_ADDR(*pte); - if (sz - i < PGSIZE) n = sz - i; - else n = PGSIZE; - memmove((char *)pa+off, init+i, n); - } -} - -// given a parent process's page table, create a copy +// Given a parent process's page table, create a copy // of it for a child. 
pde_t* copyuvm(pde_t *pgdir, uint sz) { - pde_t *d = setupkvm(); + pde_t *d; pte_t *pte; uint pa, i; char *mem; - if (!d) return 0; - for (i = 0; i < sz; i += PGSIZE) { - if (!(pte = walkpgdir(pgdir, (void *)i, 0))) - panic("copyuvm: pte should exist\n"); - if(*pte & PTE_P){ - pa = PTE_ADDR(*pte); - if (!(mem = kalloc())) - return 0; - memmove(mem, (char *)pa, PGSIZE); - if (!mappages(d, (void *)i, PGSIZE, PADDR(mem), PTE_W|PTE_U)) - return 0; - } + if((d = setupkvm()) == 0) + return 0; + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walkpgdir(pgdir, (void*)i, 0)) == 0) + panic("copyuvm: pte should exist"); + if(!(*pte & PTE_P)) + panic("copyuvm: page not present"); + pa = PTE_ADDR(*pte); + if((mem = kalloc()) == 0) + goto bad; + memmove(mem, (char*)pa, PGSIZE); + if(mappages(d, (void*)i, PGSIZE, PADDR(mem), PTE_W|PTE_U) < 0) + goto bad; } return d; -} -// Gather information about physical memory layout. -// Called once during boot. -// Really should find out how much physical memory -// there is rather than assuming PHYSTOP. -void -pminit(void) -{ - extern char end[]; - struct proghdr *ph; - struct elfhdr *elf = (struct elfhdr*)0x10000; // scratch space - - if (elf->magic != ELF_MAGIC || elf->phnum != 2) - panic("pminit: need a text and data segment\n"); - - ph = (struct proghdr*)((uchar*)elf + elf->phoff); - kernend = ((uint)end + PGSIZE) & ~(PGSIZE-1); - kerntext = ph[0].va; - kerndata = ph[1].va; - kerntsz = ph[0].memsz; - kerndsz = ph[1].memsz; - freesz = PHYSTOP - kernend; - - kinit((char *)kernend, freesz); +bad: + freevm(d); + return 0; } -// Allocate one page table for the machine for the kernel address -// space for scheduler processes. -void -kvmalloc(void) +//PAGEBREAK! +// Map user virtual address to kernel physical address. 
+char* +uva2ka(pde_t *pgdir, char *uva) { - kpgdir = setupkvm(); + pte_t *pte; + + pte = walkpgdir(pgdir, uva, 0); + if((*pte & PTE_P) == 0) + return 0; + if((*pte & PTE_U) == 0) + return 0; + return (char*)PTE_ADDR(*pte); } -// Turn on paging. -void -vmenable(void) +// Copy len bytes from p to user address va in page table pgdir. +// Most useful when pgdir is not the current page table. +// uva2ka ensures this only works for PTE_U pages. +int +copyout(pde_t *pgdir, uint va, void *p, uint len) { - uint cr0; - - switchkvm(); // load kpgdir into cr3 - cr0 = rcr0(); - cr0 |= CR0_PG; - lcr0(cr0); + char *buf, *pa0; + uint n, va0; + + buf = (char*)p; + while(len > 0){ + va0 = (uint)PGROUNDDOWN(va); + pa0 = uva2ka(pgdir, (char*)va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (va - va0); + if(n > len) + n = len; + memmove(pa0 + (va - va0), buf, n); + len -= n; + buf += n; + va = va0 + PGSIZE; + } + return 0; } - diff --git a/web/Makefile b/web/Makefile deleted file mode 100644 index 107d6830c2..0000000000 --- a/web/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -index.html: index.txt mkhtml - ./mkhtml index.txt >_$@ && mv _$@ $@ - diff --git a/web/boot.pdf b/web/boot.pdf new file mode 100644 index 0000000000..9cb84f5065 Binary files /dev/null and b/web/boot.pdf differ diff --git a/web/disk.pdf b/web/disk.pdf new file mode 100644 index 0000000000..4df20b755d Binary files /dev/null and b/web/disk.pdf differ diff --git a/web/exec.pdf b/web/exec.pdf new file mode 100644 index 0000000000..1652a1411f Binary files /dev/null and b/web/exec.pdf differ diff --git a/web/fscall.pdf b/web/fscall.pdf new file mode 100644 index 0000000000..4b44eac17f Binary files /dev/null and b/web/fscall.pdf differ diff --git a/web/fsdata.pdf b/web/fsdata.pdf new file mode 100644 index 0000000000..2115b64eb5 Binary files /dev/null and b/web/fsdata.pdf differ diff --git a/web/index.html b/web/index.html index 5f60112387..d7bba91804 100644 --- a/web/index.html +++ b/web/index.html @@ -1,4 +1,3 @@ - Xv6, a 
simple Unix-like teaching operating system @@ -32,31 +31,36 @@ --> +

Xv6, a simple Unix-like teaching operating system

-

-Xv6 is a teaching operating system developed -in the summer of 2006 for MIT's operating systems course, -“6.828: Operating Systems Engineering.” -We used it for 6.828 in Fall 2006 and Fall 2007 -and are using it this semester (Fall 2008). -We hope that xv6 will be useful in other courses too. -This page collects resources to aid the use of xv6 -in other courses. + +

Introduction

+ +Xv6 is a teaching operating system developed in the summer of 2006 for +MIT's operating systems +course, 6.828: Operating +Systems Engineering. We hope that xv6 will be useful in other +courses too. This page collects resources to aid the use of xv6 in +other courses, including a commentary on the source code itself. + +

Status: The xv6 code is in pretty good shape, but +the commentary is rough.

History and Background

-For many years, MIT had no operating systems course. -In the fall of 2002, Frans Kaashoek, Josh Cates, and Emil Sit -created a new, experimental course (6.097) -to teach operating systems engineering. -In the course lectures, the class worked through Sixth Edition Unix (aka V6) -using John Lions's famous commentary. -In the lab assignments, students wrote most of an exokernel operating -system, eventually named Jos, for the Intel x86. -Exposing students to multiple systems–V6 and Jos–helped -develop a sense of the spectrum of operating system designs. -In the fall of 2003, the experimental 6.097 became the -official course 6.828; the course has been offered each fall since then. -

+ +

For many years, MIT had no operating systems course. In the fall +of 2002, Frans Kaashoek, Josh Cates, and Emil Sit created a new, +experimental course (6.097) to teach operating systems engineering. +In the course lectures, the class worked through Sixth +Edition Unix (aka V6) using John Lions's famous commentary. In +the lab assignments, students wrote most of an exokernel operating +system, eventually named Jos, for the Intel x86. Exposing students to +multiple systems–V6 and Jos–helped develop a sense of the +spectrum of operating system designs. In the fall of 2003, the +experimental 6.097 became the official course 6.828; the course has +been offered each fall since then. + +

V6 presented pedagogic challenges from the start. Students doubted the relevance of an obsolete 30-year-old operating system written in an obsolete programming language (pre-K&R C) @@ -76,13 +80,12 @@

History and Background

enabling/disabling interrupts) and helps relevance. Finally, writing a new system allowed us to write cleaner versions of the rougher parts of V6, like the scheduler and file system. -

-6.828 substituted xv6 for V6 in the fall of 2006. -Based on that experience, we cleaned up rough patches -of xv6 for the course in the fall of 2007. -Since then, xv6 has stabilized, so we are making it -available in the hopes that others will find it useful too. -

+

6.828 substituted xv6 for V6 in the fall of 2006. Based on +that experience, we cleaned up rough patches of xv6. Since then, xv6 +has stabilized, so we are making it available in the hopes that others +will find it useful too. + +

6.828 uses both xv6 and Jos. Courses taught at UCLA, NYU, Peking University, Stanford, Tsinghua, and University Texas (Austin) have used @@ -90,14 +93,16 @@

History and Background

xv6 without Jos, though we are not aware of any that have.

Xv6 sources

-The latest xv6 is xv6-rev3.tar.gz. + +The latest xv6 is xv6-rev5.tar.gz. We distribute the sources in electronic form but also as a printed booklet with line numbers that keep everyone together during lectures. The booklet is available as -xv6-rev3.pdf. +xv6-rev5.pdf. The xv6 source code is licensed under the traditional MIT license; see the LICENSE file in the source distribution. -

+ +

xv6 compiles using the GNU C compiler, targeted at the x86 using ELF binaries. On BSD and Linux systems, you can use the native compilers; @@ -106,239 +111,131 @@

Xv6 sources

Xv6 does boot on real hardware, but typically we run it using the Bochs emulator. Both the GCC cross compiler and Bochs -can be found on the 6.828 tools page. - -

Lectures

-In 6.828, the lectures in the first half of the course -introduce the PC hardware, the Intel x86, and then xv6. -The lectures in the second half consider advanced topics -using research papers; for some, xv6 serves as a useful -base for making discussions concrete. -This section describe a typical 6.828 lecture schedule, -linking to lecture notes and homework. -A course using only xv6 (not Jos) will need to adapt -a few of the lectures, but we hope these are a useful -starting point. - -

Lecture 1. Operating systems -

-The first lecture introduces both the general topic of -operating systems and the specific approach of 6.828. -After defining “operating system,” the lecture -examines the implementation of a Unix shell -to look at the details the traditional Unix system call interface. -This is relevant to both xv6 and Jos: in the final -Jos labs, students implement a Unix-like interface -and culminating in a Unix shell. -

-lecture notes -OS abstractions slides - -

Lecture 2. PC hardware and x86 programming -

-This lecture introduces the PC architecture, the 16- and 32-bit x86, -the stack, and the GCC x86 calling conventions. -It also introduces the pieces of a typical C tool chain–compiler, -assembler, linker, loader–and the Bochs emulator. -

-Reading: PC Assembly Language -

-Homework: familiarize with Bochs -

-lecture notes -x86 intro slides -homework - -

Lecture 3. Operating system organization -

-This lecture continues Lecture 1's discussion of what -an operating system does. -An operating system provides a “virtual computer” -interface to user space programs. -At a high level, the main job of the operating system -is to implement that interface -using the physical computer it runs on. -

-The lecture discusses four approaches to that job: -monolithic operating systems, microkernels, -virtual machines, and exokernels. -Exokernels might not be worth mentioning -except that the Jos labs are built around one. -

-Reading: Engler et al., Exokernel: An Operating System Architecture -for Application-Level Resource Management -

-lecture notes - -

Lecture 4. Address spaces using segmentation -

-This is the first lecture that uses xv6. -It introduces the idea of address spaces and the -details of the x86 segmentation hardware. -It makes the discussion concrete by reading the xv6 -source code and watching xv6 execute using the Bochs simulator. -

-Reading: x86 MMU handout, -xv6: bootasm.S, bootother.S, bootmain.c, main.c, init.c, and setupsegs in proc.c. -

-Homework: Bochs stack introduction -

-lecture notes -x86 virtual memory slides -homework - -

Lecture 5. Address spaces using page tables -

-This lecture continues the discussion of address spaces, -examining the other x86 virtual memory mechanism: page tables. -Xv6 does not use page tables, so there is no xv6 here. -Instead, the lecture uses Jos as a concrete example. -An xv6-only course might skip or shorten this discussion. -

-Reading: x86 manual excerpts -

-Homework: stuff about gdt -XXX not appropriate; should be in Lecture 4 -

-lecture notes - -

Lecture 6. Interrupts and exceptions -

-How does a user program invoke the operating system kernel? -How does the kernel return to the user program? -What happens when a hardware device needs attention? -This lecture explains the answer to these questions: -interrupt and exception handling. -

-It explains the x86 trap setup mechanisms and then -examines their use in xv6's SETGATE (mmu.h), -tvinit (trap.c), idtinit (trap.c), vectors.pl, and vectors.S. -

-It then traces through a call to the system call open: -init.c, usys.S, vector48 and alltraps (vectors.S), trap (trap.c), -syscall (syscall.c), -sys_open (sysfile.c), fetcharg, fetchint, argint, argptr, argstr (syscall.c), -

-The interrupt controller, briefly: -pic_init and pic_enable (picirq.c). -The timer and keyboard, briefly: -timer_init (timer.c), console_init (console.c). -Enabling and disabling of interrupts. -

-Reading: x86 manual excerpts, -xv6: trapasm.S, trap.c, syscall.c, and usys.S. -Skim lapic.c, ioapic.c, picirq.c. -

-Homework: Explain the 35 words on the top of the -stack at first invocation of syscall. -

-lecture notes -homework - -

Lecture 7. Multiprocessors and locking -

-This lecture introduces the problems of -coordination and synchronization on a -multiprocessor -and then the solution of mutual exclusion locks. -Atomic instructions, test-and-set locks, -lock granularity, (the mistake of) recursive locks. -

-Although xv6 user programs cannot share memory, -the xv6 kernel itself is a program with multiple threads -executing concurrently and sharing memory. -Illustration: the xv6 scheduler's proc_table_lock (proc.c) -and the spin lock implementation (spinlock.c). -

-Reading: xv6: spinlock.c. Skim mp.c. -

-Homework: Interaction between locking and interrupts. -Try not disabling interrupts in the disk driver and watch xv6 break. -

-lecture notes -homework - -

Lecture 8. Threads, processes and context switching -

-The last lecture introduced some of the issues -in writing threaded programs, using xv6's processes -as an example. -This lecture introduces the issues in implementing -threads, continuing to use xv6 as the example. -

-The lecture defines a thread of computation as a register -set and a stack. A process is an address space plus one -or more threads of computation sharing that address space. -Thus the xv6 kernel can be viewed as a single process -with many threads (each user process) executing concurrently. -

-Illustrations: thread switching (swtch.S), scheduler (proc.c), sys_fork (sysproc.c) -

-Reading: proc.c, swtch.S, sys_fork (sysproc.c) -

-Homework: trace through stack switching. -

-lecture notes (need to be updated to use swtch) -homework - -

Lecture 9. Processes and coordination -

-This lecture introduces the idea of sequence coordination -and then examines the particular solution illustrated by -sleep and wakeup (proc.c). -It introduces and refines a simple -producer/consumer queue to illustrate the -need for sleep and wakeup -and then the sleep and wakeup -implementations themselves. -

-Reading: proc.c, sys_exec, sys_sbrk, sys_wait, sys_exec, sys_kill (sysproc.c). -

-Homework: Explain how sleep and wakeup would break -without proc_table_lock. Explain how devices would break -without second lock argument to sleep. -

-lecture notes -homework - -

Lecture 10. Files and disk I/O -

-This is the first of three file system lectures. -This lecture introduces the basic file system interface -and then considers the on-disk layout of individual files -and the free block bitmap. -

-Reading: iread, iwrite, fileread, filewrite, wdir, mknod1, and - code related to these calls in fs.c, bio.c, ide.c, and file.c. -

-Homework: Add print to bwrite to trace every disk write. -Explain the disk writes caused by some simple shell commands. -

-lecture notes -homework - -

Lecture 11. Naming -

-The last lecture discussed on-disk file system representation. -This lecture covers the implementation of -file system paths (namei in fs.c) -and also discusses the security problems of a shared /tmp -and symbolic links. -

-Understanding exec (exec.c) is left as an exercise. -

-Reading: namei in fs.c, sysfile.c, file.c. -

-Homework: Explain how to implement symbolic links in xv6. -

-lecture notes -homework - -

Lecture 12. High-performance file systems -

-This lecture is the first of the research paper-based lectures. -It discusses the “soft updates” paper, -using xv6 as a concrete example. +can be found on the 6.828 tools page. + +

Xv6 lecture material

+ +In 6.828, the lectures in the first half of the course introduce the +PC hardware, the Intel x86, and then xv6. The lectures in the second +half consider advanced topics using research papers; for some, xv6 +serves as a useful base for making discussions concrete. The lecture +notes are available from the 6.828 schedule page, and the chapters of +the commentary are below. + +

Xv6 commentary (rough)

+ +

The chapters are rough drafts. + +

Introduction yet to be written.
+

    +
  • read with the code side by side +
  • code references look like (xxxx) or (xxxx-yyyy) in small text. +
  • this pdf is the one with matching line numbers. +
  • each chapter starts with an introduction to the topic, +spends most of the text on code, +and then wraps up talking about how xv6 +compares to real-world operating systems. +
+ +Chapter 0: Operating system interfaces +
+The Unix system call interface. (rev 4) +
+ +Chapter 1: Bootstrap +
+From power on to kernel start. (rev 4) +
+ +Chapter 2: Processes +
+Memory and process allocation, segments, the first user process. (rev 4) +
+ +Chapter 3: Traps +
+Low-level trap mechanism, trap handler, system call arguments, sbrk, fork. +
+ +Chapter 4: Locks +
+Locks and interrupts. +
+ +Chapter 5: Scheduling and coordination +
+Scheduling, sleep and wakeup, pipes, wait and exit. +
+ +Chapter 6: Buffer cache +
+Buffer cache and IDE disk driver. +
+ +Chapter 7: File system data +
+Block in use bitmap, block allocation, inode structure, inode contents, +directories, path names. +
+ +Chapter 8: File system calls +
+File descriptors, open, close, dup, read, write.
+ +Chapter 9: Exec +
+Exec +
+ +Appendix A: Low-level C and inline assembly +
+Intro to C and inline assembly for people who only know Java (say). +Examples drawn entirely from xv6 source. +
+ +Appendix B: Additional drivers. +
+Keyboard, screen, probably MP hardware. +
+ + +

Unix Version 6

+ +

6.828's xv6 is inspired by Unix V6 and by: + +

+ +The following are useful for reading the original code: +
    +
  • +The PDP11/40 Processor Handbook, Digital Equipment Corporation, 1972. +
      +
    • A PDF (made from scanned images, +and not text-searchable) +
    • A web-based +version that is indexed by instruction name. +
    + +

Feedback

If you are interested in using xv6 or have used xv6 in a course, @@ -346,13 +243,10 @@

Feedback

If there's anything that we can do to make xv6 easier to adopt, we'd like to hear about it. We'd also be interested to hear what worked well and what didn't. -

+

Russ Cox (rsc@swtch.com)
Frans Kaashoek (kaashoek@mit.edu)
Robert Morris (rtm@mit.edu) -

+

You can reach all of us at 6.828-staff@pdos.csail.mit.edu. -

-

- - + diff --git a/web/index.txt b/web/index.txt deleted file mode 100644 index 6f3e1f6de4..0000000000 --- a/web/index.txt +++ /dev/null @@ -1,339 +0,0 @@ -** Xv6, a simple Unix-like teaching operating system -Xv6 is a teaching operating system developed -in the summer of 2006 for MIT's operating systems course, -``6.828: Operating Systems Engineering.'' -We used it for 6.828 in Fall 2006 and Fall 2007 -and are using it this semester (Fall 2008). -We hope that xv6 will be useful in other courses too. -This page collects resources to aid the use of xv6 -in other courses. - -* History and Background - -For many years, MIT had no operating systems course. -In the fall of 2002, Frans Kaashoek, Josh Cates, and Emil Sit -created a new, experimental course (6.097) -to teach operating systems engineering. -In the course lectures, the class worked through Sixth Edition Unix (aka V6) -using John Lions's famous commentary. -In the lab assignments, students wrote most of an exokernel operating -system, eventually named Jos, for the Intel x86. -Exposing students to multiple systems--V6 and Jos--helped -develop a sense of the spectrum of operating system designs. -In the fall of 2003, the experimental 6.097 became the -official course 6.828; the course has been offered each fall since then. - -V6 presented pedagogic challenges from the start. -Students doubted the relevance of an obsolete 30-year-old operating system -written in an obsolete programming language (pre-K&R C) -running on obsolete hardware (the PDP-11). -Students also struggled to learn the low-level details of two different -architectures (the PDP-11 and the Intel x86) at the same time. -By the summer of 2006, we had decided to replace V6 -with a new operating system, xv6, modeled on V6 -but written in ANSI C and running on multiprocessor -Intel x86 machines. -Xv6's use of the x86 makes it more relevant to -students' experience than V6 was -and unifies the course around a single architecture. 
-Adding multiprocessor support requires handling concurrency head on with -locks and threads (instead of using special-case solutions for -uniprocessors such as -enabling/disabling interrupts) and helps relevance. -Finally, writing a new system allowed us to write cleaner versions -of the rougher parts of V6, like the scheduler and file system. - -6.828 substituted xv6 for V6 in the fall of 2006. -Based on that experience, we cleaned up rough patches -of xv6 for the course in the fall of 2007. -Since then, xv6 has stabilized, so we are making it -available in the hopes that others will find it useful too. - -6.828 uses both xv6 and Jos. -Courses taught at UCLA, NYU, Peking University, Stanford, Tsinghua, -and University Texas (Austin) have used -Jos without xv6; we believe other courses could use -xv6 without Jos, though we are not aware of any that have. - - -* Xv6 sources - -The latest xv6 is [xv6-rev2.tar.gz]. -We distribute the sources in electronic form but also as -a printed booklet with line numbers that keep everyone -together during lectures. The booklet is available as -[xv6-rev2.pdf]. - -xv6 compiles using the GNU C compiler, -targeted at the x86 using ELF binaries. -On BSD and Linux systems, you can use the native compilers; -On OS X, which doesn't use ELF binaries, -you must use a cross-compiler. -Xv6 does boot on real hardware, but typically -we run it using the Bochs emulator. -Both the GCC cross compiler and Bochs -can be found on the [../../2007/tools.html | 6.828 tools page]. - - -* Lectures - -In 6.828, the lectures in the first half of the course -introduce the PC hardware, the Intel x86, and then xv6. -The lectures in the second half consider advanced topics -using research papers; for some, xv6 serves as a useful -base for making discussions concrete. -This section describe a typical 6.828 lecture schedule, -linking to lecture notes and homework. 
-A course using only xv6 (not Jos) will need to adapt -a few of the lectures, but we hope these are a useful -starting point. - - -Lecture 1. Operating systems - -The first lecture introduces both the general topic of -operating systems and the specific approach of 6.828. -After defining ``operating system,'' the lecture -examines the implementation of a Unix shell -to look at the details the traditional Unix system call interface. -This is relevant to both xv6 and Jos: in the final -Jos labs, students implement a Unix-like interface -and culminating in a Unix shell. - -[l1.html | lecture notes] -[os-lab-1.pdf | OS abstractions slides] - - -Lecture 2. PC hardware and x86 programming - -This lecture introduces the PC architecture, the 16- and 32-bit x86, -the stack, and the GCC x86 calling conventions. -It also introduces the pieces of a typical C tool chain--compiler, -assembler, linker, loader--and the Bochs emulator. - -Reading: PC Assembly Language - -Homework: familiarize with Bochs - -[l2.html | lecture notes] -[os-lab-2.pdf | x86 intro slides] -[x86-intro.html | homework] - - -Lecture 3. Operating system organization - -This lecture continues Lecture 1's discussion of what -an operating system does. -An operating system provides a ``virtual computer'' -interface to user space programs. -At a high level, the main job of the operating system -is to implement that interface -using the physical computer it runs on. - -The lecture discusses four approaches to that job: -monolithic operating systems, microkernels, -virtual machines, and exokernels. -Exokernels might not be worth mentioning -except that the Jos labs are built around one. - -Reading: Engler et al., Exokernel: An Operating System Architecture -for Application-Level Resource Management - -[l3.html | lecture notes] - - -Lecture 4. Address spaces using segmentation - -This is the first lecture that uses xv6. -It introduces the idea of address spaces and the -details of the x86 segmentation hardware. 
-It makes the discussion concrete by reading the xv6 -source code and watching xv6 execute using the Bochs simulator. - -Reading: x86 MMU handout, -xv6: bootasm.S, bootother.S, bootmain.c, main.c, init.c, and setupsegs in proc.c. - -Homework: Bochs stack introduction - -[l4.html | lecture notes] -[os-lab-3.pdf | x86 virtual memory slides] -[xv6-intro.html | homework] - - -Lecture 5. Address spaces using page tables - -This lecture continues the discussion of address spaces, -examining the other x86 virtual memory mechanism: page tables. -Xv6 does not use page tables, so there is no xv6 here. -Instead, the lecture uses Jos as a concrete example. -An xv6-only course might skip or shorten this discussion. - -Reading: x86 manual excerpts - -Homework: stuff about gdt -XXX not appropriate; should be in Lecture 4 - -[l5.html | lecture notes] - - -Lecture 6. Interrupts and exceptions - -How does a user program invoke the operating system kernel? -How does the kernel return to the user program? -What happens when a hardware device needs attention? -This lecture explains the answer to these questions: -interrupt and exception handling. - -It explains the x86 trap setup mechanisms and then -examines their use in xv6's SETGATE (mmu.h), -tvinit (trap.c), idtinit (trap.c), vectors.pl, and vectors.S. - -It then traces through a call to the system call open: -init.c, usys.S, vector48 and alltraps (vectors.S), trap (trap.c), -syscall (syscall.c), -sys_open (sysfile.c), fetcharg, fetchint, argint, argptr, argstr (syscall.c), - -The interrupt controller, briefly: -pic_init and pic_enable (picirq.c). -The timer and keyboard, briefly: -timer_init (timer.c), console_init (console.c). -Enabling and disabling of interrupts. - -Reading: x86 manual excerpts, -xv6: trapasm.S, trap.c, syscall.c, and usys.S. -Skim lapic.c, ioapic.c, picirq.c. - -Homework: Explain the 35 words on the top of the -stack at first invocation of syscall. 
- -[l-interrupt.html | lecture notes] -[x86-intr.html | homework] - - -Lecture 7. Multiprocessors and locking - -This lecture introduces the problems of -coordination and synchronization on a -multiprocessor -and then the solution of mutual exclusion locks. -Atomic instructions, test-and-set locks, -lock granularity, (the mistake of) recursive locks. - -Although xv6 user programs cannot share memory, -the xv6 kernel itself is a program with multiple threads -executing concurrently and sharing memory. -Illustration: the xv6 scheduler's proc_table_lock (proc.c) -and the spin lock implementation (spinlock.c). - -Reading: xv6: spinlock.c. Skim mp.c. - -Homework: Interaction between locking and interrupts. -Try not disabling interrupts in the disk driver and watch xv6 break. - -[l-lock.html | lecture notes] -[xv6-lock.html | homework] - - -Lecture 8. Threads, processes and context switching - -The last lecture introduced some of the issues -in writing threaded programs, using xv6's processes -as an example. -This lecture introduces the issues in implementing -threads, continuing to use xv6 as the example. - -The lecture defines a thread of computation as a register -set and a stack. A process is an address space plus one -or more threads of computation sharing that address space. -Thus the xv6 kernel can be viewed as a single process -with many threads (each user process) executing concurrently. - -Illustrations: thread switching (swtch.S), scheduler (proc.c), sys_fork (sysproc.c) - -Reading: proc.c, swtch.S, sys_fork (sysproc.c) - -Homework: trace through stack switching. - -[l-threads.html | lecture notes (need to be updated to use swtch)] -[xv6-sched.html | homework] - - -Lecture 9. Processes and coordination - -This lecture introduces the idea of sequence coordination -and then examines the particular solution illustrated by -sleep and wakeup (proc.c). 
-It introduces and refines a simple -producer/consumer queue to illustrate the -need for sleep and wakeup -and then the sleep and wakeup -implementations themselves. - -Reading: proc.c, sys_exec, sys_sbrk, sys_wait, sys_exec, sys_kill (sysproc.c). - -Homework: Explain how sleep and wakeup would break -without proc_table_lock. Explain how devices would break -without second lock argument to sleep. - -[l-coordination.html | lecture notes] -[xv6-sleep.html | homework] - - -Lecture 10. Files and disk I/O - -This is the first of three file system lectures. -This lecture introduces the basic file system interface -and then considers the on-disk layout of individual files -and the free block bitmap. - -Reading: iread, iwrite, fileread, filewrite, wdir, mknod1, and - code related to these calls in fs.c, bio.c, ide.c, and file.c. - -Homework: Add print to bwrite to trace every disk write. -Explain the disk writes caused by some simple shell commands. - -[l-fs.html | lecture notes] -[xv6-disk.html | homework] - - -Lecture 11. Naming - -The last lecture discussed on-disk file system representation. -This lecture covers the implementation of -file system paths (namei in fs.c) -and also discusses the security problems of a shared /tmp -and symbolic links. - -Understanding exec (exec.c) is left as an exercise. - -Reading: namei in fs.c, sysfile.c, file.c. - -Homework: Explain how to implement symbolic links in xv6. - -[l-name.html | lecture notes] -[xv6-names.html | homework] - - -Lecture 12. High-performance file systems - -This lecture is the first of the research paper-based lectures. -It discusses the ``soft updates'' paper, -using xv6 as a concrete example. - - -* Feedback - -If you are interested in using xv6 or have used xv6 in a course, -we would love to hear from you. -If there's anything that we can do to make xv6 easier -to adopt, we'd like to hear about it. -We'd also be interested to hear what worked well and what didn't. - -Russ Cox (rsc@swtch.com)
-Frans Kaashoek (kaashoek@mit.edu)
-Robert Morris (rtm@mit.edu) - -You can reach all of us at 6.828-staff@pdos.csail.mit.edu. - -xv6 and lecture notes are copyright © 2006-present by Russ Cox, -Frans Kaashoek, and Robert Morris. diff --git a/web/lock.pdf b/web/lock.pdf new file mode 100644 index 0000000000..b6574a480a Binary files /dev/null and b/web/lock.pdf differ diff --git a/web/mem.pdf b/web/mem.pdf new file mode 100644 index 0000000000..1fe14f0b59 Binary files /dev/null and b/web/mem.pdf differ diff --git a/web/mkhtml b/web/mkhtml deleted file mode 100755 index 74987e6a5a..0000000000 --- a/web/mkhtml +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/perl - -my @lines = <>; -my $text = join('', @lines); -my $title; -if($text =~ /^\*\* (.*?)\n/m){ - $title = $1; - $text = $` . $'; -}else{ - $title = "Untitled"; -} - -$text =~ s/[ \t]+$//mg; -$text =~ s/^$/

/mg; -$text =~ s!\b([a-z0-9]+\.(c|s|pl|h))\b!$1!g; -$text =~ s!^(Lecture [0-9]+\. .*?)$!$1!mg; -$text =~ s!^\* (.*?)$!

$1

!mg; -$text =~ s!((
)+\n)+

!\n

!g; -$text =~ s!

\n?((
)+\n)+!\n!g; -$text =~ s!((
)+\n)+!\n

!g; -$text =~ s!\b\s*--\s*\b!\–!g; -$text =~ s!\[([^\[\]|]+) \| ([^\[\]]+)\]!$2!g; -$text =~ s!\[([^ \t]+)\]!$1!g; - -$text =~ s!``!\“!g; -$text =~ s!''!\”!g; - -print < - - -$title - - - -

$title

-

-EOF -print $text; -print < - -EOF diff --git a/web/sched.pdf b/web/sched.pdf new file mode 100644 index 0000000000..4255d2fb82 Binary files /dev/null and b/web/sched.pdf differ diff --git a/web/trap.pdf b/web/trap.pdf new file mode 100644 index 0000000000..d7910c6f0d Binary files /dev/null and b/web/trap.pdf differ diff --git a/web/unix.pdf b/web/unix.pdf new file mode 100644 index 0000000000..4fd7972542 Binary files /dev/null and b/web/unix.pdf differ diff --git a/x86.h b/x86.h index b9fa8b8ac4..5a59cc2043 100644 --- a/x86.h +++ b/x86.h @@ -90,23 +90,26 @@ readeflags(void) return eflags; } +static inline void +loadgs(ushort v) +{ + asm volatile("movw %0, %%gs" : : "r" (v)); +} + static inline uint -xchg(volatile uint *addr, uint newval) +rebp(void) { - uint result; - - // The + in "+m" denotes a read-modify-write operand. - asm volatile("lock; xchgl %0, %1" : - "+m" (*addr), "=a" (result) : - "1" (newval) : - "cc"); - return result; + uint val; + asm volatile("movl %%ebp,%0" : "=r" (val)); + return val; } -static inline void -loadgs(ushort v) +static inline uint +resp(void) { - asm volatile("movw %0, %%gs" : : "r" (v)); + uint val; + asm volatile("movl %%esp,%0" : "=r" (val)); + return val; } static inline void @@ -121,66 +124,56 @@ sti(void) asm volatile("sti"); } -static inline void lcr0(uint val) +static inline uint +xchg(volatile uint *addr, uint newval) +{ + uint result; + + // The + in "+m" denotes a read-modify-write operand. + asm volatile("lock; xchgl %0, %1" : + "+m" (*addr), "=a" (result) : + "1" (newval) : + "cc"); + return result; +} + +//PAGEBREAK! 
+static inline void +lcr0(uint val) { asm volatile("movl %0,%%cr0" : : "r" (val)); } -static inline uint rcr0(void) +static inline uint +rcr0(void) { uint val; asm volatile("movl %%cr0,%0" : "=r" (val)); return val; } -static inline uint rcr2(void) +static inline uint +rcr2(void) { uint val; asm volatile("movl %%cr2,%0" : "=r" (val)); return val; } -static inline void lcr3(uint val) +static inline void +lcr3(uint val) { asm volatile("movl %0,%%cr3" : : "r" (val)); } -static inline uint rcr3(void) +static inline uint +rcr3(void) { uint val; asm volatile("movl %%cr3,%0" : "=r" (val)); return val; } -static inline void lebp(uint val) -{ - asm volatile("movl %0,%%ebp" : : "r" (val)); -} - -static inline uint rebp(void) -{ - uint val; - asm volatile("movl %%ebp,%0" : "=r" (val)); - return val; -} - -static inline void lesp(uint val) -{ - asm volatile("movl %0,%%esp" : : "r" (val)); -} - -static inline uint resp(void) -{ - uint val; - asm volatile("movl %%esp,%0" : "=r" (val)); - return val; -} - -static inline void nop_pause(void) -{ - asm volatile("pause" : :); -} - //PAGEBREAK: 36 // Layout of the trap frame built on the stack by the // hardware and by trapasm.S, and passed to trap(). diff --git a/xv6-rev4.tar.gz b/xv6-rev4.tar.gz new file mode 100644 index 0000000000..adb3acc5ff Binary files /dev/null and b/xv6-rev4.tar.gz differ diff --git a/xv6-rev5.pdf b/xv6-rev5.pdf new file mode 100644 index 0000000000..3ea90777bc Binary files /dev/null and b/xv6-rev5.pdf differ diff --git a/xv6-rev5.tar.gz b/xv6-rev5.tar.gz new file mode 100644 index 0000000000..c73fc21932 Binary files /dev/null and b/xv6-rev5.tar.gz differ