Page granularity allocators, basic printk, banner print
authorJack Miller <jack@codezen.org>
Fri, 4 Mar 2016 21:41:08 +0000 (15:41 -0600)
committerJack Miller <jack@codezen.org>
Sat, 19 Mar 2016 19:02:48 +0000 (14:02 -0500)
20 files changed:
Makefile
asm/gdt.asm
asm/head.asm
asm/idt.asm
asm/misc.asm [new file with mode: 0644]
include/asm/misc.h [new file with mode: 0644]
include/early.h [deleted file]
include/errno.h [new file with mode: 0644]
include/grub.h
include/kernel.h
include/map.h [new file with mode: 0644]
include/memmap.h [new file with mode: 0644]
include/page_alloc.h [new file with mode: 0644]
include/types.h [new file with mode: 0644]
include/vmalloc.h [new file with mode: 0644]
kernel/main.c
linker.ld
mm/map.c [new file with mode: 0644]
mm/page_alloc.c [new file with mode: 0644]
mm/vmalloc.c [new file with mode: 0644]

index ee9e102..06da764 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -3,14 +3,19 @@ LD = ld
 NASM=nasm
 CPP=cpp
 
+MAJOR=0
+MINOR=1
+
 INCLUDE_DIRS = include/
 
-CPPFLAGS = -I ${INCLUDE_DIRS}
-CFLAGS = -c -std=gnu99 -pedantic -Wall -nostdlib -ffreestanding -mcmodel=kernel -g -I ${INCLUDE_DIRS}
+GIT_REV=$(shell git describe --always)
+
+CPPFLAGS = -I ${INCLUDE_DIRS} -D__GIT_VERSION__='"$(GIT_REV)"' -D__MAJOR_VERSION__=$(MAJOR) -D__MINOR_VERSION__=$(MINOR)
+CFLAGS = ${CPPFLAGS} -c -std=gnu99 -pedantic -Wall -nostdlib -ffreestanding -mcmodel=kernel -g
 LDFLAGS = -T linker.ld -nostdlib -n
 NASMFLAGS = -f elf64
 
-SRC_C = $(wildcard kernel/*.c)
+SRC_C = $(wildcard boot/*.c kernel/*.c mm/*.c)
 OBJ_C = ${SRC_C:.c=.o}
 
 SRC_ASM = $(wildcard asm/*.asm)
@@ -27,7 +32,7 @@ all: viridis
 
 %.o : %.asm
        @echo NASM $<
-       @${CPP} ${CPPFLAGS} $< > $@.tmp
+       @${CPP} ${CPPFLAGS} -D__ASSEMBLY__ $< > $@.tmp
        @${NASM} ${NASMFLAGS} $@.tmp -o $@
        @rm $@.tmp
 
index a8052cb..ace5cb5 100644 (file)
@@ -1,7 +1,7 @@
 BITS 32
 SECTION .text_early
 
-#include <early.h>
+#include <memmap.h>
 
 /* AMD64 Architecture Programmer's Manual v2 4.8
  *
index 2253003..5c2c787 100644 (file)
@@ -1,6 +1,6 @@
 BITS 32
 
-#include <early.h>
+#include <memmap.h>
 
 /* main.c */
 EXTERN main
@@ -153,7 +153,7 @@ zero_page_mem:
      * now let's set them up.
      */
 
-    /* First, map the PML4 into itself (see early.h) */
+    /* First, map the PML4 into itself (see memmap.h) */
 
     /* eax = address of PML4[510]*/
     mov eax, ebx
@@ -369,8 +369,6 @@ cleanup_64:
     mov rax, cr3
     mov cr3, rax
 
-    sti
-
     /* Move to rax first to avoid linker relocation truncation. */
     mov rax, main
 
index 8528cb3..4c7d72c 100644 (file)
@@ -1,7 +1,7 @@
 BITS 32
 SECTION .text_early
 
-#include <early.h>
+#include <memmap.h>
 
 /* gdt.asm */
 EXTERN CODE_SEL_64
@@ -19,21 +19,6 @@ EXTERN CODE_SEL_64
 
 #define IDT_ENTRIES     256
 
-/* Generate 256 interrupt routines. */
-
-/* 1 = ISR # */
-
-%macro ISR 1
-isr%1:
-    jmp $
-%endmacro
-
-ISRS:
-%assign i 0
-%rep IDT_ENTRIES
-ISR i
-%assign i (i+1)
-%endrep
 
 #define ISR_SIZE (isr1 - isr0)
 
@@ -109,6 +94,23 @@ idt_init_one:
     ret
 
 BITS 64
+
+/* Generate 256 interrupt routines. */
+
+/* 1 = ISR # */
+
+%macro ISR 1
+isr%1:
+    iretq
+%endmacro
+
+ISRS:
+%assign i 0
+%rep IDT_ENTRIES
+ISR i
+%assign i (i+1)
+%endrep
+
 GLOBAL fixup_idtr
 fixup_idtr:
     mov rax, VIRT_BASE + IDTR + 2
diff --git a/asm/misc.asm b/asm/misc.asm
new file mode 100644 (file)
index 0000000..0de0fca
--- /dev/null
@@ -0,0 +1,8 @@
+GLOBAL reset_cr3
+
+/* Reload CR3 with itself to flush the TLB after page table updates */
+
+reset_cr3:
+    mov rax, cr3
+    mov cr3, rax
+    ret
diff --git a/include/asm/misc.h b/include/asm/misc.h
new file mode 100644 (file)
index 0000000..f058a30
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef ASM_MISC_H
+#define ASM_MISC_H
+
+extern void reset_cr3(void);
+
+#endif
diff --git a/include/early.h b/include/early.h
deleted file mode 100644 (file)
index a1fcbb5..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-#ifndef EARLY_H
-#define EARLY_H
-
-#define VIRT_BASE 0xFFFFFFFF80000000
-
-/* 4k page size */
-#define PAGE_SHIFT      12
-#define PAGE_SIZE       (1 << PAGE_SHIFT)
-#define PAGE_MASK      (PAGE_SIZE - 1)
-
-#define PTE_SHIFT      (PAGE_SHIFT + 9*0)
-#define PDE_SHIFT      (PAGE_SHIFT + 9*1)
-#define PDPE_SHIFT     (PAGE_SHIFT + 9*2)
-#define PML4E_SHIFT    (PAGE_SHIFT + 9*3)
-
-/* Find index based on virtual address */
-#define PTE(x)         (((x) >> PTE_SHIFT) & 0x1FF)
-#define PDE(x)         (((x) >> PDE_SHIFT) & 0x1FF)
-#define PDPE(x)                (((x) >> PDPE_SHIFT) & 0x1FF)
-#define PML4E(x)       (((x) >> PML4E_SHIFT) & 0x1FF)
-
-/* Find page structure based on virtual address */
-
-/* Because we mapped the PML4 page into itself as the second to last entry, and
- * the structure of the PML4/PDP/PD/PTs are all compatible, we can then use the
- * bits in the virtual address to locate its relevant page structures.
- *
- * With the standard 4-level page tables, the "leaves" are data pages.
- *
- * With the PML4 mapped into itself at 0x1fe, if you have an address with PML4E
- * = 0x1fe, then the PML4 becomes the PDP, the PDP becomes the PD, the PD
- * becomes the PT and PTs are now the leaves and accessible.
- *
- * If you create an address with PML4E = 0x1fe and PDP = 0x1fe, then the PDs
- * are the leaves and accessible.
- *
- * So if we want to access, for example, the page directory for a certain
- * address, we can set PML4E = 0x1fe and PDP = 0x1fe... but what are the rest
- * of the bits? The look ups that the MMU already does! The PML4E in the
- * address becomes the PDE index and the PDPE in the address becomes the PTE
- * index.
- */
-
-/* The BASE definitions are merely handling setting the entries we know need to
- * be 0x1FE for each form of lookup. P_VIRT_BASE is just the sign extension we
- * know must be present for a valid address.
- *
- * As we layed out above, to get the PTs to be the leaf nodes (accessible
- * data), just the PML4E has to be 0x1fe, and so on
- */
-
-#define P_VIRT_BASE    (0xFFFF000000000000)
-#define PT_VIRT_BASE   (P_VIRT_BASE | ((long) 0x1FE << PML4E_SHIFT))
-#define PD_VIRT_BASE   (PT_VIRT_BASE | ((long) 0x1FE << PDPE_SHIFT))
-#define PDP_VIRT_BASE  (PD_VIRT_BASE | ((long) 0x1FE << PDE_SHIFT))
-#define PML4_VIRT_BASE (PDP_VIRT_BASE | ((long) 0x1FE << PTE_SHIFT))
-
-/*
- * Derive addresses of these entries. This macro is complicated so let's break
- * it down.
- *
- * Let's say I'm trying to find PTE address for 0x12345678, which in binary is:
- *
- * 00 | 01 0010 001 | 1 0100 0101 | 0110 0111 1000
- *    | PDE         | PTE         | PAGE OFFSET
- *
- * There's no sign extension, but nonetheless (x & ~P_VIRT_BASE) doesn't hurt.
- * Then we shift it down by 9, we get
- *
- * 0000 0 | 000 0000 00 | 01 0010 001 | 1 0100 0101 011
- *        | PDE         | PTE         | PAGE OFFSET
- *
- * As you can see, the new PTE is the old PDE, the new PDE is the old PDP (all
- * zeroes) and so forth, but the top three bits of the original page offset are
- * now the bottom three bits of it, and are irrelevant, so we zero them with an
- * (& ~7).
- *
- * 0000 0 | 000 0000 00 | 01 0010 001 | 1 0100 0101 000
- *        | PDE         | PTE         | PAGE OFFSET
- *
- * Now that we've properly shifted all of our lookups, and discarded the
- * unnecessary page offset and sign extension bits, we can and in the BASE,
- * which as explained above is a constructed value where all of the unset
- * lookups are set to 0x1fe. In this case, our PML4E which is now 0 thanks to
- * shifting, needs to be set to 0x1fe, which is exactly what PT_VIRT_BASE
- * includes, along with the sign extension required due to the top bit of the
- * PML4E being 1
- */
-
-#define PTE_ADDR(x)      (PT_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 9)  & ~7))
-#define PDE_ADDR(x)      (PD_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 18) & ~7))
-#define PDPE_ADDR(x)     (PDP_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 27) & ~7))
-#define PML4E_ADDR(x)    (PML4_VIRT_BASE | (((x & ~P_VIRT_BASE) >> 36) & ~7))
-
-#define PF_P                (1 << 0)
-#define PF_RW               (1 << 1)
-#define PF_USER             (1 << 2)
-#define PF_WRITETHRU        (1 << 3)
-#define PF_DISABLE_CACHE    (1 << 4)
-
-/* Early physical addresses */
-#define KERNEL_START 0x100000
-
-#define S_PAGES                        2       // 8k stack, for now
-#define STACK_PAGES_PHYS       0       // Starting at 0
-#define STACK_PAGES_START (KERNEL_START - (S_PAGES * PAGE_SIZE))
-
-
-#endif
diff --git a/include/errno.h b/include/errno.h
new file mode 100644 (file)
index 0000000..ac96fe1
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef ERRNO_H
+#define ERRNO_H
+
+/* Oh no copyright infringement. Come at me, SCO. */
+
+/* A small subset of POSIX errnos */
+
+#define ENOENT 2
+#define E2BIG  7
+#define EINVAL 11
+#define ENOMEM 12
+#define EACCES 13
+
+#endif
index c5c5e47..3796add 100644 (file)
@@ -1,17 +1,96 @@
 #ifndef GRUB_H
 #define GRUB_H
 
+#include <types.h>
+
 struct grub_signature {
-       unsigned int magic;
-       unsigned int flags;
-       unsigned int checksum;
+       u32 magic;
+       u32 flags;
+       u32 checksum;
 };
 
 #define GRUB_MAGIC 0x1BADB002
-#define GRUB_FLAGS 0x0
+
+#define GRUB_FLAGS_REQ_MMAP  (1 << 1)
+
+#define GRUB_FLAGS (GRUB_FLAGS_REQ_MMAP)
 #define GRUB_CHECKSUM (-1 * (GRUB_MAGIC + GRUB_FLAGS))
 
-struct grub_signature gs __attribute__ ((section (".grub_sig"))) =
-       { GRUB_MAGIC, GRUB_FLAGS, GRUB_CHECKSUM };
+/* Multiboot Specification 3.3
+ *
+ *           +-------------------+
+ *   0       | flags             |    (required)
+ *           +-------------------+
+ *   4       | mem_lower         |    (present if flags[0] is set)
+ *   8       | mem_upper         |    (present if flags[0] is set)
+ *           +-------------------+
+ *   12      | boot_device       |    (present if flags[1] is set)
+ *           +-------------------+
+ *   16      | cmdline           |    (present if flags[2] is set)
+ *           +-------------------+
+ *   20      | mods_count        |    (present if flags[3] is set)
+ *   24      | mods_addr         |    (present if flags[3] is set)
+ *           +-------------------+
+ *   28 - 40 | syms              |    (present if flags[4] or
+ *           |                   |                flags[5] is set)
+ *           +-------------------+
+ *   44      | mmap_length       |    (present if flags[6] is set)
+ *   48      | mmap_addr         |    (present if flags[6] is set)
+ *           +-------------------+
+ *   52      | drives_length     |    (present if flags[7] is set)
+ *   56      | drives_addr       |    (present if flags[7] is set)
+ *           +-------------------+
+ *   60      | config_table      |    (present if flags[8] is set)
+ *           +-------------------+
+ *   64      | boot_loader_name  |    (present if flags[9] is set)
+ *           +-------------------+
+ *   68      | apm_table         |    (present if flags[10] is set)
+ *           +-------------------+
+ *   72      | vbe_control_info  |    (present if flags[11] is set)
+ *   76      | vbe_mode_info     |
+ *   80      | vbe_mode          |
+ *   82      | vbe_interface_seg |
+ *   84      | vbe_interface_off |
+ *   86      | vbe_interface_len |
+ *           +-------------------+
+ */
+
+#define GF_MMAP_PRESENT        (1 << 6)
+
+/* We don't use pointers here because the addresses are going to be 32-bit
+ * physical so casting is our best option.
+ */
+
+struct grub_info {
+       u32 flags;
+       u32 mem_lower;
+       u32 mem_upper;
+       u32 boot_device;
+       u32 cmdline;
+       u32 mods_count;
+       u32 mods_addr;
+       u32 syms[4];
+       u32 mmap_length;
+       u32 mmap_addr;
+       u32 drives_length;
+       u32 drives_addr;
+       u32 config_table;
+       u32 boot_loader_name;
+       u32 apm_table;
+       u32 vbe_control_info;
+       u32 vbe_mode_info;
+       u16 vbe_mode;
+       u16 vbe_interface_seg;
+       u16 vbe_interface_off;
+       u16 vbe_interface_len;
+} __attribute__ ((packed));
+
+struct grub_mmap {
+       u32 size;
+       u64 base;
+       u64 length;
+       u32 type;
+} __attribute__ ((packed));
+
 
 #endif
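
As an aside (not part of this commit), here is a minimal sketch of walking the
memory map laid out above, assuming `info` and the region behind
`info->mmap_addr` are already mapped somewhere accessible (the kernel maps both
through VIRT_BASE later in this commit). The helper name is made up; the loop
mirrors the one page_alloc_init() uses, including the multiboot quirk that an
entry's size field does not count itself, hence the "+ 4" stride.

    #include <grub.h>
    #include <types.h>

    /* Sum up the available (type 1) RAM reported by GRUB. */
    static u64 count_available_ram(struct grub_info *info, struct grub_mmap *mmap)
    {
            struct grub_mmap *cur;
            u64 total = 0;
            u32 off;

            for (off = 0; off < info->mmap_length; off += cur->size + 4) {
                    cur = (struct grub_mmap *) ((u64) mmap + off);
                    if (cur->type == 1)
                            total += cur->length;
            }

            return total;
    }
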
index 70ee2c1..9cbbb7d 100644 (file)
@@ -1,6 +1,7 @@
 #ifndef KERNEL_H
 #define KERNEL_H
 
-#include <early.h>
+#include <memmap.h>
+#include <types.h>
 
 #endif
diff --git a/include/map.h b/include/map.h
new file mode 100644 (file)
index 0000000..2bf0ce4
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef MAP_H
+#define MAP_H
+
+#include <types.h>
+
+int map_page(u64 virtual, u64 physical);
+void *map_page_early(u64 virtual, u64 physical, u64 *alloc_base);
+
+#endif
diff --git a/include/memmap.h b/include/memmap.h
new file mode 100644 (file)
index 0000000..1cd93f1
--- /dev/null
@@ -0,0 +1,137 @@
+/* vim: set ts=4 sw=4 sts=4 noet : */
+#ifndef MEMMAP_H
+#define MEMMAP_H
+
+
+/* 4k page size */
+#define PAGE_SHIFT      12
+#define PAGE_SIZE       (1 << PAGE_SHIFT)
+#define PAGE_MASK      (PAGE_SIZE - 1)
+
+#define PAGE_ALIGN(x)          ((x) & ~PAGE_MASK)
+#define PAGE_ALIGN_OFF(x)      ((x) & PAGE_MASK)
+#define PAGE_ALIGNED(x)                (PAGE_ALIGN_OFF(x) == 0)
+#define PAGE_ALIGN_UP(x)       (PAGE_ALIGN((x) + PAGE_MASK))
+
+#define PTE_SHIFT      (PAGE_SHIFT + 9*0)
+#define PDE_SHIFT      (PAGE_SHIFT + 9*1)
+#define PDPE_SHIFT     (PAGE_SHIFT + 9*2)
+#define PML4E_SHIFT    (PAGE_SHIFT + 9*3)
+
+/* Find index based on virtual address */
+#define PTE(x)         (((x) >> PTE_SHIFT) & 0x1FF)
+#define PDE(x)         (((x) >> PDE_SHIFT) & 0x1FF)
+#define PDPE(x)                (((x) >> PDPE_SHIFT) & 0x1FF)
+#define PML4E(x)       (((x) >> PML4E_SHIFT) & 0x1FF)
+
+/* Find page structure based on virtual address */
+
+/* Because we mapped the PML4 page into itself as the second to last entry, and
+ * the structure of the PML4/PDP/PD/PTs are all compatible, we can then use the
+ * bits in the virtual address to locate its relevant page structures.
+ *
+ * With the standard 4-level page tables, the "leaves" are data pages.
+ *
+ * With the PML4 mapped into itself at 0x1fe, if you have an address with PML4E
+ * = 0x1fe, then the PML4 becomes the PDP, the PDP becomes the PD, the PD
+ * becomes the PT and PTs are now the leaves and accessible.
+ *
+ * If you create an address with PML4E = 0x1fe and PDP = 0x1fe, then the PDs
+ * are the leaves and accessible.
+ *
+ * So if we want to access, for example, the page directory for a certain
+ * address, we can set PML4E = 0x1fe and PDP = 0x1fe... but what are the rest
+ * of the bits? The look ups that the MMU already does! The PML4E in the
+ * address becomes the PDE index and the PDPE in the address becomes the PTE
+ * index.
+ */
+
+/* The BASE definitions are merely handling setting the entries we know need to
+ * be 0x1FE for each form of lookup. P_VIRT_BASE is just the sign extension we
+ * know must be present for a valid address.
+ *
+ * As we laid out above, to get the PTs to be the leaf nodes (accessible
+ * data), just the PML4E has to be 0x1fe, and so on.
+ */
+
+#define P_VIRT_BASE    (0xFFFF000000000000)
+#define MAX_VIRT_ADDRESS (0x0000FFFFFFFFFFFF)
+#define PT_VIRT_BASE   (P_VIRT_BASE | ((long) 0x1FE << PML4E_SHIFT))
+#define PD_VIRT_BASE   (PT_VIRT_BASE | ((long) 0x1FE << PDPE_SHIFT))
+#define PDP_VIRT_BASE  (PD_VIRT_BASE | ((long) 0x1FE << PDE_SHIFT))
+#define PML4_VIRT_BASE (PDP_VIRT_BASE | ((long) 0x1FE << PTE_SHIFT))
+
+/*
+ * Derive addresses of these entries. This macro is complicated so let's break
+ * it down.
+ *
+ * Let's say I'm trying to find PTE address for 0x12345678, which in binary is:
+ *
+ * 00 | 01 0010 001 | 1 0100 0101 | 0110 0111 1000
+ *    | PDE         | PTE         | PAGE OFFSET
+ *
+ * There's no sign extension, but nonetheless (x & ~P_VIRT_BASE) doesn't hurt.
+ * Then, when we shift it down by 9, we get
+ *
+ * 0000 0 | 000 0000 00 | 01 0010 001 | 1 0100 0101 011
+ *        | PDE         | PTE         | PAGE OFFSET
+ *
+ * As you can see, the new PTE is the old PDE, the new PDE is the old PDP (all
+ * zeroes) and so forth, but the top three bits of the original page offset are
+ * now the bottom three bits of it, and are irrelevant, so we zero them with an
+ * (& ~7).
+ *
+ * 0000 0 | 000 0000 00 | 01 0010 001 | 1 0100 0101 000
+ *        | PDE         | PTE         | PAGE OFFSET
+ *
+ * Now that we've properly shifted all of our lookups, and discarded the
+ * unnecessary page offset and sign extension bits, we can OR in the BASE,
+ * which as explained above is a constructed value where all of the unset
+ * lookups are set to 0x1fe. In this case, our PML4E, which is now 0 thanks to
+ * shifting, needs to be set to 0x1fe, which is exactly what PT_VIRT_BASE
+ * includes, along with the sign extension required due to the top bit of the
+ * PML4E being 1.
+ */
+
+#define PTE_ADDR(x)      (PT_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 9)  & ~7))
+#define PDE_ADDR(x)      (PD_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 18) & ~7))
+#define PDPE_ADDR(x)     (PDP_VIRT_BASE  | (((x & ~P_VIRT_BASE) >> 27) & ~7))
+#define PML4E_ADDR(x)    (PML4_VIRT_BASE | (((x & ~P_VIRT_BASE) >> 36) & ~7))
+
+#define PF_P                (1 << 0)
+#define PF_RW               (1 << 1)
+#define PF_USER             (1 << 2)
+#define PF_WRITETHRU        (1 << 3)
+#define PF_DISABLE_CACHE    (1 << 4)
+
+/* Early physical addresses */
+#define KERNEL_START 0x100000
+
+#define S_PAGES                        2       // 8k stack, for now
+#define STACK_PAGES_PHYS       0       // Starting at 0
+#define STACK_PAGES_START      (KERNEL_START - (S_PAGES * PAGE_SIZE))
+
+#define VIRT_BASE              0xFFFFFFFF80000000
+#define KERNEL_VIRT            (VIRT_BASE | KERNEL_START)
+
+#define VGA_MEM_VIRTUAL                        0xFFFFFFFC00000000
+#define VMALLOC_HEAP_VIRTUAL   0xFFFFFFFD00000000
+#define PAGE_HEAP_VIRTUAL              0xFFFFFFFE00000000
+
+#ifndef __ASSEMBLY__
+
+#include <types.h>
+
+static inline void __clear_page(u64 *addr)
+{
+       int i;
+
+       addr = (u64 *) ((u64) addr & ~PAGE_MASK);
+
+       for (i = 0; i < (PAGE_SIZE / sizeof(u64)); i++)
+               addr[i] = 0;
+}
+
+#endif
+
+#endif
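
As an aside (not part of this commit), a small sketch of how the self-map
macros above might be used once paging is live. virt_to_phys() is a
hypothetical helper; it assumes the PML4[0x1FE] self-map described in the
comments is in place and that the address is mapped with 4k pages.

    #include <memmap.h>
    #include <types.h>

    /* Look up the physical address backing a virtual address by reading
     * its page table entry through the 0x1FE self-map. */
    static u64 virt_to_phys(u64 virtual)
    {
            u64 pte = *(u64 *) PTE_ADDR(virtual);

            if (!(pte & PF_P))
                    return 0;       /* not present */

            /* Drop the flag bits, re-attach the in-page offset. */
            return PAGE_ALIGN(pte) | PAGE_ALIGN_OFF(virtual);
    }
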
diff --git a/include/page_alloc.h b/include/page_alloc.h
new file mode 100644 (file)
index 0000000..fdba337
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef PAGE_ALLOC_H
+#define PAGE_ALLOC_H
+
+#include <grub.h>
+#include <types.h>
+
+int page_alloc_init(struct grub_info *info_phys, u64 end_structures);
+
+u64 page_alloc_phys(u32 order);
+u64 page_alloc_free_phys(u64 phys);
+
+void *page_alloc(u32 order);
+void *page_alloc_to_virtual(u64 virtual, u32 order);
+#endif
diff --git a/include/types.h b/include/types.h
new file mode 100644 (file)
index 0000000..81da225
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+typedef unsigned long  u64;
+typedef unsigned int   u32;
+typedef unsigned short u16;
+typedef unsigned char  u8;
+
+typedef signed long    s64;
+typedef signed int     s32;
+typedef signed short   s16;
+typedef signed char    s8;
+
+#define NULL   ((void *) 0x0)
+#define MAXLONG ((unsigned long) -1)
+
+#endif
diff --git a/include/vmalloc.h b/include/vmalloc.h
new file mode 100644 (file)
index 0000000..f4827fa
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef VMALLOC_H
+#define VMALLOC_H
+
+#include <types.h>
+
+void vmalloc_init(void);
+
+void *vmalloc(char *name, u64 address_hint, u64 size);
+void vfree(u64 address, u64 size);
+void vmalloc_vspace(char *name, u64 address, u64 size);
+
+#endif
index e2ce019..1972ec2 100644 (file)
@@ -1,7 +1,255 @@
+/* vim: set ts=4 sw=4 sts=4 noet : */
+
 #include <kernel.h>
-#include <grub.h>
+#include <page_alloc.h>
+#include <stdarg.h>
+#include <vmalloc.h>
+#include <memmap.h>
+#include <map.h>
+
+char *vga_mem = NULL;
+u8 vga_x = 0;
+u8 vga_y = 0;
+
+u8 vga_max_x = 80;
+u8 vga_max_y = 25;
+
+void memcpy(void *dest, void *src, u32 size)
+{
+       int i;
+       for (i = 0; i < size; i++)
+               ((char *)dest)[i] = ((char *)src)[i];
+}
+
+void vga_putchar(char c)
+{
+       /* A newline just moves the cursor */
+       if (c == '\n') {
+               vga_x = 0;
+               vga_y++;
+               return;
+       }
+
+       /* Wrap long lines instead of dropping the character */
+       if (vga_x >= vga_max_x) {
+               vga_x = 0;
+               vga_y++;
+       }
+
+       if (vga_y >= vga_max_y) {
+               /* Scroll up one row; each VGA cell is two bytes (char + attribute) */
+               memcpy(vga_mem, &vga_mem[vga_max_x * 2], (vga_max_x * (vga_max_y - 1)) * 2);
+               vga_y--;
+       }
+
+       vga_mem[((vga_max_x * vga_y) + vga_x) * 2] = c;
+       vga_x++;
+}
+
+void vga_clear(void)
+{
+       int i;
+       for(i = 0; i < (vga_max_x * vga_max_y); i++) {
+               vga_mem[i * 2] = ' ';
+               vga_mem[(i * 2) + 1] = 0xf;
+       }
+}
+
+void vga_init(void)
+{
+       vga_mem = vmalloc("VGA mem", VGA_MEM_VIRTUAL, PAGE_SIZE);
+       map_page((u64) vga_mem, 0xB8000);
+       vga_clear();
+}
+
+char *console_mem;
+u32 console_end = 0;
+u32 console_wrapped = 0;
+
+#define CONSOLE_ORDER 1
+#define CONSOLE_SIZE  (1 << (CONSOLE_ORDER + PAGE_SHIFT))
+
+void console_init(void)
+{
+       console_mem = page_alloc(CONSOLE_ORDER);
+}
+
+void console_flush(void)
+{
+
+}
+
+void console_putchar(char c)
+{
+       console_mem[console_end] = c;
+       console_end++;
+
+       if(console_end >= CONSOLE_SIZE) {
+               console_wrapped = 1;
+               console_end = 0;
+       }
+
+       vga_putchar(c);
+}
+
+void console_puts(char *string)
+{
+       int i;
+
+       for (i = 0; string[i]; i++)
+               console_putchar(string[i]);
+}
+
+char *strncpy(char *dest, char *src, u32 size)
+{
+       int i;
+       for(i = 0; i < size && src[i]; i++)
+               dest[i] = src[i];
+       if (i < size)
+               dest[i] = 0;
+       return dest;
+}
 
-void main (void)
+int strlen(char *src)
 {
-    asm ("hlt" : :);
+       int i = 0;
+       while (src[i])
+               i++;
+       return i;
+}
+
+/* Convert a single value into ASCII */
+
+static int convert(char *buf, u32 rem, s32 fieldwidth, unsigned long long val, u32 base)
+{
+       char conv[] = "0123456789abcdef";
+       /* Largest possible ull in decimal is 39 digits */
+       /* Largest possible unsigned long long in decimal is 20 digits; 39 is plenty */
+       int digits = 0;
+       int i = 0;
+
+       if (fieldwidth > -1 && fieldwidth < rem)
+               rem = fieldwidth;
+
+       /* Write the digits into tmp, backwards because it's easiest to calculate
+        * the smallest digits first */
+
+       do {
+               tmp[digits] = conv[val % base];
+               val /= base;
+               digits++;
+       } while (val > 0);
+
+       /* Pad out tmp to fieldwidth */
+
+       i = 0;
+       if (fieldwidth != -1) {
+               for(i = 0; i < (rem - digits); i++)
+                       buf[i] = '0';
+       }
+
+       /* Then reverse the digits into buf */
+
+       for( ; i <= (digits - 1) && rem; rem--, i++)
+               buf[i] = tmp[(digits - 1) - i];
+
+       return i;
+}
+
+void vsnprintf(char *buf, u32 size, char *format, va_list ap)
+{
+       char *str = buf;
+       char qualifier[2] = { 0, 0 };
+       char *s;
+
+       unsigned long long val;
+       s32 fieldwidth = -1;
+       u32 escaped = 0;
+       u32 tmp;
+       u32 i;
+
+       /* size - 1, so we have room for the null byte regardless */
+
+       for(i = 0; i < (size - 1) && format[i]; i++) {
+               if (format[i] == '%') {
+                       if (escaped) {
+                               *str++ = '%';
+                               escaped = 0;
+                       }
+                       else
+                               escaped = 1;
+                       continue;
+               }
+
+               if (escaped) {
+                       switch(format[i]) {
+                               case 's': s = va_arg(ap, char *);
+                                                 if(fieldwidth == -1)
+                                                         fieldwidth = (size - i);
+                                                 while ((fieldwidth--) && (*s)) {
+                                                         *str++ = *s++;
+                                                         size--;
+                                                 }
+
+                                                 fieldwidth = -1;
+                                                 escaped = 0;
+                                                 size++;
+                                                 break;
+                               case 'l': if (qualifier[0] == 'l')
+                                                         qualifier[1] = 'l';
+                                                 else {
+                                                         qualifier[0] = 'l';
+                                                         qualifier[1] = 0;
+                                                 }
+
+                                                 size++;
+                                                 break;
+                               case 'd':
+                               case 'x': if (qualifier[0] == 'l') {
+                                                        if(qualifier[1] == 'l')
+                                                               val = va_arg(ap, unsigned long long);
+                                                        else
+                                                               val = va_arg(ap, unsigned long);
+                                                 } else {
+                                                         val = va_arg(ap, unsigned int);
+                                                 }
+                                                 if (format[i] == 'x')
+                                                         tmp = convert(str, (size - i), fieldwidth, val, 16);
+                                                 else
+                                                         tmp = convert(str, (size - i), fieldwidth, val, 10);
+                                                 str += tmp;
+
+                                                 /* tmp characters were printed, but the format
+                                                  * specifier itself shouldn't count against size */
+
+                                                 size -= (tmp - 1);
+
+                                                 qualifier[0] = 0;
+                                                 escaped = 0;
+                                                 break;
+                       }
+               }
+               else
+                       *str++ = format[i];
+       }
+       *str = 0;
+}
+
+void printk(char *format, ...)
+{
+       char buf[255];
+       va_list ap;
+
+       va_start(ap, format);
+
+       vsnprintf(buf, 255, format, ap);
+
+       va_end(ap);
+
+       console_puts(buf);
+}
+
+void main (void *info_phys, unsigned long end_structures)
+{
+       page_alloc_init(info_phys, end_structures);
+
+       vga_init();
+
+       console_init();
+
+       printk("Viridis %d.%d-g%s\n", __MAJOR_VERSION__, __MINOR_VERSION__, __GIT_VERSION__);
+
+       asm ("hlt" : : );
 }
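
A usage note, not part of the commit: the vsnprintf() above parses only %s, %d,
%x and the l/ll length modifiers (there is no field-width parsing yet), so a
typical call in this kernel looks like:

    printk("mapped %s at %lx, %d pages\n", "VGA mem", (u64) vga_mem, 1);
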
index 96035fa..3af9923 100644 (file)
--- a/linker.ld
+++ b/linker.ld
@@ -12,7 +12,9 @@ SECTIONS
     {
         *(.text_early)
     }
-    .text 0xFFFFFFFF80102000 : AT(0x102000)
+    . = ALIGN(0x8);
+    end_early_text = .;
+    .text (0xFFFFFFFF80000000 + end_early_text): AT (end_early_text)
     {
         *(.text)
     }
@@ -24,6 +26,10 @@ SECTIONS
     {
         *(.bss)
     }
+    .rodata :
+    {
+        *(.rodata)
+    }
     /DISCARD/ :
     {
         *(.comment)
diff --git a/mm/map.c b/mm/map.c
new file mode 100644 (file)
index 0000000..c5b198b
--- /dev/null
+++ b/mm/map.c
@@ -0,0 +1,120 @@
+/* vim: set ts=4 sw=4 sts=4 noet : */
+
+#include <types.h>
+#include <errno.h>
+#include <memmap.h>
+#include <page_alloc.h>
+
+#include <asm/misc.h>
+
+static void __early_map_check(u64 *entry, u64 *target, u64 *alloc_base)
+{
+       if (!(*entry & PF_P)) {
+               *entry = (*alloc_base) | (PF_RW | PF_P);
+
+               /* reset_cr3 so the newly mapped page is accessible, zero it, then
+                * reset cr3 again to make sure no crap mappings are in there. */
+
+               reset_cr3();
+               __clear_page(target);
+               reset_cr3();
+
+               *alloc_base = (*alloc_base) + PAGE_SIZE;
+       }
+}
+
+void *map_page_early(u64 virtual, u64 physical, u64 *alloc_base)
+{
+       u64 *pml4e = (u64 *) PML4E_ADDR(virtual);
+       u64 *pdpe = (u64 *) PDPE_ADDR(virtual);
+       u64 *pde = (u64 *) PDE_ADDR(virtual);
+       u64 *pte = (u64 *) PTE_ADDR(virtual);
+
+       /* Make sure all of these structures exist, and if not, alloc and zero them */
+
+       __early_map_check(pml4e, pdpe, alloc_base);
+       __early_map_check(pdpe, pde, alloc_base);
+       __early_map_check(pde, pte, alloc_base);
+
+       *pte = PAGE_ALIGN(physical) | PF_RW | PF_P;
+
+       reset_cr3();
+
+       return (void *) virtual;
+}
+
+/* Similar to __early_map_check, except it can use page_alloc_phys and check
+ * its return value. */
+
+int __map_check(u64 *entry, u64 *target)
+{
+       u64 phys = 0;
+
+       if (!(*entry & PF_P)) {
+               phys = page_alloc_phys(0);
+               if(phys == 0)
+                       return -ENOMEM;
+
+               *entry = (phys | PF_RW | PF_P);
+
+               /* reset_cr3 so the newly mapped page is accessible, zero it, then
+                * reset cr3 again to make sure no crap mappings are in there. */
+
+               reset_cr3();
+               __clear_page(target);
+               reset_cr3();
+               return 1;
+       }
+
+       return 0;
+}
+
+int map_page(u64 virtual, u64 physical)
+{
+       u64 *pml4e,*pdpe,*pde,*pte = NULL;
+       int ret, allocd = 0;
+
+       pml4e = (u64 *) PML4E_ADDR(virtual);
+       pdpe = (u64 *) PDPE_ADDR(virtual);
+       pde = (u64 *) PDE_ADDR(virtual);
+       pte = (u64 *) PTE_ADDR(virtual);
+
+       /* Make sure all of these structures exist, and if not, alloc and zero them */
+
+       ret = __map_check(pml4e, pdpe);
+       if (ret < 0)
+               return -ENOMEM;
+
+       allocd += ret;
+
+       ret = __map_check(pdpe, pde);
+       if (ret < 0)
+               goto cleanup_pml4e;
+
+       allocd += ret;
+
+       ret = __map_check(pde, pte);
+       if (ret < 0)
+               goto cleanup_pdpe;
+
+       *pte = physical | PF_RW | PF_P;
+
+       reset_cr3();
+       return 0;
+
+cleanup_pdpe:
+       if (allocd) {
+               page_alloc_free_phys(PAGE_ALIGN(*pdpe));
+               *pdpe = 0;
+               allocd--;
+       }
+cleanup_pml4e:
+       if (allocd) {
+               page_alloc_free_phys(PAGE_ALIGN(*pml4e));
+               *pml4e = 0;
+       }
+
+       reset_cr3();
+
+       return -ENOMEM;
+}
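
As an illustration (not part of the commit), a caller of map_page() might look
like the sketch below. EXAMPLE_SCRATCH_VIRT is a made-up virtual address chosen
for the example only.

    #include <map.h>
    #include <memmap.h>
    #include <page_alloc.h>
    #include <types.h>

    /* A made-up virtual address, below the VGA window at 0xFFFFFFFC00000000 */
    #define EXAMPLE_SCRATCH_VIRT 0xFFFFFFFB00000000UL

    /* Back one page at a fixed virtual address with freshly allocated
     * physical memory. map_page() creates any missing page structures and
     * returns -ENOMEM if the physical allocator is exhausted. */
    static void *map_scratch_page(void)
    {
            u64 phys = page_alloc_phys(0);

            if (phys == 0)
                    return NULL;

            if (map_page(EXAMPLE_SCRATCH_VIRT, phys) < 0) {
                    page_alloc_free_phys(phys);
                    return NULL;
            }

            return (void *) EXAMPLE_SCRATCH_VIRT;
    }
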
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
new file mode 100644 (file)
index 0000000..c23ae13
--- /dev/null
@@ -0,0 +1,836 @@
+/* vim: set ts=4 sw=4 sts=4 noet : */
+
+/* Physical and virtual page allocator */
+
+/* API:
+ * Physical page allocator:
+ *
+ * page_alloc_phys(u32 order);
+ * page_alloc_free_phys(u64 address);
+ *
+ * Virtual page allocator:
+ *
+ * page_alloc(u32 order);
+ * page_alloc_free(void *address);   (not yet implemented)
+ */
+
+#include <kernel.h>
+#include <grub.h>
+#include <errno.h>
+#include <types.h>
+#include <vmalloc.h>
+#include <map.h>
+
+#define TEST_PAGE_ALLOC
+
+struct page_block {
+       u64 address;
+       struct page_block *next;
+};
+
+u64 total_mem;
+
+/* MAX_ORDER is the order of the largest block of memory that we track. Linux
+ * uses 11 for 8M chunks, which seems like a good default size as it's large
+ * enough to accommodate even the largest allocation.
+ *
+ *  0 - 4k
+ *  1 - 8k
+ *  2 - 16k
+ *  3 - 32k
+ *  4 - 64k
+ *  5 - 128k
+ *  6 - 256k
+ *  7 - 512k
+ *  8 - 1M
+ *  9 - 2M
+ * 10 - 4M
+ * 11 - 8M
+ */
+
+#define MAX_ORDER 11
+
+struct page_block *free_pages[MAX_ORDER + 1] = { 0 };
+struct page_block *used_pages[MAX_ORDER + 1] = { 0 };
+struct page_block *unused_page_blocks = NULL;
+
+static inline void __setprev(struct page_block **head, struct page_block *prev, struct page_block *next)
+{
+       if (prev)
+               prev->next = next;
+       else
+               *head = next;
+}
+
+void __page_list_add(struct page_block **head, struct page_block *new)
+{
+       struct page_block *cur, *prev;
+
+       /* Insert sorted by address */
+
+       prev = NULL;
+       for (cur = *head; cur; prev = cur, cur = cur->next) {
+               if (cur->address > new->address) {
+                       new->next = cur;
+                       __setprev(head, prev, new);
+                       return;
+               }
+       }
+
+       __setprev(head, prev, new);
+       new->next = cur;
+}
+
+struct page_block *__pop_unused_page_block(void)
+{
+       struct page_block *ret = unused_page_blocks;
+
+       if (ret != NULL) {
+               unused_page_blocks = ret->next;
+               ret->next = NULL;
+       }
+
+       return ret;
+}
+
+void __push_unused_page_block(struct page_block *block)
+{
+       block->next = unused_page_blocks;
+       unused_page_blocks = block;
+}
+
+/* Similar to __page_list_add, except it will actually perform the merge */
+
+void __free_phys(struct page_block *phys, u32 order)
+{
+       struct page_block *cur, *prev;
+       u64 buddy_address;
+       u32 merge;
+
+       if (free_pages[order] == NULL) {
+               free_pages[order] = phys;
+               return;
+       }
+
+       prev = NULL;
+
+       /* For orders under MAX_ORDER, attempt to find buddies and merge them
+        * up as far as possible. */
+
+       for (order = order; order < MAX_ORDER; order++) {
+               buddy_address = (phys->address ^ (1 << (order + PAGE_SHIFT)));
+               merge = 0;
+               prev = NULL;
+               for (cur = free_pages[order]; cur; prev = cur, cur = cur->next) {
+
+                       if (cur->address > phys->address) {
+
+                               /* Found a buddy that's bigger than us, so we have the valid
+                                * address, patch cur out of the current order, break to try
+                                * and "free" phys on the next higher order */
+
+                               if (cur->address == buddy_address) {
+                                       __setprev(&free_pages[order], prev, cur->next);
+                                       cur->next = NULL;
+
+                                       __push_unused_page_block(cur);
+                                       merge = 1;
+                                       break;
+
+                               /* Not a buddy, just patch phys into the order. */
+                               } else {
+                                       __setprev(&free_pages[order], prev, phys);
+                                       phys->next = cur;
+                                       return;
+                               }
+                       }
+
+                       /* If address is smaller, we only care if it's a buddy, and
+                        * if it is, it has the valid address */
+
+                       else if(cur->address == buddy_address) {
+                               __setprev(&free_pages[order], prev, cur->next);
+                               phys->address = cur->address;
+                               __push_unused_page_block(cur);
+                               merge = 1;
+                               break;
+                       }
+               }
+
+               /* If merge is set, we need to advance to the next highest order
+                * to attempt to integrate phys into it.
+                */
+               if(merge)
+                       continue;
+
+               /* If merge is unset, it means that we didn't find a buddy, and we
+                * made it to the end of this order's list (which may have been
+                * empty) without finding a greater page block address, so just
+                * tack phys onto the end.
+                */
+
+               phys->next = NULL;
+               if (prev)
+                       prev->next = phys;
+               else
+                       free_pages[order] = phys;
+               return;
+       }
+
+       /* If we get here, it means that we're at MAX_ORDER, no merging
+        * is possible, so just insert it with __page_list_add
+        */
+
+       __page_list_add(&free_pages[order], phys);
+}
+
+/* Get a physical address of a block of (1 << order) pages
+ * 
+ * returns -E2BIG if order was insane
+ * returns -ENOMEM if a block that size wasn't available
+ */
+
+u64 page_alloc_phys(u32 order)
+{
+       struct page_block *new;
+       u64 ret, end, left_over_pages, pages;
+       int i,j;
+
+       if (order > MAX_ORDER)
+               return 0;
+
+       for (i = order; i <= MAX_ORDER; i++) {
+               if (free_pages[i] == NULL)
+                       continue;
+
+               new = free_pages[i];
+               ret = new->address;
+               free_pages[i] = new->next;
+
+               __page_list_add(&used_pages[order], new);
+
+               /* If it's just the right size, we're done */
+
+               if (i == order)
+                       return ret;
+
+               /* Otherwise, we need to split the block up */
+
+               /* Example, 1M order, being fulfilled by an 8M block The 8M block needs
+                * to be broken in 1M, 1M, 2M, 4M blocks.
+                */
+
+               left_over_pages = (1 << i) - (1 << order);
+               end = ret + (1 << (i + PAGE_SHIFT));
+
+               for(j = i - 1; j >= 0; j--) {
+                       pages = (1 << j);
+                       while (left_over_pages >= pages) {
+                               new = __pop_unused_page_block();
+                               new->address = end - (pages << PAGE_SHIFT);
+                               end = new->address;
+                               __free_phys(new, j);
+
+                               left_over_pages -= pages;
+                       }
+               }
+
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Counterpart to page_alloc_phys, unlike __free_phys, it will search for its
+ * record in used_pages first */
+
+u64 page_alloc_free_phys(u64 phys)
+{
+       struct page_block *cur, *prev;
+       int i;
+
+       for (i = 0; i <= MAX_ORDER; i++) {
+               prev = NULL;
+               for(cur = used_pages[i]; cur; prev = cur, cur = cur->next) {
+                       if (cur->address == phys) {
+                               __setprev(&used_pages[i], prev, cur->next);
+                               __free_phys(cur, i);
+                               return 0;
+                       } else if (cur->address > phys)
+                               break;
+               }
+       }
+
+       return -EINVAL;
+}
+
+
+/* Allocate a page to a given virtual address. */
+/* Expects virtual address to be pre-allocated as well. */
+
+void *page_alloc_to_virtual(u64 virtual, u32 order)
+{
+       u64 physical = page_alloc_phys(order);
+       u64 start_virt = virtual;
+       u64 end_virt = virtual + (1 << (order + PAGE_SHIFT));
+
+       /* page_alloc() relies on getting NULL back when no physical block of
+        * this order is available */
+       if (physical == 0)
+               return NULL;
+
+       while (start_virt < end_virt) {
+               map_page(start_virt, physical);
+               start_virt += PAGE_SIZE;
+               physical += PAGE_SIZE;
+       }
+
+       return (void *) virtual;
+
+}
+
+void *page_alloc(u32 order)
+{
+       void *virtual;
+       void *ret;
+
+       if (order > MAX_ORDER)
+               return NULL;
+
+       virtual = vmalloc("page heap", PAGE_HEAP_VIRTUAL, (1 << (order + PAGE_SHIFT)));
+
+       if (virtual == NULL)
+               return NULL;
+
+       ret = page_alloc_to_virtual((u64) virtual, order);
+
+       if (ret == NULL) {
+               vfree((u64) virtual, (1 << (order + PAGE_SHIFT)));
+               return NULL;
+       }
+
+       return ret;
+}
+
+/* INIT STUFF BEYOND THIS POINT */
+/* The rest of this file is init for the allocators */
+
+/* __init_free_region will place a number of __page_block structs on free_pages
+ * without doing any form of checking, and without removing from used_pages.
+ */
+
+static void __init_free_region(u64 address, u64 pages)
+{
+       struct page_block *new;
+       s32 order;
+       u32 size;
+
+       for(order = MAX_ORDER; pages && order >= 0; order--) {
+
+               // size in pages
+               size = 1 << order;
+
+               while(pages >= size) {
+                       new = __pop_unused_page_block();
+
+                       new->address = address;
+                       new->next = NULL;
+                       __page_list_add(&free_pages[order], new);
+
+                       address += (size << PAGE_SHIFT);
+                       pages -= size;
+               }
+       }
+}
+
+/* After unused_page_blocks has been properly converted to a virtual pointer,
+ * initialize the structs to form a linked list just like an order of free or
+ * used pages so we can allocate and free them
+ */
+
+static void __init_unused_page_blocks(u64 max_page_blocks)
+{
+       int i;
+
+       for(i=0;i < (max_page_blocks - 1); i++)
+               unused_page_blocks[i].next = &unused_page_blocks[i + 1];
+       unused_page_blocks[i].next = NULL;
+}
+
+/* Determine how many extra pages of page structures we'll need for an
+ * allocation of num_pages @ start, assuming it's entirely unpaged.
+ *
+ * The important thing here is that it's address sensitive, so a two page
+ * allocation could return anywhere between 3 and 6 overhead pages depending on
+ * how many page structure borders are being crossed.
+ */
+
+static u32 __overhead_pages(u64 start, u64 num_pages)
+{
+       u64 pdp_pages, pd_pages, pt_pages;
+       u64 end = start + (num_pages * PAGE_SIZE);
+
+       pdp_pages = PML4E(end) - PML4E(start);
+
+       if (pdp_pages) {
+               pd_pages = (512 - PDPE(start)) + PDPE(end);
+               pd_pages += (pdp_pages - 1) * 512;
+       } else
+               pd_pages = PDPE(end) - PDPE(start);
+
+       if (pd_pages) {
+               pt_pages = (512 - PDE(start)) + PDE(end);
+               pt_pages += (pd_pages - 1) * 512;
+       } else
+               pt_pages = PDE(end) - PDE(start);
+
+       return (pdp_pages + pd_pages + pt_pages);
+}
+
+/* Remove pages from the free list, without adding them to the used list, so
+ * they are unfreeable, if we're going to use these pages, either reserved
+ * memory from GRUB or integral kernel structures, it's not going to be through
+ * the page allocator.
+ */
+
+void __reserve_region(u64 address, u64 pages)
+{
+       struct page_block *cur = NULL;
+       struct page_block *prev;
+       u64 last_pages;
+
+       if(PAGE_ALIGN_OFF(address))
+               pages++;
+
+       address = PAGE_ALIGN(address);
+
+       int order, size_pages, size, num_free;
+
+       while (pages > 0) {
+               last_pages = pages;
+               for(order = MAX_ORDER; order >= 0; order--) {
+                       size_pages = 1 << order;
+                       size = size_pages << PAGE_SHIFT;
+
+                       /* First, find the free block that contains address. */
+
+                       prev = NULL;
+                       for (cur = free_pages[order]; cur; prev = cur, cur = cur->next) {
+                               if (cur->address <= address && (cur->address + size) > address)
+                                       break;
+                               if (cur->address > address) {
+                                       cur = NULL;
+                                       break;
+                               }
+                       }
+
+                       /* Didn't find relevant chunk */
+                       if(!cur)
+                               continue;
+
+                       /* Okay, we've found the block, let's break it up */
+
+                       /* First, patch it out of the current order list */
+
+                       __setprev(&free_pages[order], prev, cur->next);
+                       cur->next = NULL;
+
+                       /* If our address is somewhere inside the block, free
+                        * all of the memory before it and shrink sizes.
+                        */
+
+                       if (address != cur->address) {
+                               num_free = (address - cur->address) >> PAGE_SHIFT;
+                               __init_free_region(cur->address, num_free);
+
+                               size_pages -= num_free;
+                               size = (size_pages << PAGE_SHIFT);
+                       }
+
+                       /* If the remaining block is bigger or equal to what we're freeing,
+                        * then just discard this page_block and start again with the
+                        * remaining free block */
+
+                       if ( pages >= size_pages) {
+                               __push_unused_page_block(cur);
+                               pages -= size_pages;
+                               address += size;
+                               break;
+                       }
+
+                       /* Otherwise, we know we have pages at the end to free as well */
+
+                       __init_free_region(address + (pages << PAGE_SHIFT), size_pages - pages);
+
+                       /* Finally, discard cur */
+
+                       __push_unused_page_block(cur);
+                       return;
+               }
+
+               /* If we searched everywhere for this address but didn't find it,
+                * then go ahead and advance. This can happen when you have overlapping
+                * reserves. For example, trying to reserve GRUB info that's already
+                * been reserved in the mmap.
+                */
+
+               if (last_pages == pages) {
+                       pages--;
+                       address += PAGE_SIZE;
+               }
+       }
+}
+
+/* Given physical grub info address, and physical end of head.asm allocated
+ * structures.
+ */
+
+#ifdef TEST_PAGE_ALLOC
+
+static inline void test_orders(struct page_block **head, u64 *orders)
+{
+       struct page_block *block;
+       int counter = 0;
+       int i = 0;
+
+       for(i = 0; i <= MAX_ORDER; i++) {
+               counter = 0;
+               for (block = head[i]; block; block = block->next)
+                       counter++;
+
+               if (counter != orders[i])
+                       while(1);
+       }
+
+}
+
+static inline void test_order_index(struct page_block *block, u32 index, u64 address, u32 nextnull)
+{
+       int i;
+
+       for (i = 0; i < index; i++)
+               block = block->next;
+
+       if(block->address != address)
+               while(1) {};
+       if(nextnull && block->next)
+               while(1) {};
+       if(!nextnull && !block->next)
+               while(1) {};
+}
+
+void test_page_alloc(u64 max_pages)
+{
+       struct page_block *upb_bak = unused_page_blocks;
+       int i;
+
+       u64 free_orders[MAX_ORDER + 1] = { 0 };
+       u64 used_orders[MAX_ORDER + 1] = { 0 };
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       /* Alloc 1GB of memory */
+
+       __init_free_region(0, (1 * 1024 * 1024 * 1024) >> PAGE_SHIFT);
+
+       /* Should create 128 blocks of 8M */
+       /* No allocations, obviously */
+
+       free_orders[MAX_ORDER] = 128;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       test_order_index(free_pages[MAX_ORDER], 0, 0, 0);
+       test_order_index(free_pages[MAX_ORDER], 1, 0x800000, 0);
+       test_order_index(free_pages[MAX_ORDER], 2, 0x1000000, 0);
+
+       /* Reserve 0 - 4M, should split 1 8M block in 2 4M */
+       /* reserve_region won't record it on used_pages */
+
+       __reserve_region(0, 0x400);
+
+       free_orders[MAX_ORDER] = 127;
+       free_orders[MAX_ORDER - 1] = 1;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       test_order_index(free_pages[MAX_ORDER], 0, 0x800000, 0);
+       test_order_index(free_pages[MAX_ORDER], 1, 0x1000000, 0);
+       test_order_index(free_pages[MAX_ORDER - 1], 0, 0x400000, 1);
+
+       /* Reserve 12M - 13M, should split the 8M - 16M
+        * block into 1 more 4M, 1 1M, and 1 2M block */
+
+       __reserve_region(0xc00000, 0x100);
+
+       free_orders[MAX_ORDER] = 126;
+       free_orders[MAX_ORDER - 1] = 2;
+       free_orders[MAX_ORDER - 2] = 1;
+       free_orders[MAX_ORDER - 3] = 1;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       test_order_index(free_pages[MAX_ORDER], 0, 0x1000000, 0);
+       test_order_index(free_pages[MAX_ORDER], 1, 0x1800000, 0);
+       test_order_index(free_pages[MAX_ORDER - 1], 0, 0x400000, 0);
+       test_order_index(free_pages[MAX_ORDER - 1], 1, 0x800000, 1);
+
+       /* Now using page_alloc_phys should return the first two already
+        * split blocks
+        */
+
+       if(page_alloc_phys(MAX_ORDER - 1) != 0x400000)
+               while(1) {};
+
+       if(page_alloc_phys(MAX_ORDER - 1) != 0x800000)
+               while(1) {};
+
+       free_orders[MAX_ORDER - 1] = 0;
+       used_orders[MAX_ORDER - 1] = 2;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       /* A third 4M alloc should split an 8M block and return its
+        * lower address
+        */
+
+       if(page_alloc_phys(MAX_ORDER - 1) != 0x1000000)
+               while(1) {};
+
+       free_orders[MAX_ORDER] -= 1;
+       free_orders[MAX_ORDER - 1] = 1;
+
+       used_orders[MAX_ORDER - 1] = 3;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       test_order_index(free_pages[MAX_ORDER], 0, 0x1800000, 0);
+       test_order_index(free_pages[MAX_ORDER - 1], 0, 0x1400000, 1);
+
+       /* Now a free should properly reconstitute the 8M block */
+
+       page_alloc_free_phys(0x1000000);
+
+       free_orders[MAX_ORDER] += 1;
+       free_orders[MAX_ORDER - 1] = 0;
+
+       used_orders[MAX_ORDER - 1] = 2;
+
+       test_orders(free_pages, free_orders);
+       test_orders(used_pages, used_orders);
+
+       test_order_index(free_pages[MAX_ORDER], 0, 0x1000000, 0);
+
+       /* We're done, now reset our structures */
+
+       unused_page_blocks = upb_bak;
+       __init_unused_page_blocks(max_pages);
+
+       for (i = 0; i <= MAX_ORDER; i++) {
+               free_pages[i] = NULL;
+               used_pages[i] = NULL;
+       }
+}
+#endif
+
+struct grub_info *info = NULL;
+struct grub_mmap *mmap = NULL;
+
+int page_alloc_init(struct grub_info *info_phys, u64 end_structures)
+{
+       u64 max_phys_address, max_pages, page_block_pages;
+       u64 base, length, end;
+       u64 alloc_address, alloc_size, free_pages_address;
+       u64 virtual, physical;
+
+       u64 kernel_size, stack_size;
+
+       struct grub_mmap *cur;
+       u32 overhead, i;
+
+       /* Map will increase end_structures if it has to map additional structures,
+        * but it shouldn't have to */
+
+       info = (struct grub_info *)
+               map_page_early(VIRT_BASE | ((long) info_phys), (long) info_phys, &end_structures);
+
+       /* Make sure GRUB was able to create a memory map */
+
+       if (!(info->flags & GF_MMAP_PRESENT)) {
+               return -1;
+       }
+
+       /* This is likely in the same page as above, but map it to be sure. This
+        * also relies on the fact that map_page_early will return an unaligned
+        * virtual address
+        */
+
+       mmap = (struct grub_mmap *)
+               map_page_early(VIRT_BASE | ((long) info->mmap_addr), (long) info->mmap_addr, &end_structures);
+
+       /* For some reason the grub memmap size field doesn't include
+        * the size field itself, so we add 4.
+        */
+
+       max_phys_address = 0;
+       for (i = 0; i < info->mmap_length; i += cur->size + 4) {
+               cur = (struct grub_mmap *) (((long) mmap) + i);
+               end = cur->base + cur->length;
+
+               if (end > max_phys_address)
+                       max_phys_address = end;
+
+               total_mem += cur->length;
+       }
+
+       /* max_pages describes how many page_block structs we'd have if the
+        * entirety of physical memory was allocated in 4k chunks (maximum
+        * fragmentation).
+        */
+
+       max_pages = max_phys_address >> PAGE_SHIFT;
+
+       /* page_block_pages describes how many pages we need to reserve if we are
+        * going to store max_page_blocks.
+        */
+
+       page_block_pages = ((max_pages * sizeof(struct page_block)) >> PAGE_SHIFT) + 1;
+
+       /* Search the mmap for a chunk of contiguous memory that we can use */
+
+       /* Because there should be a large chunk of free memory >1M this should
+        * never fail in practice, but if it does, we're fucked
+        */
+
+       alloc_address = MAXLONG;
+       stack_size = S_PAGES << PAGE_SHIFT;
+
+       for (i = 0; i < info->mmap_length; i += cur->size + 4) {
+               cur = (struct grub_mmap *) (((long) mmap) + i);
+
+               /* We don't care about reserved memory */
+
+               if (cur->type != 1)
+                       continue;
+
+               /* If this is the block our binary is in, then adjust its
+                * base and length to avoid the binary and the page structures
+                * we allocated immediately afterward.
+                */
+
+               /* Assume that GRUB put us in a contiguous block of memory
+                * that started before or at KERNEL_START and ended after
+                * our page structures. If this isn't true then we shouldn't
+                * have gotten this far.
+                */
+
+               if (cur->base <= KERNEL_START && ((cur->base + cur->length) >= end_structures)) {
+                       base = end_structures;
+                       length = cur->length - (end_structures - KERNEL_START);
+               }
+               else if (cur->base <= STACK_PAGES_PHYS &&
+                               ((cur->base + cur->length) >= (STACK_PAGES_PHYS + stack_size))) {
+                       base = STACK_PAGES_PHYS + stack_size;
+                       length = cur->length - stack_size;
+               }
+               else {
+                       base = cur->base;
+                       length = cur->length;
+               }
+
+               /* Round up to page size, adjust length accordingly */
+               length -= PAGE_ALIGN_OFF(base);
+               base = PAGE_ALIGN(base);
+
+               /* If we allocated at this address, find out how many page structure
+                * pages we'd need to add to get it all allocated*/
+
+               overhead = __overhead_pages(base, page_block_pages);
+               alloc_size = ((page_block_pages + overhead) * PAGE_SIZE);
+
+               /* Too short, skip to the next block*/
+               if (length < alloc_size)
+                       continue;
+
+               /* We're page-aligned, non clobbering and large enough, done! */
+               alloc_address = base;
+               break;
+       }
+
+       /* We couldn't find a large enough block, we're fucked. */
+
+       if(alloc_address == MAXLONG)
+               return -1;
+
+       /* alloc_address now points to a contiguous piece of physical memory
+        * that can hold enough page_block structs to control all of our pages
+        * at maximum fragmentation, as well as the page structures needed to
+        * map them.
+        *
+        * We also know we need `overhead` pages of structures (at most).
+        */
+
+       unused_page_blocks = (struct page_block *) (alloc_address + (overhead << PAGE_SHIFT));
+
+
+       free_pages_address = alloc_address;
+
+       for (i = 0; i < page_block_pages; i++) {
+               physical = (u64) unused_page_blocks + (i * PAGE_SIZE);
+               virtual = (VIRT_BASE | physical);
+               map_page_early(virtual, physical, &free_pages_address);
+               __clear_page((u64 *) virtual);
+       }
+
+       /* Convert to a virtual pointer */
+       unused_page_blocks = (struct page_block *) (VIRT_BASE | ((long) unused_page_blocks));
+
+       __init_unused_page_blocks(max_pages);
+
+#ifdef TEST_PAGE_ALLOC
+       test_page_alloc(max_pages);
+#endif
+
+       /* Now put that reserved memory to use */
+       __init_free_region(0, max_pages);
+
+       /* Now iterate once more on the memmap and alloc all the unavailable pages */
+
+       for (i = 0; i < info->mmap_length; i += cur->size + 4) {
+               cur = (struct grub_mmap *) (((long) mmap) + i);
+
+               if (cur->type == 1)
+                       continue;
+
+               /* Partial pages are useless to us, so -expand- bad sections by
+                * page aligning base and rounding the length up to a multiple
+                * of page size
+                */
+
+               base = PAGE_ALIGN(cur->base);
+               length = cur->length + PAGE_ALIGN_OFF(cur->base);
+               if(!PAGE_ALIGNED(length))
+                       length = PAGE_ALIGN(length) + PAGE_SIZE;
+
+               /* Mark the physical addresses as allocated */
+
+               __reserve_region(base, length >> PAGE_SHIFT);
+       }
+
+       /* Finally, mark our own pages as allocated */
+
+       kernel_size = (end_structures - KERNEL_START);
+       stack_size = (S_PAGES << PAGE_SHIFT);
+
+       __reserve_region((u64) info_phys, 1);
+       __reserve_region(info->mmap_addr, 1);
+       __reserve_region(STACK_PAGES_PHYS, S_PAGES);
+       __reserve_region(KERNEL_START, kernel_size >> PAGE_SHIFT);
+       __reserve_region(alloc_address, alloc_size >> PAGE_SHIFT);
+
+       /* ... and mark all of our virtual address space as allocated */
+       /* page_alloc_to_virtual must be working by this point */
+
+       vmalloc_init();
+       vmalloc_vspace("grub info", (u64) info, 1);
+       vmalloc_vspace("grub mmap", (u64) mmap, 1);
+       vmalloc_vspace("kernel stack", (KERNEL_VIRT - stack_size), stack_size);
+       vmalloc_vspace("kernel", KERNEL_VIRT, kernel_size);
+       vmalloc_vspace("page allocator", (VIRT_BASE | alloc_address), alloc_size);
+
+       return 0;
+}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
new file mode 100644 (file)
index 0000000..b2d89e7
--- /dev/null
@@ -0,0 +1,327 @@
+/* vim: set ts=4 sw=4 sts=4 noet : */
+
+/* Virtual address space allocator */
+
+#include <memmap.h>
+#include <page_alloc.h>
+#include <vmalloc.h>
+#include <types.h>
+#include <errno.h>
+
+#undef TEST_VMALLOC
+
+/* NOTE: The names are included here, but as we have no string functions, or
+ * even display capability, these are unused for the moment */
+
+struct virtual_block {
+       u64 address;
+       u64 size;
+       char *name;
+       struct virtual_block *next;
+};
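+
+/* Blocks live in singly linked lists sorted by address, one list per address
+ * space (kernel_virtual_space below is the kernel's). For example, right
+ * after vmalloc_init() the kernel list contains a single block covering the
+ * vmalloc heap page itself. */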
+
+static struct virtual_block *virtual_alloc = NULL;
+static struct virtual_block *spare_blocks = NULL;
+static int alloc_order = 0;
+u32 used_vblocks = 0;
+
+/* Eventually this will be part of the kernel's task struct */
+struct virtual_block *kernel_virtual_space = NULL;
+
+static struct virtual_block *pop_virtual_block(void)
+{
+       struct virtual_block *ret;
+
+       /* Re-use a previously freed block if we have one */
+
+       if (spare_blocks) {
+               ret = spare_blocks;
+               spare_blocks = ret->next;
+               return ret;
+       }
+
+       /* Otherwise bump-allocate from the vmalloc heap page(s). If we've run
+        * off the end of it we have no way to recover yet, so hang. */
+
+       if (((used_vblocks + 1) * sizeof(struct virtual_block)) >
+                       (1 << (alloc_order + PAGE_SHIFT)))
+               while (1);
+
+       ret = &virtual_alloc[used_vblocks];
+       used_vblocks++;
+       return ret;
+}
+
+static void push_virtual_block(struct virtual_block *block)
+{
+       block->next = spare_blocks;
+       spare_blocks = block;
+}
+
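+/* Record [virtual, virtual + size) as used in the sorted list at *head,
+ * growing and merging any existing blocks the new range touches, or
+ * inserting a fresh block if it stands alone. */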
+void __alloc_vspace(struct virtual_block **head, char *name, u64 virtual, u64 size)
+{
+       struct virtual_block *cur, *prev, *new;
+       u64 end, cur_end;
+
+       size = PAGE_ALIGN_UP(size + PAGE_ALIGN_OFF(virtual));
+       virtual = PAGE_ALIGN(virtual);
+
+       end = virtual + size;
+
+       prev = NULL;
+       for (cur = *head; cur; prev = cur, cur = cur->next) {
+               cur_end = (cur->address + cur->size);
+
+               /* If our virtual is in this block or adjacent, expand it */
+
+               if (cur->address <= virtual) {
+                       /* cur is adjacent left */
+
+                       /* Start in block */
+                       if (virtual <= cur_end) {
+                               /* Overhangs right */
+                               if (end > cur_end) {
+                                       cur->size += (end - cur_end);
+
+                                       /* Since we're adjusting this size, check if we're now
+                                        * adjacent to the next one */
+
+                                       if (cur->next && cur->next->address <= end) {
+                                               cur->size += cur->next->size - (end - cur->next->address);
+
+                                               /* pointer reuse: "new" here is actually the old block being absorbed */
+                                               new = cur->next;
+                                               cur->next = new->next;
+                                               push_virtual_block(new);
+                                       }
+                               }
+
+                               return;
+                       }
+
+               /* cur is adjacent right */
+
+               /* Ends in block */
+
+               } else if (cur->address <= end) {
+                       cur->size += size - (end - cur->address);
+                       cur->address = virtual;
+                       return;
+               }
+
+               /* Is not connected, but this is virtual's place in the list */
+               else if (cur->address > virtual) {
+                       break;
+               }
+       }
+
+       new = pop_virtual_block();
+       new->name = name;
+       new->address = virtual;
+       new->size = size;
+
+       if (prev)
+               prev->next = new;
+       else
+               *head = new;
+       new->next = cur;
+}
+
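+/* Release [virtual, virtual + size) from the list at *head, shrinking,
+ * splitting, or removing the block that contains it. Only the first block
+ * hit is adjusted, so a range spanning several blocks is not fully freed.
+ * Returns -ENOENT if virtual falls in a hole before the next block. */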
+int __free_vspace(struct virtual_block **head, u64 virtual, u64 size)
+{
+       struct virtual_block *cur, *prev, *new;
+       u64 end, cur_end;
+
+       end = virtual + size;
+
+       prev = NULL;
+       for (cur = *head; cur; prev = cur, cur = cur->next) {
+               cur_end = cur->address + cur->size;
+
+               if (cur->address > virtual)
+                       return -ENOENT;
+
+               if (cur->address == virtual) {
+                       if (size < cur->size) {
+                               /* Shrink */
+                               cur->address = virtual + size;
+                               cur->size = cur->size - size;
+                       }
+                       else {
+                               /* Remove */
+                               if (prev)
+                                       prev->next = cur->next;
+                               else
+                                       *head = cur->next;
+                               push_virtual_block(cur);
+                       }
+                       break;
+               }
+               else if (cur_end > virtual) {
+                       if (end >= cur_end)
+                               cur->size -= (cur_end - virtual);
+                       else {
+                               /* Shrink cur, add a new block for the remaining shard */
+                               cur->size = (virtual - cur->address);
+
+                               new = pop_virtual_block();
+                               new->address = end;
+                               new->size = (cur_end - end);
+                               new->name = cur->name;
+
+                               new->next = cur->next;
+                               cur->next = new;
+                       }
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+#ifdef TEST_VMALLOC
+
+struct virtual_block *test_virtual_space = NULL;
+
+static inline void __vblock_assert(u32 index, u64 address, u64 size, int next_null)
+{
+       struct virtual_block *block;
+       int i = 0;
+
+       block = test_virtual_space;
+       for (i = 0; i < index; i++)
+               block = block->next;
+
+       if (block->address != address)
+               while(1) {};
+       if (block->size != size)
+               while(1) {};
+       if (next_null && block->next)
+               while(1) {};
+       if (!next_null && !block->next)
+               while(1) {};
+}
+
+void test_alloc_vspace(void)
+{
+       __alloc_vspace(&test_virtual_space, "test", 0x1000, 0x1000);
+
+       __vblock_assert(0, 0x1000, 0x1000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x2000, 0x1000);
+
+       __vblock_assert(0, 0x1000, 0x2000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x0, 0x1000);
+
+       __vblock_assert(0, 0x0, 0x3000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x3000, 0x10000);
+
+       __vblock_assert(0, 0x0, 0x13000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x20000, 0x10000);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x10000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x70000, 0x10000);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x10000, 0);
+       __vblock_assert(2, 0x70000, 0x10000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0xf0000, 0x10000);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x10000, 0);
+       __vblock_assert(2, 0x70000, 0x10000, 0);
+       __vblock_assert(3, 0xf0000, 0x10000, 1);
+
+       __alloc_vspace(&test_virtual_space, "test", 0x80000, 0x70000);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x10000, 0);
+       __vblock_assert(2, 0x70000, 0x90000, 1);
+
+       __free_vspace(&test_virtual_space, 0xf0000, 0x800);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x10000, 0);
+       __vblock_assert(2, 0x70000, 0x80000, 0);
+       __vblock_assert(3, 0xf0800, 0xf800, 1);
+
+       __free_vspace(&test_virtual_space, 0x21000, 0x8000);
+
+       __vblock_assert(0, 0x0, 0x13000, 0);
+       __vblock_assert(1, 0x20000, 0x1000, 0);
+       __vblock_assert(2, 0x29000, 0x7000, 0);
+       __vblock_assert(3, 0x70000, 0x80000, 0);
+       __vblock_assert(4, 0xf0800, 0xf800, 1);
+
+       __free_vspace(&test_virtual_space, 0x11000, 0x2000);
+
+       __vblock_assert(0, 0x0, 0x11000, 0);
+       __vblock_assert(1, 0x20000, 0x1000, 0);
+       __vblock_assert(2, 0x29000, 0x7000, 0);
+       __vblock_assert(3, 0x70000, 0x80000, 0);
+       __vblock_assert(4, 0xf0800, 0xf800, 1);
+
+       __free_vspace(&test_virtual_space, 0x70000, 0x10000);
+
+       __vblock_assert(0, 0x0, 0x11000, 0);
+       __vblock_assert(1, 0x20000, 0x1000, 0);
+       __vblock_assert(2, 0x29000, 0x7000, 0);
+       __vblock_assert(3, 0x80000, 0x70000, 0);
+       __vblock_assert(4, 0xf0800, 0xf800, 1);
+
+       /* Reset */
+       used_vblocks = 0;
+}
+#endif
+
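+/* Reserve size bytes of kernel virtual address space, preferring
+ * address_hint, and return the chosen address (or NULL if no gap is large
+ * enough). Only the address space is claimed here; no physical pages are
+ * mapped. Illustrative call (hypothetical name and size):
+ *
+ *     buf = vmalloc("rx ring", VMALLOC_HEAP_VIRTUAL, 4 * PAGE_SIZE);
+ */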
+void *vmalloc(char *name, u64 address_hint, u64 size)
+{
+       struct virtual_block *cur;
+       u64 prev_end, end;
+
+       prev_end = 0;
+       for(cur = kernel_virtual_space; cur; prev_end = end, cur = cur->next) {
+               end = cur->address + cur->size;
+               if (cur->address > address_hint) {
+                       if (size <= (cur->address - prev_end)) {
+                               __alloc_vspace(&kernel_virtual_space, name, address_hint, size);
+                               return (void *) address_hint;
+                       }
+                       else
+                               address_hint = end;
+               }
+       }
+
+       if ((MAX_VIRT_ADDRESS - prev_end) >= size) {
+               __alloc_vspace(&kernel_virtual_space, name, prev_end, size);
+               return (void *) prev_end;
+       }
+
+       return NULL;
+}
+
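+/* Claim an already-chosen virtual range (kernel image, stack, etc.) so later
+ * vmalloc() searches will steer around it. */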
+void vmalloc_vspace(char *name, u64 address, u64 size)
+{
+       __alloc_vspace(&kernel_virtual_space, name, address, size);
+}
+
+void vfree(u64 address, u64 size)
+{
+       __free_vspace(&kernel_virtual_space, address, size);
+}
+
+extern struct early_reserve early_reserves_start;
+extern struct early_reserve early_reserves_end;
+
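+/* Called once from the page allocator init, after page_alloc_to_virtual
+ * works: grab the block heap page and register it as the first block of
+ * kernel virtual space. */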
+void vmalloc_init(void)
+{
+       /* Grab a page to work with */
+       /* Grab a page to hold our pool of virtual_block structs */
+       virtual_alloc = page_alloc_to_virtual(VMALLOC_HEAP_VIRTUAL, alloc_order);
+
+#ifdef TEST_VMALLOC
+       test_alloc_vspace();
+#endif
+
+       __alloc_vspace(&kernel_virtual_space, "vmalloc", VMALLOC_HEAP_VIRTUAL, (1 << (alloc_order + PAGE_SHIFT)));
+}