From 03b123a041d2191f8723018e099c11553fba7e4a Mon Sep 17 00:00:00 2001 From: Cassie Jones Date: Fri, 28 Feb 2020 21:03:07 +0100 Subject: [PATCH] Add memory layout docs and assembly experiments My goal was to figure out how I intended to compile the language by writing the assembly myself and figuring out the format of all the objects. I pretty quickly realized that lots of this belongs as runtime routines rather than sequences the compiler outputs, for things like creating closures, allocating objects, etc. What you see here is in a sort of mixed-up state because it started out by making raw system calls (so allocate and free were more involved), but is now set up to call into system calls via libSystem.dylib. It doesn't do anything interesting right now, it was just set up to learn some things. Most of the code here is implementing type-checking for the tagged pointer + 63-bit integer scheme, which is inspired by OCaml. The specific object layouts are described in docs/object-layout.md. 
---
 docs/example.s        | 126 ++++++++++++++++++++++++++++++++++++++++++
 docs/object-layout.md |  61 ++++++++++++++++++++
 2 files changed, 187 insertions(+)
 create mode 100644 docs/example.s
 create mode 100644 docs/object-layout.md

diff --git a/docs/example.s b/docs/example.s
new file mode 100644
index 0000000..517298b
--- /dev/null
+++ b/docs/example.s
@@ -0,0 +1,126 @@
+// Ivy runtime experiments: x86-64, Intel syntax, macOS (Mach-O), System V AMD64 ABI.
+// Build:
+//   clang -masm=intel -nostdlib docs/example.s -lSystem -Wl,-e,_start -g -o target/example
+//
+// Values follow docs/object-layout.md: a set low bit marks an immediate 63-bit
+// integer, a clear low bit marks a pointer to a heap object whose first byte
+// is its type tag.
+
+// Raw macOS syscall numbers. Unused in this file; everything goes through libSystem.
+SYS_OFFSET = 0x2000000
+SYS_EXIT = SYS_OFFSET + 0x01
+SYS_WRITE = SYS_OFFSET + 0x04
+SYS_MUNMAP = SYS_OFFSET + 0x49
+SYS_MMAP = SYS_OFFSET + 0xC5
+
+PROT_READ = 0x1
+PROT_WRITE = 0x2
+MAP_ANONYMOUS = 0x1000
+MAP_SHARED = 0x1
+MAP_PRIVATE = 0x2
+
+STDOUT = 1
+STDERR = 2
+
+// Heap object type tags (byte at [obj + 0]).
+TAG_LAM = 0
+TAG_INT = 1
+
+// args: RDI RSI RDX RCX R8 R9
+
+.text
+.global _start
+// Program entry: allocate 413 bytes, free them, exit with status 0.
+_start:
+    // 16-byte align the stack so the calls below satisfy the ABI
+    and rsp, -16
+
+    mov rdi, 413
+    call ivy_alloc
+    mov rdi, rax
+    call _free
+    xor edi, edi
+    call _exit
+
+
+// ivy_alloc: rdi = size in bytes; returns malloc's result in rax.
+// Tail call: a nested `call` here would reach malloc with a misaligned stack.
+ivy_alloc:
+    jmp _malloc
+
+// ivy_dealloc: rdi = pointer to release with free. Tail call, as in ivy_alloc.
+ivy_dealloc:
+    jmp _free
+
+
+// ivy_check_int: rdi = value. Returns if it is an integer (immediate or boxed
+// with TAG_INT); otherwise reports an error and exits. Preserves rdi.
+ivy_check_int:
+    test rdi, 1
+    jnz ivy_check_ok                // low bit set: immediate integer
+    cmp byte ptr [rdi], TAG_INT     // tag is one byte; the refcount follows it
+    jne ivy_check_int_fail
+    jmp ivy_check_ok
+// ivy_check_lam: rdi = value. Returns if it is a pointer to a closure
+// (TAG_LAM); otherwise reports an error and exits. Preserves rdi.
+ivy_check_lam:
+    test rdi, 1
+    jnz ivy_check_lam_fail          // immediate integers are never closures
+    cmp byte ptr [rdi], TAG_LAM
+    jne ivy_check_lam_fail
+ivy_check_ok:
+    ret
+ivy_check_int_fail:
+    lea rsi, [rip + EXPECT_INT_ERROR]       // address of the text, not its bytes
+    mov rdx, [rip + EXPECT_INT_ERROR_LEN]
+    jmp ivy_abort
+ivy_check_lam_fail:
+    lea rsi, [rip + EXPECT_LAM_ERROR]
+    mov rdx, [rip + EXPECT_LAM_ERROR_LEN]
+    jmp ivy_abort
+
+
+EXPECT_INT_ERROR: .ascii "Error: typecheck expected an integer type"
+EXPECT_INT_ERROR_LEN: .quad . - EXPECT_INT_ERROR
+EXPECT_LAM_ERROR: .ascii "Error: typecheck expected a lambda type"
+EXPECT_LAM_ERROR_LEN: .quad . - EXPECT_LAM_ERROR
+
+
+// ivy_abort: rsi = message, rdx = length. Writes the message to stderr and
+// exits with status 1. Reached by `jmp`; never returns.
+ivy_abort:
+    and rsp, -16                    // callers arrive with arbitrary alignment
+    mov rdi, STDERR
+    call _write
+    mov rdi, 1
+    call _exit
+
+
+// ivy_debug_log: writes rax to stdout as 16 uppercase hex digits and a newline.
+// Preserves rax; clobbers rcx, rdx, rsi, rdi and whatever write clobbers.
+// Must be entered via `call` from a 16-byte aligned call site.
+ivy_debug_log:
+    push rax                        // saved value; rsp is 16-byte aligned again
+    sub rsp, 32                     // 17-byte text buffer, padded for alignment
+    mov byte ptr [rsp + 16], '\n'
+    mov edx, 15                     // index of the least significant digit
+ivy_debug_log_loop:
+    mov ecx, eax
+    and ecx, 0xF
+    add ecx, '0'
+    cmp ecx, '9'
+    jbe ivy_debug_skip
+    add ecx, 'A' - '0' - 10         // 10..15 map to 'A'..'F'
+ivy_debug_skip:
+    mov [rsp + rdx], cl
+    shr rax, 4
+    sub edx, 1
+    jns ivy_debug_log_loop
+
+    mov rdi, STDOUT
+    mov rsi, rsp
+    mov rdx, 17
+    call _write
+    add rsp, 32
+    pop rax
+    ret
diff --git a/docs/object-layout.md b/docs/object-layout.md
new file mode 100644
index 0000000..617f70d
--- /dev/null
+++ b/docs/object-layout.md
@@ -0,0 +1,61 @@
+# Object Layouts
+
+
+All heap objects have a 32-bit reference count at [obj + 4].
+Objects are returned at +1.
+Offsets in the layouts below are hexadecimal byte offsets.
+
+
+Integers:
+A 63-bit signed integer, shifted left 1, with the lsb set.
+For example, 1 is stored as 0b11, 2 as 0b101, etc.
+Can also be stored as an aligned pointer to an object.
+
+TODO: support big ints with this layout.
+
+[u8:tag=1]
+[u24:pad]
+[u32:refcount]
+[i64:value]
+
+
+Closures:
+A pointer to the closure entry.
+Must be 8-byte aligned.
+
+00:[u8:tag=0]
+01:[u8:pad]
+02:[u16:upvars]
+04:[u32:refcount]
+08:[u64:fn ptr]
+10:[u16:params]
+12:[u16:filled params]
+14:[u32:pad]
+18:[params...]
+??:[fields...]
+
+
+Indirect Closures:
+When a closure has lots of data and is frequently copied, it's beneficial to use an indirect closure that contains a pointer to the fields block so that only the params have to be copied.
+These are unimplemented for the time being.
+
+00:[u8:tag=2]
+01:[u8:pad]
+02:[u16:upvars]
+04:[u32:refcount]
+08:[u64:fn ptr]
+10:[u16:params]
+12:[u16:filled params]
+14:[u32:pad]
+18:[params...]
+??:[u64:fields ptr]
+
+
+fields block:
+00:[u16:pad]
+02:[u16:field count]
+04:[u32:refcount]
+08:[fields...]
+
+
+When a closure is destroyed, it iterates over all of its fields and performs a decref on them.
-- 
2.47.0