diff options
author | Nathan Ringo <nathan@remexre.com> | 2024-11-18 00:34:23 -0600 |
---|---|---|
committer | Nathan Ringo <nathan@remexre.com> | 2024-11-18 00:34:23 -0600 |
commit | 943a6597b2bcd1b3ed208458a5cba61ad5b4051c (patch) | |
tree | d0acf34996941417aca241f5f01e399aaa90af39 | |
parent | 57331ba9756df043b5c665aa4952a0a7b38799e5 (diff) |
...
-rw-r--r-- | README.txt | 218 | ||||
-rw-r--r-- | misc/leaf-classes.lisp | 27 | ||||
-rw-r--r-- | src/Makefile | 6 | ||||
-rw-r--r-- | src/gc.h | 14 | ||||
-rw-r--r-- | src/gc/gc-debug.c | 0 | ||||
-rw-r--r-- | src/gc/gc.c | 99 | ||||
-rw-r--r-- | src/main.c | 5 | ||||
-rw-r--r-- | src/platform.h | 13 | ||||
-rw-r--r-- | src/platform/3ds.c | 33 | ||||
-rw-r--r-- | src/util.c | 37 | ||||
-rw-r--r-- | src/value.c | 111 | ||||
-rw-r--r-- | src/value.h | 12 |
12 files changed, 508 insertions, 67 deletions
diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..0221931 --- /dev/null +++ b/README.txt @@ -0,0 +1,218 @@ +╭──────────────────────────────────────────────────────────────────────────────╮ +│ imb3: A programming language that combines the best parts of Common Lisp and │ +│ Forth to make a very high-level language with easy metaprogramming. │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ The "guts" of the language resemble Common Lisp (e.g. a CLOS-like object │ +│ system, a condition system, a high-performance garbage collector), while the │ +│ surface syntax is all Forth. There is also a type checker for a type system │ +│ that allows dynamically-typed code to exist while also allowing fairly │ +│ complex properties to be checked at compile-time if desired. The cherry on │ +│ top is an efficient native compiler with the REPL interface users of both │ +│ Common Lisp and Forth have come to expect. │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ This initial implementation is designed to be recompilable for a variety of │ +│ platforms, including the Nintendo 3DS. │ +╰──────────────────────────────────────────────────────────────────────────────╯ + +╭──────────────────────────────────────────────────────────────────────────────╮ +│ Garbage Collector │ +╞══════════════════════════════════════════════════════════════════════════════╡ +│ Currently, the garbage collector is a generational garbage collector with │ +│ three generations -- the “nursery,” the “young-heap,” and the “old-heap.” │ +│ Each one uses a different garbage collection algorithm. │ +│ │ +│ The nursery is the region almost all new objects are allocated into. There │ +│ are two exceptions: │ +│ │ +│ 1. Objects that are too large to fit in the nursery. │ +│ 2. Objects with a finalizer. Note that finalizers are not user-exposed, to │ +│ avoid issues with object resurrection -- they are currently only used │ +│ to allow allocating executable code, using a custom allocator with a │ +│ malloc/free -style interface. │ +│ │ +│ These objects are instead allocated into the young-heap if they could fit │ +│ there, and the old-heap if not. │ +│ │ +│ The nursery’s size is chosen when the process starts, and is equal to the │ +│ size of the largest L1d cache. When the nursery fills up, Cheney’s algorithm │ +│ is run to copy any live objects into the young-heap. Because Cheney’s │ +│ algorithm does not traverse dead objects, objects with finalizers must be │ +│ allocated in (and, more importantly, freed from) another region. │ +│ │ +│ As the data in the nursery grows up, the addresses ┌──────────────┐ 0x…8000 │ +│ of slots that have had pointers written into them │remembered set│ │ +│ are written to the top of its region, and the end ├──────────────┤ 0x…7000 │ +│ of the allocatable region grows down. (This is the │ free │ │ +│ only write barrier the collector needs. Notably, │ space │ │ +│ this avoids branches other than “are we out of ├──────────────┤ 0x…5000 │ +│ space for the remembered set (in which case we need │ │ │ +│ to perform a nursery GC).”) │ │ │ +│ │ objects │ │ +│ When a nursery GC occurs, its remembered set is │ │ │ +│ checked for references into the nursery. These are │ │ │ +│ used as roots. References to other regions are └──────────────┘ 0x…0000 │ +│ copied into the remembered set of the young-heap, The Nursery Region’s │ +│ and non-references are discarded. This discarding Layout │ +│ behavior removes another branch -- whenever a value │ +│ not statically known to be an integer is written, │ +│ we can unconditionally add the place it was written to into the remembered │ +│ set. │ +│ │ +│ The young-heap is a region that is larger than the nursery, but small enough │ +│ that collections should still be fast. It is sized to be a fixed multiple of │ +│ the amount of L3 cache in the system (currently, 2). It is collected with a │ +│ mark-compact algorithm based on the Lisp 2 algorithm, which makes finalizers │ +│ essentially free. To ensure that there is always room to complete a nursery │ +│ GC, we reserve a portion of the young-heap equal in size to the nursery, and │ +│ collect the young-heap once there is not enough free memory to allocate │ +│ without cutting into that reservation. │ +│ ╭──────╥────────────────────────────────╮ │ +│ Objects in the young-heap remain │ TODO ║ Is there a selection algorithm │ │ +│ there for a tunable number of GC ╞══════╝ we could use to make a dynamic │ │ +│ cycles (currently defaulting to 3) │ choice for how long objects remain in │ │ +│ before being tenured -- that is, │ the young-heap before being tenured? │ │ +│ moved to the old-heap. ╰───────────────────────────────────────╯ │ +│ │ +│ The young-heap has its own remembered set, whose entries are copied in from │ +│ the nursery’s remembered set. To ensure that these entries are up to date, │ +│ a young-heap GC can only occur when the nursery is empty. After a young-heap │ +│ GC, we do not need to copy the remembered set into the old-heap, because all │ +│ remaining references will stay internal to the old-heap. │ +│ │ +│ Finally, the old-heap stores objects that have outlived their time in the │ +│ other regions. It is organized into several blocks, each of which belongs to │ +│ a size class. New blocks are allocated and freed as needed, so the old-heap │ +│ dynamically grows and shrinks. Old-heap collections occur when the old-heap │ +│ is using twice as much memory as it was the last time a collection completed │ +│ (capped by a memory limit, by default the size of RAM on the entire system). │ +│ │ +│ The old-heap is collected using mark-sweep with bitmap marking, including a │ +│ per-block bit which allows entire blocks to be swept at once if they contain │ +│ only dead objects without finalizers. (A separate bit is maintained when │ +│ objects are allocated and when sweeping, to track whether a block contains │ +│ objects with finalizers or not.) │ +│ │ +│ As the young-heap requires the nursery to be empty before it is collected, │ +│ so too does the old-heap require both the nursery and young-heap to be empty │ +│ before it can be collected. Because collecting the old-heap tends to be so │ +│ slow relative to the nursery and young-heap, this does not add significant │ +│ overhead. │ +│ │ +│ If 5 consecutive old-heap collections recovers less than 2% of heap memory │ +│ needed while the heap is at its maximum size, the process exits with an │ +│ error. This behavior and these constants are taken from OpenJDK, and are an │ +│ attempt to avoid GC thrashing. │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ Thread-based parallelism is not yet implemented. However, parallelizing this │ +│ collector design is relatively straightforward. Each thread should be pinned │ +│ to a hardware core. Threads can then get a per-thread nursery sized to their │ +│ core’s L1d cache, rather than a system-wide maximum. │ +│ │ +│ The young-heap and old-heap are each protected with a mutex, which prevents │ +│ multiple calls to allocate memory from interfering with one another. │ +│ Unfortunately, this also means that nursery collections cannot occur │ +│ concurrently. It is an open question whether this is an issue in practice as │ +│ mutators and nursery collections execute, but it becomes an issue during │ +│ young-heap collections. │ +│ │ +│ To mitigate this issue, starting a young-heap collection does not signal the │ +│ other threads immediately. Instead, each thread is responsible for notifying │ +│ the next thread just before it completes collection, unless another thread │ +│ is already waiting to collect. This maximizes the amount of time threads │ +│ spend mutating instead of waiting to perform a nursery collection. │ +│ │ +│ Once all the nursery collections have completed in serial, the marking phase │ +│ can be performed in parallel with work-stealing. Of the Lisp 2 algorithm’s │ +│ passes, only the “update object fields to refer to the new location” pass is │ +│ obviously parallelizable. Finalizers are also called in this pass, to allow │ +│ them to be executed in parallel as well. (Recall that finalizers are always │ +│ provided by the runtime, so they can be trusted to be thread-safe.) │ +│ │ +│ Finally, the old-heap can easily be both marked and swept in parallel. Each │ +│ block is conceptually owned by a thread, and that thread is responsible for │ +│ sweeping it. │ +├──────────────────────────────────────────────────────────────────────────────┤ +│ There are some experiments it’d be nice to run with this collector: │ +│ │ +│ 1. Right now, the write barrier design is intended to avoid branches as │ +│ much as possible (without relying on page faults). Instead, extra │ +│ nursery collections will be performed due to the remembered set being │ +│ larger than it needs to be. │ +│ │ +│ i. Are the branches really that expensive? In particular, does it end │ +│ up being a net win to add a branch on: │ +│ │ +│ a. Whether the written value is a fixnum at runtime or not. │ +│ │ +│ b. Whether the slot being written to is in a non-nursery object. │ +│ │ +│ c. Whether the value being written is a pointer to a non-old-heap │ +│ object. │ +│ │ +│ d. Whether the value being written is a pointer to an object in a │ +│ strictly younger region than the slot it’s being written to. │ +│ │ +│ ii. Is the branch on whether the nursery is full really that cheap? │ +│ Does it end up being a net win to use a guard page to store the │ +│ nursery separately from the remembered set, and use guard pages to │ +│ fault when each one is full? │ +│ │ +│ 2. The constants listed above were chosen by a combination of “that sounds │ +│ about right,” “Go does that,” and “OpenJDK does that.” It’s plausible │ +│ that instead, good defaults could be learned with a genetic algorithm │ +│ that tunes the constants in a much more general formula, e.g. │ +│ │ +│ nursery size = k₁ + k₂ × L1d size + k₃ × L2 size + k₄ × L3 size + … │ +│ │ +│ It would be useful to try learning good defaults on a wide variety of │ +│ machines and workloads, and seeing if natural constants “fall out.” │ +│ (Note that this means running a single learning process whose loss │ +│ function depends on the performance of the formula across all the │ +│ machines.) │ +│ │ +│ This could also be used alongside any “boolean-valued” experiments, to │ +│ learn whether they’re useful and how they affect the constants. │ +│ │ +│ Finally, this could plausibly even be a useful tool to ship to users │ +│ of the compiler, so they can make tune GC constants for release builds │ +│ of their own software. │ +│ │ +│ 3. The parallel variant pins threads to CPUs because many modern machines │ +│ have heterogenous core clusters, so different CPUs will have different │ +│ L1d sizes. However, this is only likely to have reasonable performance │ +│ with a work-stealing green threading runtime in userspace, and even in │ +│ that case may have worse mutator performance than not pinning threads. │ +│ │ +│ Does pinning and having optimally-cache-sized nurseries really improve │ +│ performance that much versus uniformly sizing the nurseries and not │ +│ pinning? │ +│ │ +│ 4. Parallelizing the “update object fields to refer to the new location” │ +│ pass of the young-heap collection requires the “assign forwarding │ +│ pointers to objects” pass to estimate which parts of the young-heap are │ +│ assigned to which thread, since they must be aligned to the boundaries │ +│ between objects. │ +│ │ +│ In the above design, the passes are serialized, such that only after │ +│ all threads have been assigned to parts of the region do any of them │ +│ start updating object fields. Instead, we could imagine giving threads │ +│ non-uniformly-sized parts of the region, and having them start as soon │ +│ as they are assigned. │ +│ │ +│ Threads could also start the “shift all objects down” pass as soon as │ +│ all threads before them have finished the “update object fields” pass. │ +│ │ +│ i. Does interleaving the first two passes improve performance? │ +│ │ +│ ii. Should threads be given (approximately) equally-sized portions of │ +│ the region, or should they follow some curve, so threads that start │ +│ earlier are given more work to do? │ +│ │ +│ This curve’s parameters should probably be learned as above. │ +│ │ +│ iii. Does interleaving the last two passes improve performance? This │ +│ means shifting objects down while some (later) objects’ fields are │ +│ still being updated. │ +│ │ +│ iv. Does interleaving all three passes improve performance? │ +╰──────────────────────────────────────────────────────────────────────────────╯ diff --git a/misc/leaf-classes.lisp b/misc/leaf-classes.lisp new file mode 100644 index 0000000..711890f --- /dev/null +++ b/misc/leaf-classes.lisp @@ -0,0 +1,27 @@ +(in-package :cl-user) + +(defun class-in-cl-package (class) + (eql (symbol-package (class-name class)) (find-package 'cl))) + +(defun class-has-no-standard-subclasses (class) + (notany #'class-in-cl-package (sb-mop:class-direct-subclasses class))) + +(let ((queue nil) + (set nil)) + (do-external-symbols (sym 'cl) + (let ((class (find-class sym nil))) + (when class + (push class queue)))) + (loop + while queue + do (let ((class (pop queue))) + (cond + ((class-has-no-standard-subclasses class) + (push class set))))) + (flet ((name (class) + (symbol-name (class-name class)))) + (setf set (stable-sort set #'string-lessp :key #'name)) + (setf set (remove-duplicates set :key #'class-name))) + (loop + for class in set + do (format t "~a~%" class))) diff --git a/src/Makefile b/src/Makefile index 432b331..07f1cb5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -12,7 +12,7 @@ LDFLAGS = CC = $(DEVKITPRO)/devkitARM/bin/arm-none-eabi-gcc -CFLAGS_AUTO = -D__3DS__ -ffunction-sections -flto -g -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft -mword-relocations -Og -std=c11 -Wall -Werror=implicit-function-declaration +CFLAGS_AUTO = -D__3DS__ -ffunction-sections -flto -g -march=armv6k -mtune=mpcore -mfloat-abi=hard -mtp=soft -mword-relocations -O3 -std=c11 -Wall -Werror=implicit-function-declaration LDFLAGS_AUTO = -L$(DEVKITPRO)/libctru/lib -specs=3dsx.specs LDLIBS_AUTO = -lctru -lm @@ -38,9 +38,7 @@ all: imb3.3dsx imb3.3dsx: imb3.elf 3dsxtool $< $@ -imb3.elf: $(OBJS) obj/gc/gc.o - $(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -o $@ $^ $(LDLIBS_ALL) -imb3-debug.elf: $(OBJS) obj/gc/gc-debug.o +imb3.elf: $(OBJS) obj/gc/gc.o obj/platform/3ds.o $(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -o $@ $^ $(LDLIBS_ALL) obj/%.o: $(srcdir)/%.c @@ -2,15 +2,21 @@ #define IMB3_GC_H #include "value.h" -#include <stddef.h> void gc_init(void); void gc_collect(void); struct object; -struct value gc_read_value_slot(struct object *, size_t); -uintptr_t gc_read_untraced_slot(struct object *, size_t); -uint8_t gc_read_untraced_byte(struct object *, size_t); + +struct object *gc_alloc(size_t value_slot_count, size_t untraced_slot_count); +struct object *gc_alloc_compiled_function(size_t value_slot_count, + size_t untraced_slot_count); +struct object *gc_alloc_hashtable_eq(size_t value_slot_count, + size_t untraced_slot_count); + +struct value gc_read_value_slot(const struct object *, size_t); +uintptr_t gc_read_untraced_slot(const struct object *, size_t); +uint8_t gc_read_untraced_byte(const struct object *, size_t); void gc_write_value_slot(struct object *, size_t, struct value); void gc_write_untraced_slot(struct object *, size_t, uintptr_t); void gc_write_untraced_byte(struct object *, size_t, uint8_t); diff --git a/src/gc/gc-debug.c b/src/gc/gc-debug.c deleted file mode 100644 index e69de29..0000000 --- a/src/gc/gc-debug.c +++ /dev/null diff --git a/src/gc/gc.c b/src/gc/gc.c index f53fd79..1c0daec 100644 --- a/src/gc/gc.c +++ b/src/gc/gc.c @@ -1,18 +1,12 @@ +#include "../gc.h" +#include "../platform.h" #include "../util.h" #include <stdbool.h> #include <stddef.h> -#include <stdio.h> +#include <stdint.h> #include <stdlib.h> /** - * These cache-size details are from - * https://www.copetti.org/writings/consoles/nintendo-3ds/. We call the second - * level of cache (only available on N3DS) L3, since this GC design only cares - * about L1D and L3. - */ -static const size_t L1D_SIZE = 16 * 1024, L3_SIZE = 2 * 1024 * 1024; - -/** * Constants that should probably be learned. */ static const size_t NURSERY_L1D_SIZE_FACTOR = 1; @@ -43,19 +37,19 @@ enum object_kind { OBJECT_UNTRACED, /** + * A wrapper for a pointer to a code address. When this object gets collected, + * calls `free_code` on its first untraced slot. + */ + OBJECT_COMPILED_FUNCTION, + + /** * A hashtable keyed by the addresses. This behaves like a normal object, but * the first untraced slot gets set to the `uintptr_t` 1 whenever a collection * traces the hashtable. * * The hashtable uses this information to rehash after each collection. */ - OBJECT_EQ_HASHTABLE, - - /** - * A wrapper for a pointer to a code address. When this object gets collected, - * calls `free_code` on its first untraced slot. - */ - OBJECT_COMPILED_FUNCTION, + OBJECT_HASHTABLE_EQ, }; /** @@ -72,11 +66,11 @@ struct object_size { /** * Returns the number of slots for values the object has. */ -static size_t object_value_slots(struct object_size size) { +static size_t object_size_value_slots(struct object_size size) { switch (size.kind) { case OBJECT_NORMAL: - case OBJECT_EQ_HASHTABLE: case OBJECT_COMPILED_FUNCTION: + case OBJECT_HASHTABLE_EQ: return size.bits >> ((4 * sizeof(size_t)) - 3); case OBJECT_UNTRACED: return 0; @@ -86,11 +80,11 @@ static size_t object_value_slots(struct object_size size) { /** * Returns the number of untraced slots the object has. */ -static size_t object_untraced_slots(struct object_size size) { +static size_t object_size_untraced_slots(struct object_size size) { switch (size.kind) { case OBJECT_NORMAL: - case OBJECT_EQ_HASHTABLE: case OBJECT_COMPILED_FUNCTION: + case OBJECT_HASHTABLE_EQ: return size.bits & ((1 << ((4 * sizeof(size_t)) - 3)) - 1); case OBJECT_UNTRACED: return size.bits; @@ -106,6 +100,20 @@ struct object { }; /** + * Returns the number of slots for values the object has. + */ +static size_t object_value_slots(const struct object *obj) { + return object_size_value_slots(((struct object_size *)obj)[-1]); +} + +/** + * Returns the number of untraced slots the object has. + */ +static size_t object_untraced_slots(const struct object *obj) { + return object_size_untraced_slots(((struct object_size *)obj)[-1]); +} + +/** * This is the layout of an object in the nursery. */ struct nursery_object { @@ -120,8 +128,8 @@ struct nursery_object { * with one. */ struct young_heap_object { - struct object_size size; size_t fwd; + struct object_size size; uintptr_t slots[]; }; @@ -199,10 +207,10 @@ static struct old_heap_block old_heap_sentinels[SIZE_CLASS_COUNT]; void gc_init(void) { // Allocate the nursery and young-heap. nursery_bottom = nursery_next = - (uintptr_t)malloc(L1D_SIZE * NURSERY_L1D_SIZE_FACTOR); + (uintptr_t)malloc(get_l1d_size() * NURSERY_L1D_SIZE_FACTOR); assume(nursery_bottom); young_heap_bottom = young_heap_next = - (uintptr_t)malloc(L3_SIZE * YOUNG_HEAP_L3_SIZE_FACTOR); + (uintptr_t)malloc(get_l3_size() * YOUNG_HEAP_L3_SIZE_FACTOR); assume(young_heap_bottom); // Self-link the old-heap. @@ -211,3 +219,48 @@ void gc_init(void) { &old_heap_sentinels[i]; } } + +struct object *gc_alloc(size_t value_slot_count, size_t untraced_slot_count) { + todo("gc_alloc"); +} + +struct object *gc_alloc_compiled_function(size_t value_slot_count, + size_t untraced_slot_count) { + todo("gc_alloc_compiled_function"); +} + +struct object *gc_alloc_hashtable_eq(size_t value_slot_count, + size_t untraced_slot_count) { + todo("gc_alloc_hashtable_eq"); +} + +struct value gc_read_value_slot(const struct object *obj, size_t slot_index) { + assume(slot_index < object_value_slots(obj)); + todo("gc_read_value_slot"); +} + +uintptr_t gc_read_untraced_slot(const struct object *obj, size_t slot_index) { + assume(slot_index < object_untraced_slots(obj)); + todo("gc_read_untraced_slot"); +} + +uint8_t gc_read_untraced_byte(const struct object *obj, size_t byte_index) { + todo("gc_read_untraced_byte"); +} + +void gc_write_value_slot(struct object *obj, size_t slot_index, + struct value value) { + assume(slot_index < object_value_slots(obj)); + todo("gc_write_value_slot"); +} + +void gc_write_untraced_slot(struct object *obj, size_t slot_index, + uintptr_t value) { + assume(slot_index < object_untraced_slots(obj)); + todo("gc_write_untraced_slot"); +} + +void gc_write_untraced_byte(struct object *obj, size_t byte_index, + uint8_t byte) { + todo("gc_write_untraced_byte"); +} @@ -7,6 +7,9 @@ int main(int argc, char **argv) { gfxInit(GSP_BGR8_OES, GSP_BGR8_OES, false); + gc_init(); + bootstrap(); + // Initialize console on top screen. Using NULL as the second argument tells // the console library to use the internal console structure as current one consoleInit(GFX_TOP, NULL); @@ -20,8 +23,6 @@ int main(int argc, char **argv) { printf("\x1b[29;16HPress Start to exit.\n"); - gc_init(); - // Main loop while (aptMainLoop()) { // Scan all the inputs. This should be done once for each frame diff --git a/src/platform.h b/src/platform.h new file mode 100644 index 0000000..2dbfd26 --- /dev/null +++ b/src/platform.h @@ -0,0 +1,13 @@ +#ifndef IMB3_PLATFORM_H +#define IMB3_PLATFORM_H + +#include <stddef.h> +#include <stdnoreturn.h> + +size_t get_l1d_size(void); +size_t get_l3_size(void); + +void panic_begin(void); +noreturn void panic_end(void); + +#endif // IMB3_PLATFORM_H diff --git a/src/platform/3ds.c b/src/platform/3ds.c new file mode 100644 index 0000000..888baf7 --- /dev/null +++ b/src/platform/3ds.c @@ -0,0 +1,33 @@ +#include <3ds.h> + +#include "../platform.h" +#include <stdio.h> +#include <stdlib.h> + +/** + * These cache-size details are from + * https://www.copetti.org/writings/consoles/nintendo-3ds/. We call the second + * level of cache (only available on N3DS) L3, since the GC design only cares + * about L1D and L3. + */ +size_t get_l1d_size(void) { return 16 * 1024; } +size_t get_l3_size(void) { return 2 * 1024 * 1024; } + +void panic_begin(void) { consoleInit(GFX_TOP, NULL); } + +noreturn void panic_end(void) { + printf("\nPress Start to exit.\n"); + + while (aptMainLoop()) { + hidScanInput(); + u32 keys = hidKeysDown(); + if (keys & KEY_START) + break; + gfxFlushBuffers(); + gfxSwapBuffers(); + gspWaitForVBlank(); + } + + gfxExit(); + exit(1); +} @@ -1,43 +1,12 @@ #include "util.h" +#include "platform.h" #include <stdarg.h> #include <stdio.h> -#include <stdlib.h> - -#ifdef __3DS__ - -#include <3ds.h> - -static void panic_begin(void) { consoleInit(GFX_TOP, NULL); } - -static noreturn void panic(void) { - printf("\nPress Start to exit.\n"); - - while (aptMainLoop()) { - hidScanInput(); - u32 keys = hidKeysDown(); - if (keys & KEY_START) - break; - gfxFlushBuffers(); - gfxSwapBuffers(); - gspWaitForVBlank(); - } - - gfxExit(); - exit(1); -} - -#else - -static void panic_begin(void) {} - -static noreturn void panic(void) { abort(); } - -#endif noreturn void assume__failed(const char *file, int line, const char *expr) { panic_begin(); fprintf(stderr, "%s:%d: assertion failed: %s\n", file, line, expr); - panic(); + panic_end(); } noreturn void todo__impl(const char *file, int line, const char *fmt, ...) { @@ -47,5 +16,5 @@ noreturn void todo__impl(const char *file, int line, const char *fmt, ...) { va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); - panic(); + panic_end(); } diff --git a/src/value.c b/src/value.c index 66d388c..07af689 100644 --- a/src/value.c +++ b/src/value.c @@ -1 +1,112 @@ #include "value.h" +#include "gc.h" + +enum builtin { + BUILTIN_BUILTIN_CLASS_BIGNUM, + BUILTIN_BUILTIN_CLASS_BUILTIN_CLASS, + BUILTIN_BUILTIN_CLASS_BUILTIN_FUNCTION, + BUILTIN_BUILTIN_CLASS_BUILTIN_OBJECT, + BUILTIN_BUILTIN_CLASS_CHARACTER, + BUILTIN_BUILTIN_CLASS_COMPILED_FUNCTION, + BUILTIN_BUILTIN_CLASS_CONS, + BUILTIN_BUILTIN_CLASS_DIRECT_SLOT_DEFINITION, + BUILTIN_BUILTIN_CLASS_EFFECTIVE_SLOT_DEFINITION, + BUILTIN_BUILTIN_CLASS_FIXNUM, + BUILTIN_BUILTIN_CLASS_GENERIC_FUNCTION, + BUILTIN_BUILTIN_CLASS_HASHTABLE_EQ, + BUILTIN_BUILTIN_CLASS_HASHTABLE_EQUAL, + BUILTIN_BUILTIN_CLASS_METHOD, + BUILTIN_BUILTIN_CLASS_NULL, + BUILTIN_BUILTIN_CLASS_PACKAGE, + BUILTIN_BUILTIN_CLASS_STANDARD_CLASS, + BUILTIN_BUILTIN_CLASS_STANDARD_OBJECT, + BUILTIN_BUILTIN_CLASS_STRING, + BUILTIN_BUILTIN_CLASS_SYMBOL, + BUILTIN_BUILTIN_CLASS_THREADED_FUNCTION, + BUILTIN_BUILTIN_CLASS_VECTOR_VALUE_ADJ_FILL, + BUILTIN_BUILTIN_CLASS_VECTOR_VALUE_NOADJ_FILL, + BUILTIN_BUILTIN_CLASS_VECTOR_VALUE_NOADJ_NOFILL, + + BUILTIN_FUNCTION_CLASS_DIRECT_SLOTS, + BUILTIN_FUNCTION_CLASS_DIRECT_SUBCLASSES, + BUILTIN_FUNCTION_CLASS_DIRECT_SUPERCLASSES, + BUILTIN_FUNCTION_CLASS_NAME, + BUILTIN_FUNCTION_CLASS_PRECEDENCE_LIST, + BUILTIN_FUNCTION_CLASS_SLOTS, + BUILTIN_FUNCTION_EQ, + BUILTIN_FUNCTION_EQUAL, + BUILTIN_FUNCTION_GETHASH, + BUILTIN_FUNCTION_GETSLOT, + BUILTIN_FUNCTION_SETHASH, + BUILTIN_FUNCTION_SETSLOT, + BUILTIN_FUNCTION_SLOT_LOCATION, + BUILTIN_FUNCTION_SLOT_NAME, + BUILTIN_FUNCTION_SLOT_TYPE, + BUILTIN_FUNCTION_VECTOR_GET, + BUILTIN_FUNCTION_VECTOR_PUSH, + BUILTIN_FUNCTION_VECTOR_PUSH_EXTEND, + BUILTIN_FUNCTION_VECTOR_SET, + + BUILTIN_VARIABLE_DSTACK, + BUILTIN_VARIABLE_RSTACK, + + BUILTINS_COUNT, +}; + +static struct value builtins[BUILTINS_COUNT]; + +enum builtin_class_slot_index { + BUILTIN_CLASS__NAME, + BUILTIN_CLASS__FINALIZED, + BUILTIN_CLASS__DIRECT_SUPERCLASSES, + BUILTIN_CLASS__DIRECT_SUBCLASSES, + BUILTIN_CLASS__PRECEDENCE_LIST, + + BUILTIN_CLASS_SLOT_COUNT, +}; + +enum standard_class_slot_index { + STANDARD_CLASS__NAME, + STANDARD_CLASS__FINALIZED, + STANDARD_CLASS__DIRECT_SUPERCLASSES, + STANDARD_CLASS__DIRECT_SUBCLASSES, + STANDARD_CLASS__PRECEDENCE_LIST, + STANDARD_CLASS__DIRECT_SLOTS, + STANDARD_CLASS__SLOTS, + + STANDARD_CLASS_SLOT_COUNT, +}; + +static void push(struct value value) {} + +static struct value pop(void) { return (struct value){.bits = 0}; } + +struct value alloc_builtin_object(struct value class, size_t value_slot_count, + size_t untraced_slot_count) { + struct object *obj; + if (class.bits == builtins[BUILTIN_BUILTIN_CLASS_COMPILED_FUNCTION].bits) + obj = gc_alloc_compiled_function(value_slot_count, untraced_slot_count); + else if (class.bits == builtins[BUILTIN_BUILTIN_CLASS_HASHTABLE_EQ].bits) + obj = gc_alloc_hashtable_eq(value_slot_count, untraced_slot_count); + else + obj = gc_alloc(value_slot_count, untraced_slot_count); + gc_write_value_slot(obj, 0, class); + return (struct value){ + .bits = (uintptr_t)obj | TAG_BUILTIN_OBJECT, + }; +} + +void bootstrap(void) { + // Since it is its own class, standard-class needs to be constructed + // manually. + { + struct object *obj = gc_alloc(STANDARD_CLASS_SLOT_COUNT + 1, 0); + builtins[BUILTIN_BUILTIN_CLASS_STANDARD_CLASS] = (struct value){ + .bits = (uintptr_t)obj | TAG_BUILTIN_OBJECT, + }; + gc_write_value_slot(obj, 0, builtins[BUILTIN_BUILTIN_CLASS_STANDARD_CLASS]); + // TODO + } + + // +} diff --git a/src/value.h b/src/value.h index a932514..3795486 100644 --- a/src/value.h +++ b/src/value.h @@ -1,6 +1,7 @@ #ifndef IMB3_VALUE_H #define IMB3_VALUE_H +#include <stddef.h> #include <stdint.h> struct value { @@ -35,4 +36,15 @@ enum value_tag { TAG_STANDARD_OBJECT = 0b110, }; +/** + * Allocates a builtin-object with the given class and slot count. + */ +struct value make_builtin_object(struct value class, size_t value_slot_count, + size_t untraced_slot_count); + +/** + * Bootstraps the class heirarchy. This should only be called once. + */ +void bootstrap(void); + #endif // IMB3_VALUE_H |