//! A general-purpose allocator, based on the design of mimalloc [0, 1]. The design is summarized
//! below.
//!
//! Objects are separated into four classes, by size:
//!
//! - Small objects are less than or equal to 1KiB in size. Small objects get access to a special
//! fast-path for allocation that avoids performing a `.next_power_of_two()` on their size. This
//! fast-path is designed to be inlined. They get stored in 64KiB pages, which are stored in 4MiB
//! segments.
//! - Medium objects are greater than 1KiB in size, but less than or equal to 8KiB. Medium objects
//! have another fast-path, but it isn't inlinable and performs a `.next_power_of_two()`. Like
//! small objects, they get stored in 64KiB pages, which are stored in 4MiB segments.
//! - Large objects are greater than 8KiB in size, but less than or equal to 512KiB. Their pages
//! span the entire 4MiB of a segment.
//! - Huge objects are greater than 512KiB in size. They get a custom segment size with a single
//! page, sized to fit the object.
//!
//! This design has the property that the first address of an object will be less than 4MiB from
//! the start of the segment. By aligning segments to 4MiB, this lets us cheaply get a pointer to
//! the segment from a pointer to the object.
//!
//! [0]: https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action/
//! [1]: https://github.com/microsoft/mimalloc
#![no_std]

use allocator_api2::alloc::{AllocError, Allocator, Layout};
use contracts::requires;
use core::{
    cell::RefCell,
    marker::PhantomData,
    mem::MaybeUninit,
    ptr::{addr_of_mut, null_mut, slice_from_raw_parts_mut, NonNull},
    sync::atomic::{AtomicPtr, AtomicU16, Ordering},
};
use sptr::invalid_mut;
use static_assertions::const_assert_eq;

/// The per-CPU structure. This uses a `RefCell` for interior mutability.
#[derive(Debug)]
pub struct Heap<OS>(RefCell<HeapInner<OS>>);

#[derive(Debug)]
struct HeapInner<OS> {
    /// Small objects (<=1024 bytes) have pointers directly to the page they allocate into. These
    /// are non-owning pointers.
    pages_direct: [NonNull<Page>; LARGEST_SMALL_OBJECT >> 3],

    /// The sentinel nodes for the circular doubly-linked lists that compose the size classes, up
    /// to and including 512KiB, plus one more for objects over 512KiB (huge objects).
    pages: [PageLink; LARGEST_LARGE_OBJECT_SIZE_CLASS + 2],

    /// The sentinel node for segments that store small or medium objects that have free pages.
    small_medium_segments_free: SegmentLink,

    /// The sentinel node for segments that store small or medium objects that have no free pages.
    small_medium_segments_full: SegmentLink,

    /// The sentinel node for segments that store large or huge objects.
    large_huge_segments: SegmentLink,

    _phantom: PhantomData<OS>,
}
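
// Illustrative sketch (not part of the allocator's API): the small-object fast path described in
// the module docs maps a requested size onto an entry of `pages_direct` by rounding the size up
// to a multiple of 8 bytes and using `(rounded >> 3) - 1` as the index, mirroring the arithmetic
// in `HeapInner::alloc_small` below. The test name is ours; only the arithmetic is taken from the
// allocator.
#[test]
fn pages_direct_indexing_covers_all_small_sizes() {
    for size in 1..=LARGEST_SMALL_OBJECT {
        // Round up to the next multiple of 8, as `alloc_small` does.
        let rounded = (size + 7) & !0b111;
        assert!(rounded >= size && rounded % 8 == 0);

        // The resulting size class must be a valid index into `pages_direct`.
        let small_size_class = (rounded >> 3) - 1;
        assert!(small_size_class < (LARGEST_SMALL_OBJECT >> 3));
    }
}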

/// The log2 of the number of bytes in a segment (other than a segment created for a huge object).
pub const NON_HUGE_SEGMENT_SIZE_BITS: usize = 22;

/// The number of bytes in a segment (other than a segment created for a huge object).
pub const NON_HUGE_SEGMENT_SIZE: usize = 4 << 20;

const_assert_eq!(NON_HUGE_SEGMENT_SIZE, 1 << NON_HUGE_SEGMENT_SIZE_BITS);

const SMALL_MEDIUM_PAGE_SIZE_BITS: u32 = 16;
const SMALL_MEDIUM_PAGE_SIZE: usize = 1 << SMALL_MEDIUM_PAGE_SIZE_BITS;
const SMALL_MEDIUM_PAGES_PER_SEGMENT: usize =
    1 << (NON_HUGE_SEGMENT_SIZE_BITS - SMALL_MEDIUM_PAGE_SIZE_BITS as usize);

const LARGEST_SMALL_OBJECT: usize = 1024;
const LARGEST_MEDIUM_OBJECT: usize = 8 * 1024;
const LARGEST_LARGE_OBJECT: usize = 512 * 1024;
const LARGEST_SMALL_OBJECT_SIZE_CLASS: usize = 7;
const LARGEST_MEDIUM_OBJECT_SIZE_CLASS: usize = 10;
const LARGEST_LARGE_OBJECT_SIZE_CLASS: usize = 16;

const_assert_eq!(
    size_class(LARGEST_SMALL_OBJECT),
    LARGEST_SMALL_OBJECT_SIZE_CLASS
);
const_assert_eq!(
    size_class(LARGEST_MEDIUM_OBJECT),
    LARGEST_MEDIUM_OBJECT_SIZE_CLASS
);
const_assert_eq!(
    size_class(LARGEST_LARGE_OBJECT),
    LARGEST_LARGE_OBJECT_SIZE_CLASS
);

impl<OS: OSServices> Heap<OS> {
    /// Initializes a `Heap`. After this, the `MaybeUninit` is initialized.
    pub fn init(maybe_uninit: &mut MaybeUninit<Heap<OS>>) {
        // Initialize the `RefCell` part. This... might not be sound. Hm. It also might blow the
        // stack in debug builds, defeating the point of this whole function...
        let refcell = unsafe {
            maybe_uninit
                .as_mut_ptr()
                .cast::<MaybeUninit<RefCell<MaybeUninit<HeapInner<OS>>>>>()
                .as_mut()
                .unwrap_unchecked()
                .write(RefCell::new(MaybeUninit::uninit()))
        };

        // Make accessors for the fields of the HeapInner.
        let inner_ptr = refcell.get_mut().as_mut_ptr();
        let pages_direct_ptr = |i: usize| {
            // SAFETY: For the `maybe_uninit` to have been valid, this must be sound.
            unsafe { NonNull::new_unchecked(addr_of_mut!((*inner_ptr).pages_direct[i])) }
        };
        let pages_ptr = |i: usize| {
            // SAFETY: For the `maybe_uninit` to have been valid, this must be sound.
            unsafe { NonNull::new_unchecked(addr_of_mut!((*inner_ptr).pages[i])) }
        };

        // Make a helper to self-link the segment lists.
        //
        // SAFETY: See the call sites.
        let self_link_segments = |ptr: NonNull<SegmentLink>| unsafe {
            (*ptr.as_ptr()).prev = ptr;
            (*ptr.as_ptr()).next = ptr;
        };

        // Initialize each field.
        for i in 0..128 {
            // SAFETY: The returned pointer is valid.
            unsafe { pages_direct_ptr(i).write(NonNull::from(&EMPTY_PAGE)) }
        }
        for i in 0..18 {
            let ptr = pages_ptr(i);
            // SAFETY: The returned pointer is valid.
            unsafe { ptr.write(PageLink { prev: ptr, next: ptr }) }
        }
        // SAFETY: For the `maybe_uninit` to have been valid, getting these pointers must be sound.
        // Self-linking the pointers is also sound as a result.
        unsafe {
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).small_medium_segments_free
            )));
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).small_medium_segments_full
            )));
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).large_huge_segments
            )));
        }
    }

    /// Donates a segment to be used for small or medium objects.
    ///
    /// # Safety
    ///
    /// - `ptr` must convey ownership over the entire region.
    /// - `thread_id` must be correct.
    pub unsafe fn donate_small_medium_segment(&self, ptr: NonNull<[u8; NON_HUGE_SEGMENT_SIZE]>) {
        self.0.borrow_mut().add_small_medium_segment(ptr)
    }
}

impl<OS: OSServices> HeapInner<OS> {
    /// Initializes a segment for small or medium objects, adding it to this heap.
    unsafe fn add_small_medium_segment(&mut self, ptr: NonNull<[u8; NON_HUGE_SEGMENT_SIZE]>) {
        // Initialize the segment to be self-linked.
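        // The segment's header is written at the very start of the donated region, so `ptr` is
        // also a pointer to the segment's `SegmentLink` (the first field of the `repr(C)`
        // struct). Pointing `prev` and `next` at the segment itself satisfies the self-linked
        // precondition of `SegmentLink::add_next` below.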
        let maybe_uninit: &mut MaybeUninit<Segment> = ptr.cast().as_mut();
        maybe_uninit.write(Segment {
            link: SegmentLink {
                prev: ptr.cast(),
                next: ptr.cast(),
            },
            header: SegmentHeader {
                thread_id: OS::current_thread_id(),
                page_shift: SMALL_MEDIUM_PAGE_SIZE_BITS,
            },
        });

        // Add the segment to the list of segments with free pages.
        SegmentLink::add_next(NonNull::from(&self.small_medium_segments_free), ptr.cast());
    }

    /// Allocates small blocks of naturally-aligned memory.
    #[requires(size > 0)]
    #[requires(size <= LARGEST_SMALL_OBJECT)]
    #[inline]
    fn alloc_small(&mut self, size: usize) -> Result<NonNull<[u8]>, AllocError> {
        let size = (size + 7) & !0b111;
        let small_size_class = (size >> 3) - 1;
        let mut page = self.pages_direct[small_size_class];
        let page = unsafe { page.as_mut() };
        match page.header.alloc_list.as_mut() {
            Some(&mut block) => {
                let next = unsafe { block.as_ref() }.next.load(Ordering::SeqCst);
                page.header.alloc_list = NonNull::new(next);
                page.header.used.fetch_add(1, Ordering::SeqCst);
                // SAFETY: `block` is a `NonNull`, so the slice pointer will be non-null.
                Ok(unsafe {
                    NonNull::new_unchecked(slice_from_raw_parts_mut(block.as_ptr().cast(), size))
                })
            }
            None => self.alloc_generic(size, size),
        }
    }

    /// The slow path of allocation. Allocates a block of any size and alignment, and does some
    /// book-keeping tasks along the way.
    fn alloc_generic(&mut self, size: usize, align: usize) -> Result<NonNull<[u8]>, AllocError> {
        /*
        let size_class = size_class(size);
        let sentinel: NonNull<PageLink> = NonNull::from(&self.pages[size_class]);
        let mut page = sentinel;
        loop {
            page = unsafe { (*page.as_ptr()).next };
            todo!("{page:#?}")
        }
        */
        todo!("{:p}.alloc_generic({size}, {align})", self)
    }
}

unsafe impl<OS: OSServices> Allocator for Heap<OS> {
    #[inline]
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        // Special-case ZSTs.
        if layout.size() == 0 {
            return Ok(unsafe {
                NonNull::new_unchecked(slice_from_raw_parts_mut(invalid_mut(layout.align()), 0))
            });
        }

        // An object with an alignment this big would break our invariants (namely, that we can
        // get to the address of the segment by aligning down the address of the object to a 4MiB
        // boundary).
        if layout.align() >= NON_HUGE_SEGMENT_SIZE {
            return Err(AllocError);
        }

        let mut inner = self.0.borrow_mut();
        let size_if_small = layout.size().max(layout.align());
        if size_if_small <= LARGEST_SMALL_OBJECT {
            // Fast path.
            inner.alloc_small(size_if_small)
        } else {
            // Slow path.
            inner.alloc_generic(layout.size(), layout.align())
        }
    }

    #[inline]
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // Special-case ZSTs.
        if layout.size() == 0 {
            return;
        }

        let self_ptr = self as *const Heap<OS>;
        todo!("{self_ptr:p}.deallocate({ptr:p}, {layout:?})")
    }
}

/// Returns the index in the `pages` array of objects with the given size.
const fn size_class(size: usize) -> usize {
    if size < 8 {
        0
    } else if size > LARGEST_LARGE_OBJECT {
        17
    } else {
        (size.next_power_of_two().trailing_zeros() - 3) as usize
    }
}

#[test]
fn expected_size_class_values() {
    assert_eq!(size_class(1), 0);
    assert_eq!(size_class(8), 0);

    let mut size = 8;
    let mut expected = 0;
    while size <= 512 * 1024 {
        assert_eq!(size_class(size), expected);
        size <<= 1;
        expected += 1;
    }

    assert_eq!(size_class(512 * 1024), 16);
    assert_eq!(size_class((512 * 1024) + 1), 17);
    assert_eq!(size_class(4 * 1024 * 1024), 17);
}
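
// Illustrative sketch of the invariant described in the module docs: because non-huge segments
// are `NON_HUGE_SEGMENT_SIZE`-aligned and every object starts within the first 4MiB of its
// segment, masking off the low bits of an object's address recovers the segment's address.
// `segment_base_of` is a hypothetical helper that exists only for this illustration; it is not
// used by the allocator.
#[cfg(test)]
fn segment_base_of(object_addr: usize) -> usize {
    object_addr & !(NON_HUGE_SEGMENT_SIZE - 1)
}

#[test]
fn segment_base_is_recovered_by_masking() {
    // An arbitrary 4MiB-aligned address standing in for a real segment.
    let segment_base = 7 * NON_HUGE_SEGMENT_SIZE;
    for offset in [0, 1, 8, SMALL_MEDIUM_PAGE_SIZE, NON_HUGE_SEGMENT_SIZE - 1] {
        assert_eq!(segment_base_of(segment_base + offset), segment_base);
    }
}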

/// A segment is a container for pages, to amortize some of the overhead of allocating new pages.
#[repr(C)]
struct Segment {
    /// The intrusive link in the circular doubly-linked list of segments.
    link: SegmentLink,

    /// The fields other than the intrusive link in a segment's header.
    header: SegmentHeader,
}

/// The intrusive link in the circular doubly-linked lists of segments.
#[derive(Debug)]
struct SegmentLink {
    /// The previous segment.
    prev: NonNull<SegmentLink>,

    /// The next segment.
    next: NonNull<SegmentLink>,
}

impl SegmentLink {
    #[requires(SegmentLink::self_linked(item))]
    pub unsafe fn add_next(list: NonNull<SegmentLink>, item: NonNull<SegmentLink>) {
        let next = (*list.as_ptr()).next;
        (*item.as_ptr()).next = next;
        (*item.as_ptr()).prev = list;
        (*list.as_ptr()).next = item;
        (*next.as_ptr()).prev = item;
    }

    pub unsafe fn self_linked(ptr: NonNull<SegmentLink>) -> bool {
        let SegmentLink { prev, next } = *ptr.as_ptr();
        ptr == prev && ptr == next
    }
}

/// The fields other than the intrusive link in a segment's header.
struct SegmentHeader {
    /// The ID of the thread that owns the segment.
    thread_id: usize,

    /// The amount to right-shift the offset of an object from the start of the segment by to get
    /// its page index.
    page_shift: u32,
}

/// A page is a block of memory that contains objects of a single size class.
#[repr(C)]
struct Page {
    /// The intrusive link in the circular doubly-linked list of pages of each size class.
    link: PageLink,

    /// The fields other than the intrusive link in a page's header.
    header: PageHeader,
}

unsafe impl Send for Page {}
unsafe impl Sync for Page {}

/// A fake page that causes allocation to go into the "slow path," so that we can allocate an
/// initial page of that size class.
static EMPTY_PAGE: Page = unsafe {
    Page {
        link: PageLink {
            prev: NonNull::new_unchecked(&EMPTY_PAGE.link as *const PageLink as *mut PageLink),
            next: NonNull::new_unchecked(&EMPTY_PAGE.link as *const PageLink as *mut PageLink),
        },
        header: PageHeader {
            alloc_list: None,
            local_free_list: None,
            thread_free_list: AtomicPtr::new(null_mut()),
            used: AtomicU16::new(0),
            thread_freed: AtomicU16::new(0),
        },
    }
};

/// The intrusive link in the circular doubly-linked list of pages of each size class.
#[derive(Debug)]
struct PageLink {
    /// The previous page.
    prev: NonNull<PageLink>,

    /// The next page.
    next: NonNull<PageLink>,
}

/// The fields other than the intrusive link in a page's header.
struct PageHeader {
    /// The free list that the owning thread pops from.
    alloc_list: Option<NonNull<Block>>,

    /// The free list that the owning thread pushes onto.
    local_free_list: Option<NonNull<Block>>,

    /// The free list that other threads push onto.
    thread_free_list: AtomicPtr<Block>,

    /// The number of objects in the page that have been allocated.
    used: AtomicU16,

    /// The number of objects in the thread free list.
    thread_freed: AtomicU16,
}

/// An object in a free list.
struct Block {
    next: AtomicPtr<Block>,
}

fn atomic_push(node: &mut Block, list: &AtomicPtr<Block>) {
    loop {
        let old_list = list.load(Ordering::Acquire);
        node.next = AtomicPtr::from(old_list);
        if list
            .compare_exchange(old_list, node, Ordering::Release, Ordering::Acquire)
            .is_ok()
        {
            break;
        }
    }
}

/// The OS services the allocator needs.
///
/// # Safety
///
/// All methods must be implemented according to their doc comments.
pub unsafe trait OSServices {
    /// Returns the ID of the current thread. This can be arbitrary, so long as it is consistent
    /// within a thread and no two threads with the same thread ID run at the same time.
    fn current_thread_id() -> usize;
}
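
// A minimal example implementation of `OSServices`, assuming a single-threaded environment where
// a constant thread ID is trivially correct. `SingleThreadedOS` is purely illustrative and is not
// part of this crate's API; a real host should return a genuine per-thread identifier. With such
// an implementation in place, the heap type for that environment would be written as
// `Heap<SingleThreadedOS>`.
#[cfg(test)]
struct SingleThreadedOS;

// SAFETY: In the assumed single-threaded environment, the constant ID is consistent within the
// one thread, and no two threads sharing an ID ever run at the same time.
#[cfg(test)]
unsafe impl OSServices for SingleThreadedOS {
    fn current_thread_id() -> usize {
        0
    }
}

#[test]
fn example_os_services_reports_a_thread_id() {
    assert_eq!(SingleThreadedOS::current_thread_id(), 0);
}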