//! A buddy allocator, used to allocate pages.
//!
//! The allocator can be split into three abstractions: stripes, trees, and the allocator.
//!
//! TODO: See if there's standard terminology for these.
//!
//! ## Stripes
//!
//! The buddy allocator works in terms of size classes, which are power-of-two-sized, starting at
//! a single page and going up from there. Each stripe corresponds to a single size class and a
//! particular region of memory.
//!
//! A stripe contains a circular doubly-linked free-list for subregions of that size class, and a
//! bitset marking whether a particular subregion has been allocated or not. Being a circular
//! doubly-linked list makes it cheap to remove an element whose address we know, as well as cheap
//! to push and pop elements.
//!
//! It's efficient to go from the address of a subregion to the index of its corresponding bit, so
//! when we hand out a subregion from the free-list or put one back, it's cheap to read or write
//! its bit.
//!
//! ## Trees
//!
//! A tree is logically a collection of stripes, one per size class. To pack the structures more
//! efficiently, they are stored interleaved with each other, and the tree manages this.
//!
//! The important logic at the level of trees happens when we allocate a subregion from a size
//! class whose stripe's free-list is empty, and when we free subregions.
//!
//! When a stripe's free-list is empty, the tree instead allocates a subregion of a larger size
//! from the next stripe. It can then store the unused portion in the current size class.
//!
//! The really important bit is the ability to merge subregions when they're freed. When we free a
//! subregion of a certain size class, we can check whether its neighbor (its buddy) is
//! unallocated as well. If so, we can remove the buddy from its free-list by address, combine the
//! two subregions into one of the next larger size class, and return that subregion to the next
//! stripe.
//!
//! ## The buddy allocator
//!
//! Finally, the overall buddy allocator needs to be able to handle multiple memory regions. To
//! facilitate this, the trees are stored in an array, which forms the overall allocator.
#![no_std]

extern crate std; // TODO: Remove me

mod bitvec;
mod free_list;
mod tree;

use crate::{
    bitvec::{Bitset, SubregionStatus},
    free_list::{FreeList, FreeListNode},
    tree::Tree,
};
use allocator_api2::alloc::{AllocError, Layout, LayoutError};
use contracts::{ensures, requires};
use core::{fmt, mem, ptr::NonNull};
use vernos_physmem_free_list::FreeListAllocator;

/// A buddy allocator.
pub struct BuddyAllocator<
    'allocator,
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    /// The free list sentinels (`SIZE_CLASS_COUNT` of them).
    free_list_sentinels: NonNull<FreeListNode>,

    /// The metadata associated with each tree.
    trees: &'allocator mut [Tree<'allocator, PAGE_SIZE, PAGE_SIZE_BITS>],

    /// The shared bitset.
    bitset: &'allocator mut Bitset,
}

impl<
        'allocator,
        const PAGE_SIZE: usize,
        const PAGE_SIZE_BITS: usize,
        const SIZE_CLASS_COUNT: usize,
    > BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    /// Returns a buddy allocator backed by the page ranges in the free list.
    pub fn new(
        mut free_list_alloc: FreeListAllocator<'allocator, PAGE_SIZE>,
    ) -> Result<BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, AllocError>
    {
        assert_eq!(PAGE_SIZE, 1 << PAGE_SIZE_BITS);
        assert_ne!(SIZE_CLASS_COUNT, 0);

        // We split up all the nodes into size-class-sized chunks here. After this, it's important
        // for safety that we never split a node again -- we'll see why later.
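        //
        // For example (hypothetical numbers): with SIZE_CLASS_COUNT = 4, the largest size class
        // covers 1 << 3 = 8 pages, so a 13-page node would be split into nodes of 8, 4, and 1
        // pages before we go any further.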
        free_list_alloc.split_to_powers_of_two_no_larger_than(1 << (SIZE_CLASS_COUNT - 1));

        // We use one contiguous region to store the free lists and all the information about each
        // tree (the base, the size class, and the bitset). We do one pass through the free list
        // to find out how much storage we need, then we try to find a convenient spot to steal
        // the storage we need from a page range in the list.
        let mut range_count = 0;
        let mut size_class_counts = [0; SIZE_CLASS_COUNT];
        for (_, len_pages) in free_list_alloc.iter() {
            assert!(len_pages.is_power_of_two());
            let size_class = len_pages.trailing_zeros() as usize;
            assert!((0..SIZE_CLASS_COUNT).contains(&size_class));
            range_count += 1;
            size_class_counts[size_class] += 1;
        }

        // Lay out the metadata.
        //
        // UNWRAP: The error here really ought to be impossible, because the pages in the free
        // list needed to fit in there in the first place.
        let metadata_layout = MetadataLayout::<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>::new(
            range_count,
            size_class_counts,
        )
        .unwrap();

        // Allocate space for the metadata.
        //
        // SAFETY: We need to make _absolutely_ sure nothing in the buddy allocator uses this
        // memory before we have a chance to remove it from the allocator. This function
        // guarantees that the returned memory won't overlap with the first page of a live node,
        // so as long as we don't split a node or write past the first page of one, we should be
        // OK.
        let metadata = unsafe {
            free_list_alloc.allocate_without_always_removing_the_node(metadata_layout.layout)?
        };

        // SAFETY: The pointer must be valid to have been returned here.
        unsafe {
            metadata.cast::<u8>().write_bytes(0, metadata.len());
        }

        // Break out each component of the metadata.
        //
        // SAFETY: The layout is computed to make all of this sound if the data is valid. All of
        // these types are valid when zero-initialized.
        let free_list_sentinels: NonNull<FreeListNode> = unsafe {
            metadata
                .byte_add(metadata_layout.free_list_sentinels_offset)
                .cast()
        };
        let trees: &'allocator mut [Tree<'allocator, PAGE_SIZE, PAGE_SIZE_BITS>] = unsafe {
            NonNull::slice_from_raw_parts(
                metadata.byte_add(metadata_layout.trees_offset).cast(),
                metadata_layout.trees_len,
            )
            .as_mut()
        };
        // SAFETY: This type is valid when zero-initialized, and is a newtype for `[usize]`, so it
        // has the same layout.
        let bitset: &'allocator mut Bitset = unsafe {
            mem::transmute::<&mut [usize], &mut Bitset>(
                NonNull::slice_from_raw_parts(
                    metadata.byte_add(metadata_layout.bitset_offset).cast(),
                    metadata_layout.bitset_len,
                )
                .as_mut(),
            )
        };

        // Self-link each free list. Although the sentinels are valid (in the sense of not being
        // UB) when zeroed, they're not ready to use until this is done.
        for size_class in 0..SIZE_CLASS_COUNT {
            // SAFETY: `SIZE_CLASS_COUNT` is the number of sentinel nodes we allocated, so this
            // stays in-bounds.
            unsafe { FreeListNode::init_sentinel(free_list_sentinels.add(size_class)) };
        }

        // Initialize the trees, adding subregions into the allocator as we do. We don't actually
        // assign bitset offsets yet, because we're going to sort the trees by address before we
        // do. (This isn't algorithmically necessary, but it makes debugging a bit easier.)
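        //
        // For example (hypothetical numbers): with SIZE_CLASS_COUNT = 4, an 8-page node has
        // trailing_zeros = 3 and becomes a single tree of size class 3. Since every node was
        // already split to a power-of-two length of at most 1 << (SIZE_CLASS_COUNT - 1) pages,
        // the inner loop below should peel off exactly one tree per node.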
        let mut i = 0;
        while let Some((ptr, mut len_pages)) = free_list_alloc.pop() {
            while len_pages > 0 {
                let size_class = (len_pages.trailing_zeros() as usize).min(SIZE_CLASS_COUNT - 1);
                trees[i].base_ptr = Some(ptr.cast());
                trees[i].size_class = size_class;
                i += 1;
                len_pages -= 1 << size_class;
            }
        }
        assert!(
            i == trees.len() || i + 1 == trees.len(),
            "found {} trees but allocated {} slots",
            i,
            trees.len()
        );

        // Slice the trees slice to fit however many trees actually remained, then sort them by
        // address.
        let trees = &mut trees[..i];
        trees.sort_by_key(|tree| tree.base_addr());

        // Compute the number of bits that belong to each tree and assign them.
        let mut bitset_offset = 0;
        for tree in &mut *trees {
            tree.bitset_offset = bitset_offset;
            bitset_offset += (1 << (tree.size_class + 1)) - 1;
        }
        assert!(bitset_offset <= metadata_layout.bitset_len_bits);

        // Add the regions the trees should own to the free lists, excluding the subregions the
        // metadata is allocated in.
        //
        // Since we know that the metadata was allocated at the end of the page range it was
        // allocated in, we can just test the end addresses.
        //
        // SAFETY: This is the one-past-the-end address.
        let metadata_addr_hi =
            unsafe { metadata.cast::<u8>().add(metadata.len()).as_ptr() } as usize;
        for tree in &mut *trees {
            // This is the one-past-the-end address of the tree.
            let tree_addr_hi = tree.base_addr() + (PAGE_SIZE << tree.size_class);
            if metadata_addr_hi == tree_addr_hi {
                // If the metadata was present inside the tree... TODO.
                std::eprintln!("TODO");
            } else {
                // Otherwise, we can take the whole range of the tree and add it to the free list.

                // SAFETY: There are as many sentinels as size classes, so this will be in-bounds.
                let sentinel = unsafe { free_list_sentinels.add(tree.size_class) };

                // SAFETY: The free list is kept valid.
                let mut free_list = unsafe { FreeList::from_sentinel(sentinel) };

                // SAFETY: This region is not owned or borrowed by anything else (the only thing
                // it could be would be the metadata, but that's not in this tree), is of the
                // correct size, and is composed of untyped memory.
                unsafe { free_list.push(tree.base_ptr.unwrap().cast()) };

                // Then, set a bit in the bitset to say that this region is present.
                tree.bitset_mark_as_present(bitset, tree.size_class, 0);
            }
        }

        // Make the buddy allocator's struct and return it.
        Ok(BuddyAllocator {
            free_list_sentinels,
            trees,
            bitset,
        })
    }

    /// Tries to allocate a subregion of a particular size class from the allocator.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    pub fn alloc(&mut self, size_class: usize) -> Result<NonNull<u8>, AllocError> {
        // Fast path: try allocating from the right size class's free list.
        if let Some(ptr) = self.free_list(size_class).pop() {
            // Find the corresponding tree and the offset of the subregion in the tree.
            let (tree_index, offset) = self.tree_index_and_offset(ptr);
            let tree = &mut self.trees[tree_index];

            // Mark the pointer as no longer being in the tree.
            tree.bitset_mark_as_absent(&mut self.bitset, size_class, offset);

            // Return the pointer.
            Ok(ptr)
        } else {
            // Try finding a larger chunk of memory in a larger size class. (Not implemented yet.)
            Err(AllocError)
        }
    }

    /// Returns a subregion of a particular size class to the allocator.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a call to `alloc` with the same `size_class`.
    /// - `ptr` must convey ownership over the entire region.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    pub unsafe fn dealloc(&mut self, ptr: NonNull<u8>, size_class: usize) {
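        // A worked example of the buddy computation done below (hypothetical numbers): with
        // PAGE_SIZE = 4096 and size_class = 0, the subregion at byte offset 0x2000 has its buddy
        // at 0x2000 ^ 0x1000 = 0x3000; merging the two would yield a size-class-1 subregion at
        // offset 0x2000.
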
        // Find the corresponding tree and the offset of the subregion in the tree.
        let (tree_index, offset) = self.tree_index_and_offset(ptr);
        let tree = &mut self.trees[tree_index];

        // Ensure that the memory is marked as not being in the free list.
        assert_eq!(
            tree.bitset_get(&mut self.bitset, size_class, offset),
            SubregionStatus::NotInFreeList
        );

        // Start combining size classes.
        assert!(size_class <= tree.size_class);
        while size_class < tree.size_class {
            let buddy_offset = offset ^ (PAGE_SIZE << size_class);
            todo!("{buddy_offset:#x} {offset:#x}");
        }

        // Mark the pointer as being in the tree.
        tree.bitset_mark_as_present(&mut self.bitset, size_class, offset);

        // Return the pointer to the appropriate free list.
        self.free_list(size_class).push(ptr);
    }

    /// Returns the free list with the given size class.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    fn free_list(&mut self, size_class: usize) -> FreeList {
        // SAFETY: The bounds-check is a precondition.
        let sentinel = unsafe { self.free_list_sentinels.add(size_class) };

        // SAFETY: The free list is kept valid.
        unsafe { FreeList::from_sentinel(sentinel) }
    }

    /// Returns the index of the tree this pointer was allocated in, along with the byte offset
    /// of the pointer into that tree.
    #[ensures(self.trees[ret.0].contains(ptr.as_ptr() as usize))]
    #[ensures(ret.1 < (PAGE_SIZE << self.trees[ret.0].size_class))]
    fn tree_index_and_offset(&self, ptr: NonNull<u8>) -> (usize, usize) {
        let addr = ptr.as_ptr() as usize;
        let tree_index = self
            .trees
            .binary_search_by_key(&addr, |tree| tree.base_addr());
        let tree_index = match tree_index {
            Ok(i) => i,
            Err(i) => {
                assert_ne!(i, 0);
                i - 1
            }
        };
        let offset = addr - self.trees[tree_index].base_addr();
        (tree_index, offset)
    }
}
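// A worked example of `tree_index_and_offset` above (hypothetical addresses): with two trees
// based at 0x8000_0000 and 0x8010_0000, looking up a pointer at 0x8000_3000 binary-searches to
// `Err(1)`, so the pointer belongs to tree 0, at byte offset 0x3000.
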
impl<
        'allocator,
        const PAGE_SIZE: usize,
        const PAGE_SIZE_BITS: usize,
        const SIZE_CLASS_COUNT: usize,
    > fmt::Debug for BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        struct FreeLists<const SIZE_CLASS_COUNT: usize>(NonNull<FreeListNode>);

        impl<const SIZE_CLASS_COUNT: usize> fmt::Debug for FreeLists<SIZE_CLASS_COUNT> {
            fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
                fmt.debug_list()
                    .entries((0..SIZE_CLASS_COUNT).map(|size_class| {
                        // SAFETY: The free lists are kept valid, and the range of size classes
                        // is necessarily in-bounds.
                        unsafe { FreeList::from_sentinel(self.0.add(size_class)) }
                    }))
                    .finish()
            }
        }

        fmt.debug_struct("BuddyAllocator")
            .field(
                "free_lists",
                &FreeLists::<SIZE_CLASS_COUNT>(self.free_list_sentinels),
            )
            .field("trees", &self.trees)
            .field("bitset", &self.bitset)
            .finish()
    }
}

#[derive(Debug)]
struct MetadataLayout<
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    layout: Layout,
    free_list_sentinels_offset: usize,
    trees_offset: usize,
    trees_len: usize,
    bitset_offset: usize,
    bitset_len: usize,
    bitset_len_bits: usize,
}

impl<const PAGE_SIZE: usize, const PAGE_SIZE_BITS: usize, const SIZE_CLASS_COUNT: usize>
    MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    fn new(
        range_count: usize,
        size_class_counts: [usize; SIZE_CLASS_COUNT],
    ) -> Result<MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, LayoutError> {
        let trees_len = range_count;
        let mut bitset_len_bits = 0;
        // A tree of size class `n` needs `2^(n + 1) - 1` bits, one per node of its logical
        // binary tree.
        for (size_class, &count) in size_class_counts.iter().enumerate() {
            let bits_for_size_class = (1 << (size_class + 1)) - 1;
            bitset_len_bits += bits_for_size_class * count;
        }
        let bitset_len = (bitset_len_bits + usize::BITS as usize - 1) / usize::BITS as usize;

        let free_list_sentinels_layout = Layout::new::<[FreeListNode; SIZE_CLASS_COUNT]>();
        let trees_layout = Layout::array::<Tree<PAGE_SIZE, PAGE_SIZE_BITS>>(trees_len)?;
        let bitset_layout = Layout::array::<usize>(bitset_len)?;

        let free_list_sentinels_offset = 0;
        let (layout, trees_offset) = free_list_sentinels_layout.extend(trees_layout)?;
        let (layout, bitset_offset) = layout.extend(bitset_layout)?;

        Ok(MetadataLayout {
            layout,
            free_list_sentinels_offset,
            trees_offset,
            trees_len,
            bitset_offset,
            bitset_len,
            bitset_len_bits,
        })
    }
}
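
// A minimal usage sketch (not part of the original file; `new_free_list_allocator` is a
// hypothetical helper, since constructing a `FreeListAllocator` depends on the
// vernos_physmem_free_list API, and the const parameters are example values satisfying
// PAGE_SIZE == 1 << PAGE_SIZE_BITS):
//
//     let free_list_alloc = new_free_list_allocator();
//     let mut buddy = BuddyAllocator::<4096, 12, 19>::new(free_list_alloc)?;
//     let page = buddy.alloc(0)?; // Allocate a single 4 KiB page.
//     // SAFETY: `page` was returned by `alloc` with size class 0 and is owned by us.
//     unsafe { buddy.dealloc(page, 0) };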