//! A buddy allocator, used to allocate pages.
//!
//! The allocator is made up of three abstractions: stripes, trees, and the allocator itself.
//!
//! TODO: See if there's standard terminology for these.
//!
//! ## Stripes
//!
//! The buddy allocator works in terms of size classes, which are power-of-two sized, starting at a
//! single page and going up from there. Each stripe corresponds to a single size class and a
//! particular region of memory.
//!
//! A stripe contains a circular doubly-linked free-list for subregions of that size class, and a
//! bitset marking whether a particular region has been allocated or not. Being a circular
//! doubly-linked list makes it cheap to remove an element whose address we know, as well as cheap
//! to push and pop elements.
//!
//! It's efficient to go from the address of a subregion to the index of its corresponding bit, so
//! when we hand out a subregion from the free-list or put one back, it's cheap to read or write
//! its bit.
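//!
//! For illustration only (the real encoding lives in the `bitset` module and may differ), the bit
//! for a subregion within a stripe could be located like this:
//!
//! ```ignore
//! /// Hypothetical sketch: the subregion's bit index is its offset from the stripe's base,
//! /// divided by the size in bytes of this size class (`page_size << size_class`).
//! fn bit_index(stripe_base: usize, subregion_addr: usize, page_size: usize, size_class: usize) -> usize {
//!     (subregion_addr - stripe_base) / (page_size << size_class)
//! }
//! ```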
//!
//! ## Trees
//!
//! A tree is logically a collection of stripes, one per size class. To pack the structures more
//! efficiently, they are stored interleaved with each other, and the tree manages this.
//!
//! The important logic at the level of trees happens when we allocate a subregion from a size
//! class whose stripe's free-list is empty, and when we free subregions.
//!
//! When a stripe's free-list is empty, the tree instead allocates a subregion of the next-larger
//! size from the next stripe, splits it in half, and stores the unused half in the current size
//! class's free-list.
//!
//! The really important bit is the ability to merge subregions when they're freed. When we free a
//! subregion of a certain size class, we check whether its neighbor (its buddy) is unallocated as
//! well. If so, we remove the buddy from its free-list by address, combine the two subregions into
//! one of the next larger size class, and return that combined subregion to the next stripe.
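//!
//! Because subregions within a tree are aligned to their own size, finding a subregion's buddy is
//! just flipping one bit of its offset; this is the same computation `dealloc` performs below:
//!
//! ```ignore
//! /// Hypothetical sketch; `page_size << size_class` is the subregion size in bytes.
//! fn buddy_offset(offset_in_tree: usize, page_size: usize, size_class: usize) -> usize {
//!     offset_in_tree ^ (page_size << size_class)
//! }
//! ```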
//!
//! ## The buddy allocator
//!
//! Finally, the overall buddy allocator needs to be able to handle multiple memory regions. To
//! facilitate this, the trees are stored in an array, which forms the overall allocator.
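//!
//! A rough usage sketch (the `FreeListAllocator` setup is omitted, and the constant parameters
//! here are illustrative assumptions, not requirements):
//!
//! ```ignore
//! // Hypothetical parameters: 4 KiB pages and 19 size classes (subregions up to 1 GiB).
//! let mut buddy: BuddyAllocator<'_, 4096, 12, 19> = BuddyAllocator::new(free_list_alloc)?;
//! // Allocate a single page (size class 0), then return it.
//! let page = buddy.alloc(0)?;
//! unsafe { buddy.dealloc(page, 0) };
//! ```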

#![no_std]

extern crate std; // TODO: Remove me

mod bitset;
mod free_list;
mod tree;

use crate::{
    bitset::{Bitset, SubregionStatus},
    free_list::{FreeList, FreeListNode},
    tree::Tree,
};
use allocator_api2::alloc::{AllocError, Layout, LayoutError};
use contracts::{ensures, requires};
use core::{fmt, mem, ptr::NonNull};
use vernos_alloc_physmem_free_list::FreeListAllocator;

/// A buddy allocator.
pub struct BuddyAllocator<
    'allocator,
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    /// The free list sentinels (`SIZE_CLASS_COUNT` of them).
    free_list_sentinels: NonNull<FreeListNode>,
    /// The metadata associated with each tree.
    trees: &'allocator mut [Tree<'allocator, PAGE_SIZE, PAGE_SIZE_BITS>],
    /// The shared bitset.
    bitset: &'allocator mut Bitset,
}

impl<
        'allocator,
        const PAGE_SIZE: usize,
        const PAGE_SIZE_BITS: usize,
        const SIZE_CLASS_COUNT: usize,
    > BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    /// Returns a buddy allocator backed by the page ranges in the free list.
    pub fn new(
        mut free_list_alloc: FreeListAllocator<'allocator, PAGE_SIZE>,
    ) -> Result<BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, AllocError>
    {
        assert_eq!(PAGE_SIZE, 1 << PAGE_SIZE_BITS);
        assert_ne!(SIZE_CLASS_COUNT, 0);

        // We split up all the nodes into size-class-sized chunks here. After this, it's important
        // for safety that we never split a node again -- we'll see why later.
        free_list_alloc.split_to_powers_of_two_no_larger_than(1 << (SIZE_CLASS_COUNT - 1));

        // We use one contiguous region to store the free lists and all the information about each
        // tree (the base, the maximum order, and the bitset). We do one pass through the free list
        // to find out how much storage we need, then we try to find a convenient spot to steal the
        // storage we need from a page range in the list.
        let mut range_count = 0;
        let mut size_class_counts = [0; SIZE_CLASS_COUNT];
        for (_, len_pages) in free_list_alloc.iter() {
            assert!(len_pages.is_power_of_two());
            let size_class = len_pages.trailing_zeros() as usize;
            assert!((0..SIZE_CLASS_COUNT).contains(&size_class));
            range_count += 1;
            size_class_counts[size_class] += 1;
        }

        // Lay out the metadata.
        //
        // UNWRAP: The error here really ought to be impossible, because the pages in the free list
        // needed to fit in there in the first place.
        let metadata_layout = MetadataLayout::<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>::new(
            range_count,
            size_class_counts,
        )
        .unwrap();

        // Allocate space for the metadata.
        //
        // SAFETY: We need to make _absolutely_ sure nothing in the buddy allocator uses this
        // memory before we have a chance to remove it from the allocator. This function guarantees
        // that the returned memory won't overlap with the first page of a live node, so as long as
        // we don't split a node or write past the first page of one, we should be OK.
        let metadata = unsafe {
            free_list_alloc.allocate_without_always_removing_the_node(metadata_layout.layout)?
        };
        // SAFETY: The pointer must be valid to have been returned here.
        unsafe {
            metadata.cast::<u8>().write_bytes(0, metadata.len());
        }

        // Break out each component of the metadata.
        //
        // SAFETY: The layout is computed to make all of this sound if the data is valid. All of
        // these types are valid when zero-initialized.
        let free_list_sentinels: NonNull<FreeListNode> = unsafe {
            metadata
                .byte_add(metadata_layout.free_list_sentinels_offset)
                .cast()
        };
        let trees: &'allocator mut [Tree<PAGE_SIZE, PAGE_SIZE_BITS>] = unsafe {
            NonNull::slice_from_raw_parts(
                metadata.byte_add(metadata_layout.trees_offset).cast(),
                metadata_layout.trees_len,
            )
            .as_mut()
        };
        // SAFETY: This type is valid when zero-initialized, and is a newtype for `[usize]` so has
        // the same layout.
        let bitset: &'allocator mut Bitset = unsafe {
            mem::transmute::<&mut [usize], &mut Bitset>(
                NonNull::slice_from_raw_parts(
                    metadata.byte_add(metadata_layout.bitset_offset).cast(),
                    metadata_layout.bitset_len,
                )
                .as_mut(),
            )
        };

        // Self-link each free list. Although they're valid in a UB sense when zeroed, they're not
        // yet ready to use until this is done.
        for size_class in 0..SIZE_CLASS_COUNT {
            // SAFETY: We allocated `SIZE_CLASS_COUNT` sentinels, so this index stays in-bounds.
            unsafe { FreeListNode::init_sentinel(free_list_sentinels.add(size_class)) };
        }

        // Initialize the trees, adding subregions into the allocator as we do. We don't actually
        // assign bitset offsets yet, because we're going to sort the trees by address before we
        // do. (This isn't algorithmically necessary, but it makes debugging a bit easier.)
        let mut i = 0;
        while let Some((ptr, mut len_pages)) = free_list_alloc.pop() {
            while len_pages > 0 {
                let size_class = (len_pages.trailing_zeros() as usize).min(SIZE_CLASS_COUNT - 1);
                trees[i].base_ptr = Some(ptr.cast());
                trees[i].size_class = size_class;
                i += 1;
                len_pages -= 1 << size_class;
            }
        }
        assert!(
            i == trees.len() || i + 1 == trees.len(),
            "found {} trees but allocated {} slots",
            i,
            trees.len()
        );

        // Slice the trees slice to fit how many trees actually remained, then sort them by
        // address.
        let trees = &mut trees[..i];
        trees.sort_by_key(|tree| tree.base_addr());

        // Compute the number of bits that belong to each tree and assign them.
        let mut bitset_offset = 0;
        for tree in &mut *trees {
            tree.bitset_offset = bitset_offset;
            bitset_offset += (1 << (tree.size_class + 1)) - 1;
        }
        assert!(bitset_offset <= metadata_layout.bitset_len_bits);

        // Add regions the trees should own to the free lists, excluding the subregions that
        // metadata is allocated in.
        // Since we know that the metadata was allocated at the end of the page range it was
        // allocated in, we can just test the end addresses.
        //
        // SAFETY: This is the one-past-the-end address.
        let metadata_addr_hi =
            unsafe { metadata.cast::<u8>().add(metadata.len()).as_ptr() } as usize;
        for tree in &mut *trees {
            // SAFETY: This is the one-past-the-end address.
            let tree_addr_hi = tree.base_addr() + (PAGE_SIZE << tree.size_class);
            if metadata_addr_hi == tree_addr_hi {
                // The metadata ends exactly at the end of this tree's range, so some (possibly
                // all) of the tree is occupied by it.
                // TODO: Add only the subregions that the metadata does not occupy.
                std::eprintln!("TODO");
            } else {
                // Otherwise, we can take the whole range of the tree and add it to the free list.
                // SAFETY: There are as many sentinels as size classes, so this will be in-bounds.
                let sentinel = unsafe { free_list_sentinels.add(tree.size_class) };
                // SAFETY: The free list is kept valid.
                let mut free_list = unsafe { FreeList::from_sentinel(sentinel) };
                // SAFETY: This region is not owned or borrowed by anything else (the only thing it
                // could be would be the metadata, but that's not in this tree), is of the correct
                // size, and is composed of untyped memory.
                unsafe { free_list.push(tree.base_ptr.unwrap().cast()) };
                // Then, set a bit in the bitset to say that this region is present.
                let Ok(()) = tree.bitset_mark_as_present(bitset, tree.size_class, 0) else {
                    panic!("the first bit was already set in {tree:#?}\nbitset = {bitset:?}")
                };
            }
        }

        // Make the buddy allocator's struct and return it.
        Ok(BuddyAllocator {
            free_list_sentinels,
            trees,
            bitset,
        })
    }

    /// Tries to allocate a subregion of a particular size class from the allocator.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    pub fn alloc(&mut self, size_class: usize) -> Result<NonNull<u8>, AllocError> {
        // Fast path: try allocating from the free list of the requested size class.
        if let Some(ptr) = self.free_list(size_class).pop() {
            // Find the corresponding tree and the offset of the subregion in the tree.
            let (tree_index, offset) = self.tree_index_and_offset(ptr);
            let tree = &mut self.trees[tree_index];
            // Mark the pointer as no longer being in the tree.
            let Ok(()) = tree.bitset_mark_as_absent(self.bitset, size_class, offset) else {
                panic!("the bit for tree {tree_index}, offset {offset} was not set in {self:#?}")
            };
            // Return the pointer.
            Ok(ptr)
        } else {
            // Slow path: we should split a subregion out of a larger size class here.
            // TODO: This isn't implemented yet, so report out-of-memory instead.
            Err(AllocError)
        }
    }

    /// Returns a subregion of a particular size class to the allocator.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a call to `alloc` with the same `size_class`.
    /// - `ptr` must convey ownership over the entire region.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    pub unsafe fn dealloc(&mut self, ptr: NonNull<u8>, size_class: usize) {
        // Find the corresponding tree and the offset of the subregion in the tree.
        let (tree_index, offset) = self.tree_index_and_offset(ptr);
        let tree = &mut self.trees[tree_index];
        // Ensure that the memory is marked as not being in the free list.
        assert_eq!(
            tree.bitset_get(&mut self.bitset, size_class, offset),
            SubregionStatus::NotInFreeList
        );
        // Start combining size classes.
        assert!(size_class <= tree.size_class);
        while size_class < tree.size_class {
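            // The buddy subregion's offset differs from ours only in the bit equal to this size
            // class's size in bytes, so XOR-ing with that size flips between the two buddies.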
            let buddy_offset = offset ^ (PAGE_SIZE << size_class);
            todo!("{buddy_offset:#x} {offset:#x}");
        }
        // Mark the pointer as being in the tree.
        let Ok(()) = tree.bitset_mark_as_present(self.bitset, size_class, offset) else {
            panic!("the bit for tree {tree_index}, offset {offset} was already set in {self:#?}")
        };
        // Return the pointer to the appropriate free list.
        self.free_list(size_class).push(ptr);
    }

    /// Returns the free list with the given size class.
    #[requires(size_class < SIZE_CLASS_COUNT)]
    fn free_list(&mut self, size_class: usize) -> FreeList {
        // SAFETY: The bounds-check is a precondition.
        let sentinel = unsafe { self.free_list_sentinels.add(size_class) };
        // SAFETY: The free list is kept valid.
        unsafe { FreeList::from_sentinel(sentinel) }
    }

    /// Returns the index of the tree this pointer was allocated in.
    #[ensures(self.trees[ret.0].contains(ptr.as_ptr() as usize))]
    #[ensures(ret.1 < (PAGE_SIZE << self.trees[ret.0].size_class))]
    fn tree_index_and_offset(&self, ptr: NonNull<u8>) -> (usize, usize) {
        let addr = ptr.as_ptr() as usize;
        let tree_index = self
            .trees
            .binary_search_by_key(&addr, |tree| tree.base_addr());
        let tree_index = match tree_index {
            Ok(i) => i,
            Err(i) => {
                assert_ne!(i, 0);
                i - 1
            }
        };
        let offset = addr - self.trees[tree_index].base_addr();
        (tree_index, offset)
    }
}

impl<
        'allocator,
        const PAGE_SIZE: usize,
        const PAGE_SIZE_BITS: usize,
        const SIZE_CLASS_COUNT: usize,
    > fmt::Debug for BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
        struct FreeLists<const SIZE_CLASS_COUNT: usize>(NonNull<FreeListNode>);

        impl<const SIZE_CLASS_COUNT: usize> fmt::Debug for FreeLists<SIZE_CLASS_COUNT> {
            fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
                fmt.debug_list()
                    .entries((0..SIZE_CLASS_COUNT).map(|size_class| {
                        // SAFETY: The free lists are kept valid, and the range of size classes is
                        // necessarily in-bounds.
                        unsafe { FreeList::from_sentinel(self.0.add(size_class)) }
                    }))
                    .finish()
            }
        }

        fmt.debug_struct("BuddyAllocator")
            .field(
                "free_lists",
                &FreeLists::<SIZE_CLASS_COUNT>(self.free_list_sentinels),
            )
            .field("trees", &self.trees)
            .field("bitset", &self.bitset)
            .finish()
    }
}

#[derive(Debug)]
struct MetadataLayout<
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    layout: Layout,
    free_list_sentinels_offset: usize,
    trees_offset: usize,
    trees_len: usize,
    bitset_offset: usize,
    bitset_len: usize,
    bitset_len_bits: usize,
}

impl<const PAGE_SIZE: usize, const PAGE_SIZE_BITS: usize, const SIZE_CLASS_COUNT: usize>
    MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    fn new(
        range_count: usize,
        size_class_counts: [usize; SIZE_CLASS_COUNT],
    ) -> Result<MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, LayoutError> {
        let trees_len = range_count;
        let mut bitset_len_bits = 0;
        for (size_class, &count) in size_class_counts.iter().enumerate() {
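            // A tree rooted at this size class is a full binary tree over its subregions, so it
            // needs 1 + 2 + ... + 2^size_class = 2^(size_class + 1) - 1 bits.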
            let bits_for_size_class = (1 << (size_class + 1)) - 1;
            bitset_len_bits += bits_for_size_class * count;
        }
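        // Round the total number of bits up to a whole number of `usize` words.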
        let bitset_len = (bitset_len_bits + usize::BITS as usize - 1) / usize::BITS as usize;

        let free_list_sentinels_layout = Layout::new::<[FreeListNode; SIZE_CLASS_COUNT]>();
        let trees_layout = Layout::array::<Tree<PAGE_SIZE, PAGE_SIZE_BITS>>(trees_len)?;
        let bitset_layout = Layout::array::<usize>(bitset_len)?;

        let free_list_sentinels_offset = 0;
        let (layout, trees_offset) = free_list_sentinels_layout.extend(trees_layout)?;
        let (layout, bitset_offset) = layout.extend(bitset_layout)?;

        Ok(MetadataLayout {
            layout,
            free_list_sentinels_offset,
            trees_offset,
            trees_len,
            bitset_offset,
            bitset_len,
            bitset_len_bits,
        })
    }
}