path: root/crates/buddy_allocator/src/lib.rs
//! A buddy allocator, used to allocate pages.
//!
//! The allocator is built from three abstractions: stripes, trees, and the allocator itself.
//!
//! TODO: See if there's standard terminology for these.
//!
//! ## Stripes
//!
//! The buddy allocator works in terms of size classes, which are power-of-two sized, starting at a
//! single page and going up from there. Each stripe corresponds to a single size class and a
//! particular region of memory.
//!
//! A stripe contains a circular doubly-linked free-list for subregions of that size class, and a
//! bitset marking whether a particular region has been allocated or not. Being a circular
//! doubly-linked list makes it cheap to remove an element whose address we know, as well as cheap
//! to push and pop elements.
//!
//! It's efficient to go from the address of a subregion to the index of its corresponding bit, so
//! when we hand out a subregion from the free-list or put one back, it's cheap to read or write
//! its bit.
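//!
//! As a concrete sketch (the names here are illustrative; the real mapping lives in the tree
//! code): size class `k` covers `2^k` pages, and a subregion's bit index is its byte offset from
//! the stripe's base divided by the subregion size.
//!
//! ```
//! // Hypothetical numbers: 4 KiB pages (PAGE_SIZE_BITS = 12) and size class 1, i.e. two-page,
//! // 8 KiB subregions. A subregion starting six pages past the stripe's base is the stripe's
//! // fourth subregion, so its bit index is 3.
//! let page_size_bits = 12;
//! let size_class = 1;
//! let offset = 6 * 0x1000_usize;
//! assert_eq!(offset >> (page_size_bits + size_class), 3);
//! ```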
//!
//! ## Trees
//!
//! A tree is logically a collection of stripes, one per size class. To pack the structures more
//! efficiently, they are stored interleaved with each other, and the tree manages this.
//!
//! The important logic at the level of trees happens when we allocate a subregion from a size
//! class whose stripe's free-list is empty, and when we free subregions.
//!
//! When a stripe's free-list is empty, the tree instead allocates a subregion from the next
//! larger stripe and splits it in half, handing out one half and storing the other in the current
//! stripe's free-list.
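//!
//! For example (a hypothetical trace, not tied to the actual implementation): if a single page is
//! requested and only a four-page subregion is free, the tree splits it into two two-page halves,
//! stores one half in the two-page stripe, splits the remaining half again, stores one of those
//! pages in the single-page stripe, and hands out the last page.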
//!
//! The really important bit is the ability to merge subregions when they're freed. When we free a
//! subregion of a certain size class, we check whether its neighbor (its buddy) is unallocated as
//! well. If so, we remove the buddy from the free-list by its address, combine the two subregions
//! into one of the next larger size class, and return that larger subregion to the next stripe.
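//!
//! As a sketch of the buddy computation (an assumption about the layout, not the actual code in
//! the tree module): within a tree, a subregion's buddy in size class `k` is the subregion whose
//! offset from the tree's base differs only in bit `PAGE_SIZE_BITS + k`.
//!
//! ```
//! // Hypothetical numbers: 4 KiB pages and size class 1 (two-page, 8 KiB subregions). The
//! // subregion at offset 0x2000 from its tree's base is buddies with the one at offset 0x0000;
//! // merging them would yield a single size-class-2 (four-page) subregion at offset 0x0000.
//! let page_size = 0x1000_usize;
//! let size_class = 1;
//! let offset = 0x2000_usize;
//! let buddy_offset = offset ^ (page_size << size_class);
//! assert_eq!(buddy_offset, 0x0000);
//! ```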
//!
//! ## The buddy allocator
//!
//! Finally, the buddy allocator needs to handle multiple memory regions. To facilitate this, the
//! trees are stored in an array, which forms the overall allocator.
#![no_std]

extern crate std; // TODO: Remove me

mod bitvec;
mod free_list;
mod tree;

// mod stripe;

use crate::{bitvec::BitVec, free_list::FreeList, tree::Tree};
use allocator_api2::alloc::{AllocError, Layout, LayoutError};
use contracts::requires;
use core::{mem, pin::Pin, ptr::NonNull};
use vernos_physmem_free_list::FreeListAllocator;
use vernos_utils::pin::{pin_project_slice_mut, pin_project_slice_mut_iter};

/// A buddy allocator.
#[derive(Debug)]
pub struct BuddyAllocator<
    'allocator,
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    /// The free lists.
    free_lists: Pin<&'allocator mut [FreeList; SIZE_CLASS_COUNT]>,

    /// The metadata associated with each tree.
    trees: &'allocator mut [Tree<'allocator, PAGE_SIZE, PAGE_SIZE_BITS>],

    /// The shared bitset.
    bitset: &'allocator mut BitVec,
}

impl<
        'allocator,
        const PAGE_SIZE: usize,
        const PAGE_SIZE_BITS: usize,
        const SIZE_CLASS_COUNT: usize,
    > BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    /// Returns a buddy allocator backed by the page ranges in the free list.
    pub fn new(
        mut free_list_alloc: FreeListAllocator<'allocator, PAGE_SIZE>,
    ) -> Result<BuddyAllocator<'allocator, PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, AllocError>
    {
        assert_eq!(PAGE_SIZE, 1 << PAGE_SIZE_BITS);
        assert_ne!(SIZE_CLASS_COUNT, 0);

        // We split up all the nodes into size-class-sized chunks here. After this, it's important
        // for safety that we never split a node again -- we'll see why later.
        free_list_alloc.split_to_powers_of_two_no_larger_than(1 << (SIZE_CLASS_COUNT - 1));

        // We use one contiguous region to store the free lists, the metadata for each tree (its
        // base pointer, size class, and bitset offset), and the shared bitset. We do one pass
        // through the free list to find out how much storage we need, then we try to find a
        // convenient spot to steal that storage from a page range in the list.
        let mut range_count = 0;
        let mut size_class_counts = [0; SIZE_CLASS_COUNT];
        for (_, len_pages) in free_list_alloc.iter() {
            assert!(len_pages.is_power_of_two());
            let size_class = len_pages.trailing_zeros() as usize;
            assert!((0..SIZE_CLASS_COUNT).contains(&size_class));
            range_count += 1;
            size_class_counts[size_class] += 1;
        }

        // Lay out the metadata.
        //
        // UNWRAP: The error here really ought to be impossible, because the pages in the free list
        // already fit in the address space, so computing the layout of the (much smaller) metadata
        // that describes them can't overflow.
        let metadata_layout = MetadataLayout::<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>::new(
            range_count,
            size_class_counts,
        )
        .unwrap();

        // Allocate space for the metadata.
        //
        // SAFETY: We need to make _absolutely_ sure nothing in the buddy allocator uses this
        // memory before we have a chance to remove it from the allocator. This function guarantees
        // that the returned memory won't overlap with the first page of a live node, so as long as
        // we don't split a node or write past the first page of one, we should be OK.
        let metadata = unsafe {
            free_list_alloc.allocate_without_always_removing_the_node(metadata_layout.layout)?
        };
        // SAFETY: The pointer must be valid to have been returned here.
        unsafe {
            metadata.cast::<u8>().write_bytes(0, metadata.len());
        }

        // Break out each component of the metadata.
        //
        // SAFETY: The layout is computed to make all of this sound if the data is valid. All of
        // these types are valid when zero-initialized.
        let free_lists: &'allocator mut [FreeList; SIZE_CLASS_COUNT] = unsafe {
            metadata
                .byte_add(metadata_layout.free_lists_offset)
                .cast()
                .as_mut()
        };
        let trees: &'allocator mut [Tree<PAGE_SIZE, PAGE_SIZE_BITS>] = unsafe {
            NonNull::slice_from_raw_parts(
                metadata.byte_add(metadata_layout.trees_offset).cast(),
                metadata_layout.trees_len,
            )
            .as_mut()
        };
        let bitset: &'allocator mut BitVec = unsafe {
            mem::transmute::<&mut [usize], &mut BitVec>(
                NonNull::slice_from_raw_parts(
                    metadata.byte_add(metadata_layout.bitset_offset).cast(),
                    metadata_layout.bitset_len,
                )
                .as_mut(),
            )
        };

        // Pin and self-link each free list. Although a zeroed free list is a valid value as far as
        // language-level soundness goes, it isn't ready to use until this is done.
        //
        // SAFETY: We never provide the ability to move a free list.
        let mut free_lists = unsafe { Pin::new_unchecked(free_lists) };
        for free_list in pin_project_slice_mut_iter(free_lists.as_mut()) {
            free_list.self_link();
        }

        // Initialize the trees, adding subregions into the allocator as we do. We don't actually
        // assign bitset offsets yet, because we're going to sort the trees by address before we
        // do. (This isn't algorithmically necessary, but it makes debugging a bit easier.)
        let mut i = 0;
        while let Some((ptr, mut len_pages)) = free_list_alloc.pop() {
            while len_pages > 0 {
                let size_class = (len_pages.trailing_zeros() as usize).min(SIZE_CLASS_COUNT - 1);
                trees[i].base_ptr = ptr.cast().as_ptr();
                trees[i].size_class = size_class;
                i += 1;
                len_pages -= 1 << size_class;
            }
        }
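        // The metadata allocation above may have removed a node from the free list entirely, in
        // which case we end up with one fewer tree than the slots we allocated for them.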
        assert!(
            i == trees.len() || i + 1 == trees.len(),
            "found {} trees but allocated {} slots",
            i,
            trees.len()
        );

        // Slice the trees slice to fit how many trees actually remained, then sort them by
        // address.
        let trees = &mut trees[..i];
        trees.sort_by_key(|tree| tree.base_ptr as usize);

        // Compute the number of bits that belong to each tree and assign them.
        let mut bitset_offset = 0;
        for tree in &mut *trees {
            tree.bitset_offset = bitset_offset;
            bitset_offset += (1 << (tree.size_class + 1)) - 1;
        }
        assert!(bitset_offset <= metadata_layout.bitset_len_bits);

        // Add regions the trees should own to the free lists, excluding the subregions that
        // metadata is allocated in.

        // Since we know that the metadata was allocated at the end of the page range it was
        // allocated in, we can just test the end addresses.
        //
        // SAFETY: This is the one-past-the-end address.
        let metadata_addr_hi =
            unsafe { metadata.cast::<u8>().add(metadata.len()).as_ptr() } as usize;
        for tree in &mut *trees {
            // SAFETY: This is the one-past-the-end address.
            let tree_addr_hi = unsafe { tree.base_ptr.add(1 << tree.size_class) } as usize;
            if metadata_addr_hi == tree_addr_hi {
                // If the metadata was present inside the tree... TODO.
                std::eprintln!("TODO");
            } else {
                // Otherwise, we can take the whole range of the tree and add it to the free list.
                //
                // UNWRAP: The pointer must have been non-null when it was in the free-list
                // allocator.
                let ptr = NonNull::new(tree.base_ptr as *mut u8).unwrap();
                // SAFETY: This region is not owned or borrowed by anything else (the only thing it
                // could be would be the metadata, but that's not in this tree), is of the correct
                // size, and is composed of untyped memory.
                unsafe {
                    pin_project_slice_mut(free_lists.as_mut(), tree.size_class).push(ptr);
                }

                // Then, set a bit in the bitset to say that this region is present.
                assert!(!bitset.get(tree.bitset_offset));
                bitset.set(tree.bitset_offset, true);
            }
        }

        // Make the buddy allocator's struct and return it.
        Ok(BuddyAllocator {
            free_lists,
            trees,
            bitset,
        })
    }

    /// Tries to allocate a subregion of a particular size class from the allocator.
    #[requires((0..SIZE_CLASS_COUNT).contains(&size_class))]
    pub fn alloc(&mut self, size_class: usize) -> Result<NonNull<u8>, AllocError> {
        Err(AllocError)
    }

    /// Returns a subregion of a particular size class to the allocator.
    ///
    /// # Safety
    ///
    /// - `ptr` must have been returned by a call to `alloc` with the same `size_class`.
    /// - `ptr` must convey ownership over the entire region.
    #[requires((0..SIZE_CLASS_COUNT).contains(&size_class))]
    pub unsafe fn dealloc(&mut self, ptr: NonNull<u8>, size_class: usize) {
        todo!()
    }
}

#[derive(Debug)]
struct MetadataLayout<
    const PAGE_SIZE: usize,
    const PAGE_SIZE_BITS: usize,
    const SIZE_CLASS_COUNT: usize,
> {
    layout: Layout,
    free_lists_offset: usize,
    trees_offset: usize,
    trees_len: usize,
    bitset_offset: usize,
    bitset_len: usize,
    bitset_len_bits: usize,
}

impl<const PAGE_SIZE: usize, const PAGE_SIZE_BITS: usize, const SIZE_CLASS_COUNT: usize>
    MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>
{
    fn new(
        range_count: usize,
        size_class_counts: [usize; SIZE_CLASS_COUNT],
    ) -> Result<MetadataLayout<PAGE_SIZE, PAGE_SIZE_BITS, SIZE_CLASS_COUNT>, LayoutError> {
        let trees_len = range_count;
        let mut bitset_len_bits = 0;
        for (size_class, &count) in size_class_counts.iter().enumerate() {
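            // A tree of size class `size_class` needs one bit per subregion at every level:
            // 1 + 2 + ... + 2^size_class = 2^(size_class + 1) - 1 bits in total.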
            let bits_for_size_class = (1 << (size_class + 1)) - 1;
            bitset_len_bits += bits_for_size_class * count;
        }
        let bitset_len = (bitset_len_bits + usize::BITS as usize - 1) / usize::BITS as usize;

        let free_lists_layout = Layout::new::<[FreeList; SIZE_CLASS_COUNT]>();
        let trees_layout = Layout::array::<Tree<PAGE_SIZE, PAGE_SIZE_BITS>>(trees_len)?;
        let bitset_layout = Layout::array::<usize>(bitset_len)?;

        let free_lists_offset = 0;
        let (layout, trees_offset) = free_lists_layout.extend(trees_layout)?;
        let (layout, bitset_offset) = layout.extend(bitset_layout)?;

        Ok(MetadataLayout {
            layout,
            free_lists_offset,
            trees_offset,
            trees_len,
            bitset_offset,
            bitset_len,
            bitset_len_bits,
        })
    }
}