//! A general-purpose allocator, based on the design of mimalloc [0, 1]. The design is summarized
//! below.
//!
//! Objects are separated into four classes, by size:
//!
//! - Small objects are less than or equal to 1KiB in size. Small objects get access to a special
//!   fast-path for allocation that avoids performing a `.next_power_of_two()` on their size. This
//!   fast-path is designed to be inlined. They get stored in 64KiB pages, which are stored in 4MiB
//!   segments.
//! - Medium objects are greater than 1KiB in size, but less than or equal to 8KiB. Medium objects
//!   have another fast-path, but it isn't inlinable and performs a `.next_power_of_two()`. Like
//!   small objects, they get stored in 64KiB pages, which are stored in 4MiB segments.
//! - Large objects are greater than 8KiB in size, but less than or equal to 512KiB. Their pages
//!   span the entire 4MiB of a segment.
//! - Huge objects are greater than 512KiB in size. They get a custom segment size with a single
//!   page, sized to fit the object.
//!
//! This design has the property that the first address of an object will be less than 4MiB from
//! the start of the segment. By aligning segments to 4MiB, this lets us cheaply get a pointer to
//! the segment from a pointer to the object.
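//!
//! For example, the segment that owns an object can be recovered by masking off the low bits of
//! the object's address. A minimal sketch (the crate's internal lookup may differ, and a real
//! implementation must also take care to preserve pointer provenance):
//!
//! ```ignore
//! fn segment_of(object: *const u8) -> *const Segment {
//!     // Segments are aligned to NON_HUGE_SEGMENT_SIZE (4MiB), and an object always starts
//!     // within the first 4MiB of its segment, so clearing the low NON_HUGE_SEGMENT_SIZE_BITS
//!     // bits of the address yields the segment's base address.
//!     ((object as usize) & !(NON_HUGE_SEGMENT_SIZE - 1)) as *const Segment
//! }
//! ```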
//!
//! [0]: https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action/
//! [1]: https://github.com/microsoft/mimalloc
#![no_std]

use allocator_api2::alloc::{AllocError, Allocator, Layout};
use contracts::requires;
use core::{
    cell::RefCell,
    marker::PhantomData,
    mem::MaybeUninit,
    ptr::{addr_of_mut, null_mut, slice_from_raw_parts_mut, NonNull},
    sync::atomic::{AtomicPtr, AtomicU16, Ordering},
};
use sptr::invalid_mut;
use static_assertions::const_assert_eq;

/// The per-CPU structure. This uses a `RefCell` for interior mutability.
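///
/// # Example
///
/// A rough sketch of the intended usage (parts of the allocation path are still `todo!`).
/// `SomeOS` is a hypothetical `OSServices` implementation, and `segment` stands for a
/// caller-owned, 4MiB-aligned `NonNull<[u8; NON_HUGE_SEGMENT_SIZE]>`:
///
/// ```ignore
/// let mut slot = MaybeUninit::<Heap<SomeOS>>::uninit();
/// Heap::init(&mut slot);
/// // SAFETY: `init` initialized the heap in place.
/// let heap = unsafe { slot.assume_init_ref() };
/// // SAFETY: `segment` conveys ownership of the whole 4MiB region.
/// unsafe { heap.donate_small_medium_segment(segment) };
/// let block = heap.allocate(Layout::from_size_align(64, 8).unwrap())?;
/// ```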
#[derive(Debug)]
pub struct Heap<OS: OSServices>(RefCell<HeapInner<OS>>);

#[derive(Debug)]
struct HeapInner<OS: OSServices> {
    /// Small objects (<=1024 bytes) have pointers directly to the page they allocate into. These
    /// are non-owning pointers.
    pages_direct: [NonNull<Page>; LARGEST_SMALL_OBJECT >> 3],

    /// The sentinel nodes for the circular doubly-linked lists that compose the size classes, up
    /// to and including 512KiB, plus one more for objects over 512KiB (huge objects).
    pages: [PageLink; LARGEST_LARGE_OBJECT_SIZE_CLASS + 2],

    /// The sentinel node for segments that store small or medium objects that have free pages.
    small_medium_segments_free: SegmentLink,

    /// The sentinel node for segments that store small or medium objects that have no free pages.
    small_medium_segments_full: SegmentLink,

    /// The sentinel node for segments that store large or huge objects.
    large_huge_segments: SegmentLink,

    _phantom: PhantomData<OS>,
}

/// The log2 of the number of bytes in a segment (other than a segment created for a huge object).
pub const NON_HUGE_SEGMENT_SIZE_BITS: usize = 22;

/// The number of bytes in a segment (other than a segment created for a huge object).
pub const NON_HUGE_SEGMENT_SIZE: usize = 4 << 20;

const_assert_eq!(NON_HUGE_SEGMENT_SIZE, 1 << NON_HUGE_SEGMENT_SIZE_BITS);

const SMALL_MEDIUM_PAGE_SIZE_BITS: u32 = 16;
const SMALL_MEDIUM_PAGE_SIZE: usize = 1 << SMALL_MEDIUM_PAGE_SIZE_BITS;
const SMALL_MEDIUM_PAGES_PER_SEGMENT: usize =
    1 << (NON_HUGE_SEGMENT_SIZE_BITS - SMALL_MEDIUM_PAGE_SIZE_BITS as usize);

const LARGEST_SMALL_OBJECT: usize = 1024;
const LARGEST_MEDIUM_OBJECT: usize = 8 * 1024;
const LARGEST_LARGE_OBJECT: usize = 512 * 1024;
const LARGEST_SMALL_OBJECT_SIZE_CLASS: usize = 7;
const LARGEST_MEDIUM_OBJECT_SIZE_CLASS: usize = 10;
const LARGEST_LARGE_OBJECT_SIZE_CLASS: usize = 16;

const_assert_eq!(
    size_class(LARGEST_SMALL_OBJECT),
    LARGEST_SMALL_OBJECT_SIZE_CLASS
);
const_assert_eq!(
    size_class(LARGEST_MEDIUM_OBJECT),
    LARGEST_MEDIUM_OBJECT_SIZE_CLASS
);
const_assert_eq!(
    size_class(LARGEST_LARGE_OBJECT),
    LARGEST_LARGE_OBJECT_SIZE_CLASS
);

impl<OS: OSServices> Heap<OS> {
    /// Initializes a `Heap`. After this, the `MaybeUninit` is initialized.
    pub fn init(maybe_uninit: &mut MaybeUninit<Heap<OS>>) {
        // Initialize the `RefCell` part. This might not be sound; it also risks blowing the
        // stack in debug builds, which would defeat the point of this whole function.
        let refcell = unsafe {
            maybe_uninit
                .as_mut_ptr()
                .cast::<MaybeUninit<RefCell<MaybeUninit<HeapInner<OS>>>>>()
                .as_mut()
                .unwrap_unchecked()
                .write(RefCell::new(MaybeUninit::uninit()))
        };

        // Make accessors for the fields of the HeapInner.
        let inner_ptr = refcell.get_mut().as_mut_ptr();
        let pages_direct_ptr = |i: usize| {
            // SAFETY: For the `maybe_uninit` to have been valid, this must be sound.
            unsafe { NonNull::new_unchecked(addr_of_mut!((*inner_ptr).pages_direct[i])) }
        };
        let pages_ptr = |i: usize| {
            // SAFETY: For the `maybe_uninit` to have been valid, this must be sound.
            unsafe { NonNull::new_unchecked(addr_of_mut!((*inner_ptr).pages[i])) }
        };

        // Make a helper to self-link the segment lists.
        //
        // SAFETY: See the call sites.
        let self_link_segments = |ptr: NonNull<SegmentLink>| unsafe {
            (*ptr.as_ptr()).prev = ptr;
            (*ptr.as_ptr()).next = ptr;
        };

        // Initialize each field.
        for i in 0..(LARGEST_SMALL_OBJECT >> 3) {
            // SAFETY: The returned pointer is valid.
            unsafe { pages_direct_ptr(i).write(NonNull::from(&EMPTY_PAGE)) }
        }
        for i in 0..(LARGEST_LARGE_OBJECT_SIZE_CLASS + 2) {
            let ptr = pages_ptr(i);
            // SAFETY: The returned pointer is valid.
            unsafe {
                ptr.write(PageLink {
                    prev: ptr,
                    next: ptr,
                })
            }
        }
        // SAFETY: For the `maybe_uninit` to have been valid, getting these pointers must be sound.
        // Self-linking the pointers is also sound as a result.
        unsafe {
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).small_medium_segments_free
            )));
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).small_medium_segments_full
            )));
            self_link_segments(NonNull::new_unchecked(addr_of_mut!(
                (*inner_ptr).large_huge_segments
            )));
        }
    }

    /// Donates a segment to be used for small or medium objects.
    ///
    /// # Safety
    ///
    /// - `ptr` must convey ownership over the entire region.
    /// - This must be called on the thread that owns this heap, since the segment's header is
    ///   tagged with `OS::current_thread_id()`.
    pub unsafe fn donate_small_medium_segment(&self, ptr: NonNull<[u8; NON_HUGE_SEGMENT_SIZE]>) {
        self.0.borrow_mut().add_small_medium_segment(ptr)
    }
}

impl<OS: OSServices> HeapInner<OS> {
    /// Initializes a segment for small or medium objects, adding it to this heap.
    unsafe fn add_small_medium_segment(&mut self, ptr: NonNull<[u8; NON_HUGE_SEGMENT_SIZE]>) {
        // Initialize the segment to be self-linked.
        let maybe_uninit: &mut MaybeUninit<Segment> = ptr.cast().as_mut();
        maybe_uninit.write(Segment {
            link: SegmentLink {
                prev: ptr.cast(),
                next: ptr.cast(),
            },
            header: SegmentHeader {
                thread_id: OS::current_thread_id(),
                page_shift: SMALL_MEDIUM_PAGE_SIZE_BITS,
            },
        });

        // Add the segment to the list of segments with free pages.
        SegmentLink::add_next(NonNull::from(&self.small_medium_segments_free), ptr.cast());
    }

    /// Allocates small blocks of naturally-aligned memory.
    #[requires(size > 0)]
    #[requires(size <= LARGEST_SMALL_OBJECT)]
    #[inline]
    fn alloc_small(&mut self, size: usize) -> Result<NonNull<[u8]>, AllocError> {
        let size = (size + 7) & !0b111;
        let small_size_class = (size >> 3) - 1;
        let mut page = self.pages_direct[small_size_class];
        let page = unsafe { page.as_mut() };
        match page.header.alloc_list.as_mut() {
            Some(&mut block) => {
                let next = unsafe { block.as_ref() }.next.load(Ordering::SeqCst);
                page.header.alloc_list = NonNull::new(next);
                page.header.used.fetch_add(1, Ordering::SeqCst);
                // SAFETY: `block` is a `NonNull`, so the slice pointer will be non-null.
                Ok(unsafe {
                    NonNull::new_unchecked(slice_from_raw_parts_mut(block.as_ptr().cast(), size))
                })
            }
            None => self.alloc_generic(size, size),
        }
    }

    /// The slow path of allocation. Allocates a block of any size and alignment, and does some
    /// book-keeping tasks along the way.
    fn alloc_generic(&mut self, size: usize, align: usize) -> Result<NonNull<[u8]>, AllocError> {
        /*
        let size_class = size_class(size);
        let sentinel: NonNull<PageLink> = NonNull::from(&self.pages[size_class]);
        let mut page = sentinel;
        loop {
            page = unsafe { (*page.as_ptr()).next };

            todo!("{page:#?}")
        }
        */
        todo!("{:p}.alloc_generic({size}, {align})", self)
    }
}

unsafe impl<OS: OSServices> Allocator for Heap<OS> {
    #[inline]
    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
        // Special-case ZSTs.
        if layout.size() == 0 {
            return Ok(unsafe {
                NonNull::new_unchecked(slice_from_raw_parts_mut(invalid_mut(layout.align()), 0))
            });
        }

        // An object with an alignment this big would break our invariants (namely, that we can
        // get to the address of the segment by aligning down the address of the object to a 4MiB
        // boundary).
        if layout.align() >= NON_HUGE_SEGMENT_SIZE {
            return Err(AllocError);
        }

        let mut inner = self.0.borrow_mut();
        let size_if_small = layout.size().max(layout.align());
        if size_if_small <= LARGEST_SMALL_OBJECT {
            // Fast path.
            inner.alloc_small(size_if_small)
        } else {
            // Slow path.
            inner.alloc_generic(layout.size(), layout.align())
        }
    }

    #[inline]
    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
        // Special-case ZSTs.
        if layout.size() == 0 {
            return;
        }

        let self_ptr = self as *const Heap<OS>;
        todo!("{self_ptr:p}.deallocate({ptr:p}, {layout:?})")
    }
}

/// Returns the index in the `pages` array for objects of the given size.
const fn size_class(size: usize) -> usize {
    if size < 8 {
        0
    } else if size > LARGEST_LARGE_OBJECT {
        17
    } else {
        (size.next_power_of_two().trailing_zeros() - 3) as usize
    }
}

#[test]
fn expected_size_class_values() {
    assert_eq!(size_class(1), 0);
    assert_eq!(size_class(8), 0);

    let mut size = 8;
    let mut expected = 0;
    while size <= 512 * 1024 {
        assert_eq!(size_class(size), expected);
        size <<= 1;
        expected += 1;
    }

    assert_eq!(size_class(512 * 1024), 16);
    assert_eq!(size_class((512 * 1024) + 1), 17);
    assert_eq!(size_class(4 * 1024 * 1024), 17);
}

/// A segment is a container for pages, to amortize some of the overhead of allocating new pages.
#[repr(C)]
struct Segment {
    /// The intrusive link in the circular doubly-linked list of segments.
    link: SegmentLink,

    /// The fields other than the intrusive link in a segment's header.
    header: SegmentHeader,
}

/// The intrusive link in a circular doubly-linked list of segments.
#[derive(Debug)]
struct SegmentLink {
    /// The previous segment.
    prev: NonNull<SegmentLink>,

    /// The next segment.
    next: NonNull<SegmentLink>,
}

impl SegmentLink {
    /// Inserts `item` into the list right after `list`.
    ///
    /// # Safety
    ///
    /// `list` and `item` must point to valid `SegmentLink` nodes, and `item` must be self-linked.
    #[requires(SegmentLink::self_linked(item))]
    pub unsafe fn add_next(list: NonNull<SegmentLink>, item: NonNull<SegmentLink>) {
        let next = (*list.as_ptr()).next;
        (*item.as_ptr()).next = next;
        (*item.as_ptr()).prev = list;

        (*list.as_ptr()).next = item;
        (*next.as_ptr()).prev = item;
    }

    pub unsafe fn self_linked(ptr: NonNull<SegmentLink>) -> bool {
        let SegmentLink { prev, next } = *ptr.as_ptr();
        ptr == prev && ptr == next
    }
}

/// The fields other than the intrusive link in a segment's header.
struct SegmentHeader {
    /// The ID of the thread that owns this segment.
    thread_id: usize,

    /// The amount by which to right-shift an object's offset from the start of the segment to
    /// get the index of the page it belongs to.
    page_shift: u32,
}

/// A page is a block of memory that contains objects of a single size class.
#[repr(C)]
struct Page {
    /// The intrusive link in the circular doubly-linked list of pages of each size class.
    link: PageLink,

    /// The fields other than the intrusive link in a page's header.
    header: PageHeader,
}

unsafe impl Send for Page {}
unsafe impl Sync for Page {}

/// A fake page that causes allocation to go into the "slow path," so that we can allocate an
/// initial page of that size class.
static EMPTY_PAGE: Page = unsafe {
    Page {
        link: PageLink {
            prev: NonNull::new_unchecked(&EMPTY_PAGE.link as *const PageLink as *mut PageLink),
            next: NonNull::new_unchecked(&EMPTY_PAGE.link as *const PageLink as *mut PageLink),
        },
        header: PageHeader {
            alloc_list: None,
            local_free_list: None,
            thread_free_list: AtomicPtr::new(null_mut()),
            used: AtomicU16::new(0),
            thread_freed: AtomicU16::new(0),
        },
    }
};

/// The intrusive link in the circular doubly-linked list of pages of each size class.
#[derive(Debug)]
struct PageLink {
    /// The previous page.
    prev: NonNull<PageLink>,

    /// The next page.
    next: NonNull<PageLink>,
}

/// The fields other than the intrusive link in a page's header.
struct PageHeader {
    /// The free list that the owning thread pops from.
    alloc_list: Option<NonNull<Block>>,

    /// The free list that the owning thread pushes onto.
    local_free_list: Option<NonNull<Block>>,

    /// The free list that other threads push onto.
    thread_free_list: AtomicPtr<Block>,

    /// The number of objects in the page that have been allocated.
    used: AtomicU16,

    /// The number of objects in the thread free list.
    thread_freed: AtomicU16,
}

/// An object in a free list.
struct Block {
    next: AtomicPtr<Block>,
}

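/// Pushes `node` onto the front of the atomic singly-linked `list`, retrying its CAS until it
/// succeeds.
///
/// A hedged sketch of the intended use (the deallocation path that would call this is not yet
/// implemented): a thread that frees a block it does not own pushes the block onto the owning
/// page's `thread_free_list` and bumps `thread_freed`:
///
/// ```ignore
/// unsafe fn free_remote(page: &Page, mut block: NonNull<Block>) {
///     atomic_push(block.as_mut(), &page.header.thread_free_list);
///     page.header.thread_freed.fetch_add(1, Ordering::SeqCst);
/// }
/// ```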
fn atomic_push(node: &mut Block, list: &AtomicPtr<Block>) {
    loop {
        let old_list = list.load(Ordering::Acquire);
        node.next = AtomicPtr::from(old_list);
        if list
            .compare_exchange(old_list, node, Ordering::Release, Ordering::Acquire)
            .is_ok()
        {
            break;
        }
    }
}

/// The OS services the allocator needs.
///
/// # Safety
///
/// All methods must be implemented according to their doc comments.
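///
/// # Example
///
/// A minimal sketch of an implementation for a single-threaded environment (hypothetical; a real
/// implementation would return an ID that is genuinely unique per thread):
///
/// ```ignore
/// struct SingleThreadedOS;
///
/// // SAFETY: There is only one thread, so a constant ID is consistent within it and is never
/// // shared by two threads running at the same time.
/// unsafe impl OSServices for SingleThreadedOS {
///     fn current_thread_id() -> usize {
///         0
///     }
/// }
/// ```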
pub unsafe trait OSServices {
    /// Returns the ID of the current thread. This can be arbitrary, so long as it is consistent
    /// within a thread and no two threads with the same thread ID run at the same time.
    fn current_thread_id() -> usize;
}