stacker/
lib.rs

//! A library to help grow the stack when it runs out of space.
//!
//! This is an implementation of manually instrumented segmented stacks where points in a program's
//! control flow are annotated with "maybe grow the stack here". Each point of annotation indicates
//! how far away from the end of the stack it's allowed to be, plus the amount of stack to allocate
//! if it does reach the end.
//!
//! Once a program has reached the end of its stack, a temporary stack on the heap is allocated and
//! is switched to for the duration of a closure.
//!
//! For a set of lower-level primitives, consider the `psm` crate.
//!
//! # Examples
//!
//! ```
//! // Grow the stack if we are within the "red zone" of 32K, and if we allocate
//! // a new stack allocate 1MB of stack space.
//! //
//! // If we're already in bounds, just run the provided closure on the current stack.
//! stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
//!     // guaranteed to have at least 32K of stack
//! });
//! ```
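//!
//! A typical use is inside a recursive function, where every level of recursion checks the red
//! zone before descending further. A minimal sketch (the depth and the sizes below are
//! illustrative, not recommendations):
//!
//! ```
//! fn count(n: u64) -> u64 {
//!     stacker::maybe_grow(32 * 1024, 1024 * 1024, || {
//!         if n == 0 { 0 } else { 1 + count(n - 1) }
//!     })
//! }
//! assert_eq!(count(500), 500);
//! ```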

#![allow(improper_ctypes)]

#[macro_use]
extern crate cfg_if;
extern crate libc;
#[cfg(windows)]
extern crate windows_sys;
#[macro_use]
extern crate psm;

use std::cell::Cell;

/// Grows the call stack if necessary.
///
/// This function is intended to be called at manually instrumented points in a program where
/// recursion is known to happen quite a bit. This function will check to see if we're within
/// `red_zone` bytes of the end of the stack, and if so it will allocate a new stack of at least
/// `stack_size` bytes.
///
/// The closure `callback` is guaranteed to run on a stack with at least `red_zone` bytes, and it
/// will be run on the current stack if there's space available.
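///
/// # Example
///
/// A minimal sketch; the closure's return value is passed through unchanged whether or not a new
/// stack was allocated (the sizes below are only illustrative):
///
/// ```
/// let answer: u32 = stacker::maybe_grow(32 * 1024, 1024 * 1024, || 42);
/// assert_eq!(answer, 42);
/// ```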
#[inline(always)]
pub fn maybe_grow<R, F: FnOnce() -> R>(red_zone: usize, stack_size: usize, callback: F) -> R {
    // If we can't guess the remaining stack (unsupported on some platforms), we immediately grow
    // the stack and then cache the new stack size (which we do know now because we allocated it).
    let enough_space = match remaining_stack() {
        Some(remaining) => remaining >= red_zone,
        None => false,
    };
    if enough_space {
        callback()
    } else {
        grow(stack_size, callback)
    }
}

/// Always creates a new stack for the passed closure to run on.
/// The closure will still be on the same thread as the caller of `grow`.
/// This will allocate a new stack with at least `stack_size` bytes.
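///
/// # Example
///
/// A sketch of forcing work onto a freshly allocated stack (the 1 MiB size here is only an
/// example value):
///
/// ```
/// let len = stacker::grow(1024 * 1024, || {
///     // This closure runs on a newly allocated stack of at least 1 MiB.
///     let buf = [0u8; 64 * 1024];
///     buf.len()
/// });
/// assert_eq!(len, 64 * 1024);
/// ```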
pub fn grow<R, F: FnOnce() -> R>(stack_size: usize, callback: F) -> R {
    // To avoid monomorphizing `_grow()` and everything it calls,
    // we convert the generic callback to a dynamic one.
    let mut opt_callback = Some(callback);
    let mut ret = None;
    let ret_ref = &mut ret;

    // This wrapper around `callback` achieves two things:
    // * It converts the `impl FnOnce` to a `dyn FnMut`.
    //   `dyn` because we want it to not be generic, and
    //   `FnMut` because we can't pass a `dyn FnOnce` around without boxing it.
    // * It eliminates the generic return value, by writing it to the stack of this function.
    //   Otherwise the closure would have to return an unsized value, which isn't possible.
    let dyn_callback: &mut dyn FnMut() = &mut || {
        let taken_callback = opt_callback.take().unwrap();
        *ret_ref = Some(taken_callback());
    };

    _grow(stack_size, dyn_callback);
    ret.unwrap()
}

/// Queries the amount of remaining stack as interpreted by this library.
///
/// This function will return the amount of stack space left which will be used
/// to determine whether a stack switch should be made or not.
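///
/// # Example
///
/// A small sketch; on platforms where the stack limit cannot be determined this returns `None`:
///
/// ```
/// match stacker::remaining_stack() {
///     Some(remaining) => println!("roughly {} bytes of stack left", remaining),
///     None => println!("stack limit unknown on this platform"),
/// }
/// ```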
pub fn remaining_stack() -> Option<usize> {
    let current_ptr = current_stack_ptr();
    get_stack_limit().map(|limit| current_ptr - limit)
}

psm_stack_information!(
    yes {
        fn current_stack_ptr() -> usize {
            psm::stack_pointer() as usize
        }
    }
    no {
        #[inline(always)]
        fn current_stack_ptr() -> usize {
            unsafe {
                let mut x = std::mem::MaybeUninit::<u8>::uninit();
                // Unlikely to ever be exercised. As a fallback we execute a volatile write to a
                // local (to hopefully defeat the optimisations that would make this local a static
                // global) and take its address. This way we get a very approximate address of the
                // current frame.
                x.as_mut_ptr().write_volatile(42);
                x.as_ptr() as usize
            }
        }
    }
);

thread_local! {
    static STACK_LIMIT: Cell<Option<usize>> = Cell::new(unsafe {
        guess_os_stack_limit()
    })
}

#[inline(always)]
fn get_stack_limit() -> Option<usize> {
    STACK_LIMIT.with(|s| s.get())
}

#[inline(always)]
#[allow(unused)]
fn set_stack_limit(l: Option<usize>) {
    STACK_LIMIT.with(|s| s.set(l))
}

psm_stack_manipulation! {
    yes {
        struct StackRestoreGuard {
            new_stack: *mut std::ffi::c_void,
            stack_bytes: usize,
            old_stack_limit: Option<usize>,
        }

        impl StackRestoreGuard {
            #[cfg(target_arch = "wasm32")]
            unsafe fn new(stack_bytes: usize, _page_size: usize) -> StackRestoreGuard {
                let layout = std::alloc::Layout::from_size_align(stack_bytes, 16).unwrap();
                let ptr = std::alloc::alloc(layout);
                assert!(!ptr.is_null(), "unable to allocate stack");
                StackRestoreGuard {
                    new_stack: ptr as *mut _,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                }
            }

            #[cfg(not(target_arch = "wasm32"))]
            unsafe fn new(stack_bytes: usize, page_size: usize) -> StackRestoreGuard {
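                // The whole region is first reserved with PROT_NONE; everything above the lowest
                // page is then made readable and writable, leaving that first page inaccessible as
                // a guard page which faults if the new stack overflows.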
                let new_stack = libc::mmap(
                    std::ptr::null_mut(),
                    stack_bytes,
                    libc::PROT_NONE,
                    libc::MAP_PRIVATE |
                    libc::MAP_ANON,
                    -1, // Some implementations assert fd = -1 if MAP_ANON is specified
                    0
                );
                if new_stack == libc::MAP_FAILED {
                    let error = std::io::Error::last_os_error();
                    panic!("allocating stack failed with: {}", error)
                }
                let guard = StackRestoreGuard {
                    new_stack,
                    stack_bytes,
                    old_stack_limit: get_stack_limit(),
                };
                let above_guard_page = new_stack.add(page_size);
                #[cfg(not(target_os = "openbsd"))]
                let result = libc::mprotect(
                    above_guard_page,
                    stack_bytes - page_size,
                    libc::PROT_READ | libc::PROT_WRITE
                );
                #[cfg(target_os = "openbsd")]
                let result = if libc::mmap(
                        above_guard_page,
                        stack_bytes - page_size,
                        libc::PROT_READ | libc::PROT_WRITE,
                        libc::MAP_FIXED | libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_STACK,
                        -1,
                        0) == above_guard_page {
                    0
                } else {
                    -1
                };
                if result == -1 {
                    let error = std::io::Error::last_os_error();
                    drop(guard);
                    panic!("setting stack permissions failed with: {}", error)
                }
                guard
            }
        }

        impl Drop for StackRestoreGuard {
            fn drop(&mut self) {
                #[cfg(target_arch = "wasm32")]
                unsafe {
                    std::alloc::dealloc(
                        self.new_stack as *mut u8,
                        std::alloc::Layout::from_size_align_unchecked(self.stack_bytes, 16),
                    );
                }
                #[cfg(not(target_arch = "wasm32"))]
                unsafe {
                    // FIXME: check the error code and decide what to do with it.
                    // Perhaps a debug_assertion?
                    libc::munmap(self.new_stack, self.stack_bytes);
                }
                set_stack_limit(self.old_stack_limit);
            }
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Calculate the number of pages we want to allocate for the new stack.
            // For maximum portability we want to produce a stack that is aligned to a page and has
            // a size that’s a multiple of the page size. Furthermore we want to allocate two extra
            // pages for the stack guard. To achieve that we do our calculations in number of pages
            // and convert to bytes last.
            let page_size = page_size();
            let requested_pages = stack_size
                .checked_add(page_size - 1)
                .expect("unreasonably large stack requested") / page_size;
            let stack_pages = std::cmp::max(1, requested_pages) + 2;
            let stack_bytes = stack_pages.checked_mul(page_size)
                .expect("unreasonably large stack requested");
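            // For example, assuming a 4 KiB page size (purely illustrative; the real value comes
            // from `page_size()` above): a 1 MiB request rounds up to 256 pages, plus the 2 extra
            // pages gives 258 pages, i.e. 1_056_768 bytes for the whole allocation.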

            // Next, there are a couple of approaches to how we allocate the new stack. We take the
            // most obvious path and use `mmap`. We also `mprotect` a guard page into our
            // allocation.
            //
            // We use a guard pattern to ensure we deallocate the allocated stack when we leave
            // this function and also try to uphold various safety invariants required by `psm`
            // (such as not unwinding from the callback we pass to it).
            //
            // Other than that this code has no meaningful gotchas.
            unsafe {
                let guard = StackRestoreGuard::new(stack_bytes, page_size);
                let above_guard_page = guard.new_stack.add(page_size);
                set_stack_limit(Some(above_guard_page as usize));
                let panic = psm::on_stack(above_guard_page as *mut _, stack_size, move || {
                    std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback)).err()
                });
                drop(guard);
                if let Some(p) = panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        fn page_size() -> usize {
            // FIXME: consider caching the page size.
            #[cfg(not(target_arch = "wasm32"))]
            unsafe { libc::sysconf(libc::_SC_PAGE_SIZE) as usize }
            #[cfg(target_arch = "wasm32")]
            { 65536 }
        }
    }

    no {
        #[cfg(not(windows))]
        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            drop(stack_size);
            callback();
        }
    }
}

cfg_if! {
    if #[cfg(windows)] {
        use std::ptr;
        use std::io;
        use libc::c_void;
        use windows_sys::Win32::System::Threading::{SwitchToFiber, IsThreadAFiber, ConvertThreadToFiber,
            CreateFiber, DeleteFiber, ConvertFiberToThread, SetThreadStackGuarantee
        };
        use windows_sys::Win32::Foundation::BOOL;
        use windows_sys::Win32::System::Memory::VirtualQuery;

        // Make sure the libstacker.a (implemented in C) is linked.
        // See https://github.com/rust-lang/rust/issues/65610
        #[link(name="stacker")]
        extern {
            fn __stacker_get_current_fiber() -> *mut c_void;
        }

        struct FiberInfo<F> {
            callback: std::mem::MaybeUninit<F>,
            panic: Option<Box<dyn std::any::Any + Send + 'static>>,
            parent_fiber: *mut c_void,
        }

        unsafe extern "system" fn fiber_proc<F: FnOnce()>(data: *mut c_void) {
            // This function is the entry point to our inner fiber, and as argument we get an
            // instance of `FiberInfo`. We set up the "runtime" for the callback and execute
            // it.
            let data = &mut *(data as *mut FiberInfo<F>);
            let old_stack_limit = get_stack_limit();
            set_stack_limit(guess_os_stack_limit());
            let callback = data.callback.as_ptr();
            data.panic = std::panic::catch_unwind(std::panic::AssertUnwindSafe(callback.read())).err();

            // Restore to the previous fiber.
            set_stack_limit(old_stack_limit);
            SwitchToFiber(data.parent_fiber);
        }

        fn _grow(stack_size: usize, callback: &mut dyn FnMut()) {
            // Fibers (or stackful coroutines) are the only official way to create new stacks on the
            // same thread on Windows. So in order to extend the stack we create a fiber and switch
            // to it so we can use its stack. After running `callback` within our fiber, we switch
            // back to the current stack and destroy the fiber and its associated stack.
            unsafe {
                let was_fiber = IsThreadAFiber() == 1 as BOOL;
                let mut data = FiberInfo {
                    callback: std::mem::MaybeUninit::new(callback),
                    panic: None,
                    parent_fiber: {
                        if was_fiber {
                            // Get a handle to the current fiber. We need to use a C implementation
                            // for this as GetCurrentFiber is a header-only function.
                            __stacker_get_current_fiber()
                        } else {
                            // Convert the current thread to a fiber, so we are able to switch back
                            // to the current stack. Threads converted to fibers still act like
                            // regular threads, but they have associated fiber data. We later
                            // convert it back to a regular thread and free the fiber data.
                            ConvertThreadToFiber(ptr::null_mut())
                        }
                    },
                };

                if data.parent_fiber.is_null() {
                    panic!("unable to convert thread to fiber: {}", io::Error::last_os_error());
                }

                let fiber = CreateFiber(
                    stack_size as usize,
                    Some(fiber_proc::<&mut dyn FnMut()>),
                    &mut data as *mut FiberInfo<&mut dyn FnMut()> as *mut _,
                );
                if fiber.is_null() {
                    panic!("unable to allocate fiber: {}", io::Error::last_os_error());
                }

                // Switch to the fiber we created. This changes stacks and starts executing
                // fiber_proc on it. fiber_proc will run `callback` and then switch back to run the
                // next statement.
                SwitchToFiber(fiber);
                DeleteFiber(fiber);

                // Clean-up.
                if !was_fiber && ConvertFiberToThread() == 0 {
                    // FIXME: Perhaps should not panic here?
                    panic!("unable to convert back to thread: {}", io::Error::last_os_error());
                }

                if let Some(p) = data.panic {
                    std::panic::resume_unwind(p);
                }
            }
        }

        #[inline(always)]
        fn get_thread_stack_guarantee() -> usize {
            let min_guarantee = if cfg!(target_pointer_width = "32") {
                0x1000
            } else {
                0x2000
            };
            let mut stack_guarantee = 0;
            unsafe {
                // Read the current thread stack guarantee.
                // This is the stack reserved for stack overflow
                // exception handling.
                // This doesn't return the true value, so we need
                // some further logic to calculate the real stack
                // guarantee. This logic is what is used on x86-32 and
                // x86-64 Windows 10. Other versions and platforms may differ.
                SetThreadStackGuarantee(&mut stack_guarantee)
            };
            std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000
        }

        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            // Query the allocation which contains our stack pointer in order
            // to discover the size of the stack
            //
            // FIXME: we could read stack base from the TIB, specifically the 3rd element of it.
            type QueryT = windows_sys::Win32::System::Memory::MEMORY_BASIC_INFORMATION;
            let mut mi = std::mem::MaybeUninit::<QueryT>::uninit();
            VirtualQuery(
                psm::stack_pointer() as *const _,
                mi.as_mut_ptr(),
                std::mem::size_of::<QueryT>() as usize,
            );
            Some(mi.assume_init().AllocationBase as usize + get_thread_stack_guarantee() + 0x1000)
        }
    } else if #[cfg(any(target_os = "linux", target_os = "solaris", target_os = "netbsd"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_getattr_np(libc::pthread_self(),
                                                attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(any(target_os = "freebsd", target_os = "dragonfly", target_os = "illumos"))] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut attr = std::mem::MaybeUninit::<libc::pthread_attr_t>::uninit();
            assert_eq!(libc::pthread_attr_init(attr.as_mut_ptr()), 0);
            assert_eq!(libc::pthread_attr_get_np(libc::pthread_self(), attr.as_mut_ptr()), 0);
            let mut stackaddr = std::ptr::null_mut();
            let mut stacksize = 0;
            assert_eq!(libc::pthread_attr_getstack(
                attr.as_ptr(), &mut stackaddr, &mut stacksize
            ), 0);
            assert_eq!(libc::pthread_attr_destroy(attr.as_mut_ptr()), 0);
            Some(stackaddr as usize)
        }
    } else if #[cfg(target_os = "openbsd")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            let mut stackinfo = std::mem::MaybeUninit::<libc::stack_t>::uninit();
            assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), stackinfo.as_mut_ptr()), 0);
            Some(stackinfo.assume_init().ss_sp as usize - stackinfo.assume_init().ss_size)
        }
    } else if #[cfg(target_os = "macos")] {
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            Some(libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize -
                libc::pthread_get_stacksize_np(libc::pthread_self()) as usize)
        }
    } else {
        // The fallback for other platforms is to always increase the stack if we're on
        // the root stack. After we have increased the stack once, we know the new stack
        // size and don't need this pessimization anymore.
        #[inline(always)]
        unsafe fn guess_os_stack_limit() -> Option<usize> {
            None
        }
    }
}