chumsky/
recursive.rs

1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more corefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13use alloc::rc::{Rc, Weak};
14
15// TODO: Remove when `OnceCell` is stable
16struct OnceCell<T>(core::cell::RefCell<Option<T>>);
17impl<T> OnceCell<T> {
18    pub fn new() -> Self {
19        Self(core::cell::RefCell::new(None))
20    }
21    pub fn set(&self, x: T) -> Result<(), ()> {
22        let mut inner = self.0.try_borrow_mut().map_err(|_| ())?;
23
24        if inner.is_none() {
25            *inner = Some(x);
26            Ok(())
27        } else {
28            Err(())
29        }
30    }
31    pub fn get(&self) -> Option<core::cell::Ref<T>> {
32        Some(core::cell::Ref::map(self.0.borrow(), |x| {
33            x.as_ref().unwrap()
34        }))
35    }
36}
37
38enum RecursiveInner<T> {
39    Owned(Rc<T>),
40    Unowned(Weak<T>),
41}
42
43type OnceParser<'a, I, O, E> = OnceCell<Box<dyn Parser<I, O, Error = E> + 'a>>;
44
45/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
46/// [definition](Recursive::define).
47///
48/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
49#[must_use]
50pub struct Recursive<'a, I, O, E: Error<I>>(RecursiveInner<OnceParser<'a, I, O, E>>);
51
52impl<'a, I: Clone, O, E: Error<I>> Recursive<'a, I, O, E> {
53    fn cell(&self) -> Rc<OnceParser<'a, I, O, E>> {
54        match &self.0 {
55            RecursiveInner::Owned(x) => x.clone(),
56            RecursiveInner::Unowned(x) => x
57                .upgrade()
58                .expect("Recursive parser used before being defined"),
59        }
60    }
61
62    /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
63    /// being fulled defined.
64    ///
65    /// Declaring a parser before defining it is required for a parser to reference itself.
66    ///
67    /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
68    /// for parsing (i.e: via the [`Parser::parse`] method or similar).
69    ///
70    /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
71    /// possible.
72    ///
73    /// # Examples
74    ///
75    /// ```
76    /// # use chumsky::prelude::*;
77    /// #[derive(Debug, PartialEq)]
78    /// enum Chain {
79    ///     End,
80    ///     Link(char, Box<Chain>),
81    /// }
82    ///
83    /// // Declare the existence of the parser before defining it so that it can reference itself
84    /// let mut chain = Recursive::<_, _, Simple<char>>::declare();
85    ///
86    /// // Define the parser in terms of itself.
87    /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
88    /// chain.define(just('+')
89    ///     .then(chain.clone())
90    ///     .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
91    ///     .or_not()
92    ///     .map(|chain| chain.unwrap_or(Chain::End)));
93    ///
94    /// assert_eq!(chain.parse(""), Ok(Chain::End));
95    /// assert_eq!(
96    ///     chain.parse("++"),
97    ///     Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
98    /// );
99    /// ```
100    pub fn declare() -> Self {
101        Recursive(RecursiveInner::Owned(Rc::new(OnceCell::new())))
102    }
103
104    /// Defines the parser after declaring it, allowing it to be used for parsing.
105    pub fn define<P: Parser<I, O, Error = E> + 'a>(&mut self, parser: P) {
106        self.cell()
107            .set(Box::new(parser))
108            .unwrap_or_else(|_| panic!("Parser defined more than once"));
109    }
110}
111
112impl<'a, I: Clone, O, E: Error<I>> Clone for Recursive<'a, I, O, E> {
113    fn clone(&self) -> Self {
114        Self(match &self.0 {
115            RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
116            RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
117        })
118    }
119}
120
121impl<'a, I: Clone, O, E: Error<I>> Parser<I, O> for Recursive<'a, I, O, E> {
122    type Error = E;
123
124    fn parse_inner<D: Debugger>(
125        &self,
126        debugger: &mut D,
127        stream: &mut StreamOf<I, Self::Error>,
128    ) -> PResult<I, O, Self::Error> {
129        #[cfg(feature = "stacker")]
130        #[inline(always)]
131        fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
132            stacker::maybe_grow(1024 * 1024, 1024 * 1024, f)
133        }
134        #[cfg(not(feature = "stacker"))]
135        #[inline(always)]
136        fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
137            f()
138        }
139
140        recurse(|| {
141            #[allow(deprecated)]
142            debugger.invoke(
143                self.cell()
144                    .get()
145                    .expect("Recursive parser used before being defined")
146                    .as_ref(),
147                stream,
148            )
149        })
150    }
151
152    fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
153        #[allow(deprecated)]
154        self.parse_inner(d, s)
155    }
156    fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
157        #[allow(deprecated)]
158        self.parse_inner(d, s)
159    }
160}
161
162/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
163///
164/// The given function must create the parser. The parser must not be used to parse input before this function returns.
165///
166/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
167///
168/// The output type of this parser is `O`, the same as the inner parser.
169///
170/// # Examples
171///
172/// ```
173/// # use chumsky::prelude::*;
174/// #[derive(Debug, PartialEq)]
175/// enum Tree {
176///     Leaf(String),
177///     Branch(Vec<Tree>),
178/// }
179///
180/// // Parser that recursively parses nested lists
181/// let tree = recursive::<_, _, _, _, Simple<char>>(|tree| tree
182///     .separated_by(just(','))
183///     .delimited_by(just('['), just(']'))
184///     .map(Tree::Branch)
185///     .or(text::ident().map(Tree::Leaf))
186///     .padded());
187///
188/// assert_eq!(tree.parse("hello"), Ok(Tree::Leaf("hello".to_string())));
189/// assert_eq!(tree.parse("[a, b, c]"), Ok(Tree::Branch(vec![
190///     Tree::Leaf("a".to_string()),
191///     Tree::Leaf("b".to_string()),
192///     Tree::Leaf("c".to_string()),
193/// ])));
194/// // The parser can deal with arbitrarily complex nested lists
195/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]"), Ok(Tree::Branch(vec![
196///     Tree::Branch(vec![
197///         Tree::Leaf("a".to_string()),
198///         Tree::Leaf("b".to_string()),
199///     ]),
200///     Tree::Leaf("c".to_string()),
201///     Tree::Branch(vec![
202///         Tree::Leaf("d".to_string()),
203///         Tree::Branch(vec![
204///             Tree::Leaf("e".to_string()),
205///             Tree::Leaf("f".to_string()),
206///         ]),
207///     ]),
208/// ])));
209/// ```
210pub fn recursive<
211    'a,
212    I: Clone,
213    O,
214    P: Parser<I, O, Error = E> + 'a,
215    F: FnOnce(Recursive<'a, I, O, E>) -> P,
216    E: Error<I>,
217>(
218    f: F,
219) -> Recursive<'a, I, O, E> {
220    let mut parser = Recursive::declare();
221    parser.define(f(Recursive(match &parser.0 {
222        RecursiveInner::Owned(x) => RecursiveInner::Unowned(Rc::downgrade(x)),
223        RecursiveInner::Unowned(_) => unreachable!(),
224    })));
225    parser
226}