chumsky/recursive.rs
1//! Recursive parsers (parser that include themselves within their patterns).
2//!
3//! *“It's unpleasantly like being drunk."
4//! "What's so unpleasant about being drunk?"
5//! "You ask a glass of water.”*
6//!
7//! The [`recursive()`] function covers most cases, but sometimes it's necessary to manually control the declaration and
8//! definition of parsers more corefully, particularly for mutually-recursive parsers. In such cases, the functions on
9//! [`Recursive`] allow for this.
10
11use super::*;
12
13use alloc::rc::{Rc, Weak};
14
15// TODO: Remove when `OnceCell` is stable
16struct OnceCell<T>(core::cell::RefCell<Option<T>>);
17impl<T> OnceCell<T> {
18 pub fn new() -> Self {
19 Self(core::cell::RefCell::new(None))
20 }
21 pub fn set(&self, x: T) -> Result<(), ()> {
22 let mut inner = self.0.try_borrow_mut().map_err(|_| ())?;
23
24 if inner.is_none() {
25 *inner = Some(x);
26 Ok(())
27 } else {
28 Err(())
29 }
30 }
31 pub fn get(&self) -> Option<core::cell::Ref<T>> {
32 Some(core::cell::Ref::map(self.0.borrow(), |x| {
33 x.as_ref().unwrap()
34 }))
35 }
36}
37
38enum RecursiveInner<T> {
39 Owned(Rc<T>),
40 Unowned(Weak<T>),
41}
42
43type OnceParser<'a, I, O, E> = OnceCell<Box<dyn Parser<I, O, Error = E> + 'a>>;
44
45/// A parser that can be defined in terms of itself by separating its [declaration](Recursive::declare) from its
46/// [definition](Recursive::define).
47///
48/// Prefer to use [`recursive()`], which exists as a convenient wrapper around both operations, if possible.
49#[must_use]
50pub struct Recursive<'a, I, O, E: Error<I>>(RecursiveInner<OnceParser<'a, I, O, E>>);
51
52impl<'a, I: Clone, O, E: Error<I>> Recursive<'a, I, O, E> {
53 fn cell(&self) -> Rc<OnceParser<'a, I, O, E>> {
54 match &self.0 {
55 RecursiveInner::Owned(x) => x.clone(),
56 RecursiveInner::Unowned(x) => x
57 .upgrade()
58 .expect("Recursive parser used before being defined"),
59 }
60 }
61
62 /// Declare the existence of a recursive parser, allowing it to be used to construct parser combinators before
63 /// being fulled defined.
64 ///
65 /// Declaring a parser before defining it is required for a parser to reference itself.
66 ///
67 /// This should be followed by **exactly one** call to the [`Recursive::define`] method prior to using the parser
68 /// for parsing (i.e: via the [`Parser::parse`] method or similar).
69 ///
70 /// Prefer to use [`recursive()`], which is a convenient wrapper around this method and [`Recursive::define`], if
71 /// possible.
72 ///
73 /// # Examples
74 ///
75 /// ```
76 /// # use chumsky::prelude::*;
77 /// #[derive(Debug, PartialEq)]
78 /// enum Chain {
79 /// End,
80 /// Link(char, Box<Chain>),
81 /// }
82 ///
83 /// // Declare the existence of the parser before defining it so that it can reference itself
84 /// let mut chain = Recursive::<_, _, Simple<char>>::declare();
85 ///
86 /// // Define the parser in terms of itself.
87 /// // In this case, the parser parses a right-recursive list of '+' into a singly linked list
88 /// chain.define(just('+')
89 /// .then(chain.clone())
90 /// .map(|(c, chain)| Chain::Link(c, Box::new(chain)))
91 /// .or_not()
92 /// .map(|chain| chain.unwrap_or(Chain::End)));
93 ///
94 /// assert_eq!(chain.parse(""), Ok(Chain::End));
95 /// assert_eq!(
96 /// chain.parse("++"),
97 /// Ok(Chain::Link('+', Box::new(Chain::Link('+', Box::new(Chain::End))))),
98 /// );
99 /// ```
100 pub fn declare() -> Self {
101 Recursive(RecursiveInner::Owned(Rc::new(OnceCell::new())))
102 }
103
104 /// Defines the parser after declaring it, allowing it to be used for parsing.
105 pub fn define<P: Parser<I, O, Error = E> + 'a>(&mut self, parser: P) {
106 self.cell()
107 .set(Box::new(parser))
108 .unwrap_or_else(|_| panic!("Parser defined more than once"));
109 }
110}
111
112impl<'a, I: Clone, O, E: Error<I>> Clone for Recursive<'a, I, O, E> {
113 fn clone(&self) -> Self {
114 Self(match &self.0 {
115 RecursiveInner::Owned(x) => RecursiveInner::Owned(x.clone()),
116 RecursiveInner::Unowned(x) => RecursiveInner::Unowned(x.clone()),
117 })
118 }
119}
120
121impl<'a, I: Clone, O, E: Error<I>> Parser<I, O> for Recursive<'a, I, O, E> {
122 type Error = E;
123
124 fn parse_inner<D: Debugger>(
125 &self,
126 debugger: &mut D,
127 stream: &mut StreamOf<I, Self::Error>,
128 ) -> PResult<I, O, Self::Error> {
129 #[cfg(feature = "stacker")]
130 #[inline(always)]
131 fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
132 stacker::maybe_grow(1024 * 1024, 1024 * 1024, f)
133 }
134 #[cfg(not(feature = "stacker"))]
135 #[inline(always)]
136 fn recurse<R, F: FnOnce() -> R>(f: F) -> R {
137 f()
138 }
139
140 recurse(|| {
141 #[allow(deprecated)]
142 debugger.invoke(
143 self.cell()
144 .get()
145 .expect("Recursive parser used before being defined")
146 .as_ref(),
147 stream,
148 )
149 })
150 }
151
152 fn parse_inner_verbose(&self, d: &mut Verbose, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
153 #[allow(deprecated)]
154 self.parse_inner(d, s)
155 }
156 fn parse_inner_silent(&self, d: &mut Silent, s: &mut StreamOf<I, E>) -> PResult<I, O, E> {
157 #[allow(deprecated)]
158 self.parse_inner(d, s)
159 }
160}
161
162/// Construct a recursive parser (i.e: a parser that may contain itself as part of its pattern).
163///
164/// The given function must create the parser. The parser must not be used to parse input before this function returns.
165///
166/// This is a wrapper around [`Recursive::declare`] and [`Recursive::define`].
167///
168/// The output type of this parser is `O`, the same as the inner parser.
169///
170/// # Examples
171///
172/// ```
173/// # use chumsky::prelude::*;
174/// #[derive(Debug, PartialEq)]
175/// enum Tree {
176/// Leaf(String),
177/// Branch(Vec<Tree>),
178/// }
179///
180/// // Parser that recursively parses nested lists
181/// let tree = recursive::<_, _, _, _, Simple<char>>(|tree| tree
182/// .separated_by(just(','))
183/// .delimited_by(just('['), just(']'))
184/// .map(Tree::Branch)
185/// .or(text::ident().map(Tree::Leaf))
186/// .padded());
187///
188/// assert_eq!(tree.parse("hello"), Ok(Tree::Leaf("hello".to_string())));
189/// assert_eq!(tree.parse("[a, b, c]"), Ok(Tree::Branch(vec![
190/// Tree::Leaf("a".to_string()),
191/// Tree::Leaf("b".to_string()),
192/// Tree::Leaf("c".to_string()),
193/// ])));
194/// // The parser can deal with arbitrarily complex nested lists
195/// assert_eq!(tree.parse("[[a, b], c, [d, [e, f]]]"), Ok(Tree::Branch(vec![
196/// Tree::Branch(vec![
197/// Tree::Leaf("a".to_string()),
198/// Tree::Leaf("b".to_string()),
199/// ]),
200/// Tree::Leaf("c".to_string()),
201/// Tree::Branch(vec![
202/// Tree::Leaf("d".to_string()),
203/// Tree::Branch(vec![
204/// Tree::Leaf("e".to_string()),
205/// Tree::Leaf("f".to_string()),
206/// ]),
207/// ]),
208/// ])));
209/// ```
210pub fn recursive<
211 'a,
212 I: Clone,
213 O,
214 P: Parser<I, O, Error = E> + 'a,
215 F: FnOnce(Recursive<'a, I, O, E>) -> P,
216 E: Error<I>,
217>(
218 f: F,
219) -> Recursive<'a, I, O, E> {
220 let mut parser = Recursive::declare();
221 parser.define(f(Recursive(match &parser.0 {
222 RecursiveInner::Owned(x) => RecursiveInner::Unowned(Rc::downgrade(x)),
223 RecursiveInner::Unowned(_) => unreachable!(),
224 })));
225 parser
226}