oxttl/toolkit/parser.rs

use crate::toolkit::error::{TurtleParseError, TurtleSyntaxError};
use crate::toolkit::lexer::{Lexer, TokenOrLineJump, TokenRecognizer};
use std::io::Read;
use std::ops::Deref;
#[cfg(feature = "async-tokio")]
use tokio::io::AsyncRead;

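/// A grammar rule recognizer driven by the tokens of its [`TokenRecognizer`].
///
/// `recognize_next` consumes one token (or line jump) at a time and returns the next
/// recognizer state, pushing completed outputs and recoverable errors into the provided
/// buffers. `recognize_end` flushes whatever is still pending once the input is exhausted.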
pub trait RuleRecognizer: Sized {
    type TokenRecognizer: TokenRecognizer;
    type Output;
    type Context;

    fn error_recovery_state(self) -> Self;

    fn recognize_next(
        self,
        token: TokenOrLineJump<<Self::TokenRecognizer as TokenRecognizer>::Token<'_>>,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self;

    fn recognize_end(
        self,
        context: &mut Self::Context,
        results: &mut Vec<Self::Output>,
        errors: &mut Vec<RuleRecognizerError>,
    );

    fn lexer_options(
        context: &Self::Context,
    ) -> &<Self::TokenRecognizer as TokenRecognizer>::Options;
}

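/// A recoverable error reported by a [`RuleRecognizer`].
///
/// If the message contains the `TOKEN` placeholder, [`Parser::parse_next`] replaces it
/// with the source text of the offending token.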
pub struct RuleRecognizerError {
    pub message: String,
}

impl<S: Into<String>> From<S> for RuleRecognizerError {
    fn from(message: S) -> Self {
        Self {
            message: message.into(),
        }
    }
}

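/// Pull-based parser combining a [`Lexer`] with a [`RuleRecognizer`].
///
/// Tokens read from the lexer are fed to the recognizer; completed outputs and syntax
/// errors are buffered and drained one at a time by [`Parser::parse_next`].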
#[allow(clippy::partial_pub_fields)]
pub struct Parser<B, RR: RuleRecognizer> {
    lexer: Lexer<B, RR::TokenRecognizer>,
    state: Option<RR>,
    pub context: RR::Context,
    results: Vec<RR::Output>,
    errors: Vec<RuleRecognizerError>,
}

impl<B, RR: RuleRecognizer> Parser<B, RR> {
    pub fn new(lexer: Lexer<B, RR::TokenRecognizer>, recognizer: RR, context: RR::Context) -> Self {
        Self {
            lexer,
            state: Some(recognizer),
            context,
            results: vec![],
            errors: vec![],
        }
    }
}

impl<B: Deref<Target = [u8]>, RR: RuleRecognizer> Parser<B, RR> {
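    /// Returns `true` once the recognizer has finished and all buffered results and errors
    /// have been drained.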
    #[inline]
    pub fn is_end(&self) -> bool {
        self.state.is_none() && self.results.is_empty() && self.errors.is_empty()
    }

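    /// Returns the next buffered syntax error or output, pulling more tokens from the
    /// lexer as needed.
    ///
    /// Returns `None` either when parsing is complete or when the lexer needs more input
    /// bytes before it can produce another token.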
    pub fn parse_next(&mut self) -> Option<Result<RR::Output, TurtleSyntaxError>> {
        loop {
            // Drain buffered errors first, substituting the offending token into the message.
            if let Some(error) = self.errors.pop() {
                return Some(Err(TurtleSyntaxError::new(
                    self.lexer.last_token_location(),
                    error
                        .message
                        .replace("TOKEN", &self.lexer.last_token_source()),
                )));
            }
            // Then drain buffered outputs.
            if let Some(result) = self.results.pop() {
                return Some(Ok(result));
            }
            // Pull the next token from the lexer and feed it to the recognizer.
            if let Some(result) = self.lexer.parse_next(RR::lexer_options(&self.context)) {
                match result {
                    Ok(token) => {
                        self.state = self.state.take().map(|state| {
                            state.recognize_next(
                                token,
                                &mut self.context,
                                &mut self.results,
                                &mut self.errors,
                            )
                        });
                        continue;
                    }
                    Err(e) => {
                        // Lexing failed: move the recognizer into its error recovery state.
                        self.state = self.state.take().map(RR::error_recovery_state);
                        return Some(Err(e));
                    }
                }
            }
            if self.lexer.is_end() {
                // End of input: let the recognizer flush anything still pending.
                self.state.take()?.recognize_end(
                    &mut self.context,
                    &mut self.results,
                    &mut self.errors,
                )
            } else {
                // The lexer needs more input bytes before it can produce another token.
                return None;
            }
        }
    }
}

impl<RR: RuleRecognizer> Parser<Vec<u8>, RR> {
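    /// Signals to the lexer that no more input bytes will be provided.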
    #[inline]
    pub fn end(&mut self) {
        self.lexer.end()
    }

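    /// Appends more input bytes to the lexer buffer.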
    pub fn extend_from_slice(&mut self, other: &[u8]) {
        self.lexer.extend_from_slice(other)
    }

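    /// Wraps this parser into an iterator that pulls its input bytes from `reader`.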
    pub fn for_reader<R: Read>(self, reader: R) -> ReaderIterator<R, RR> {
        ReaderIterator {
            reader,
            parser: self,
        }
    }

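    /// Same as [`Parser::for_reader`] but pulling input bytes from a Tokio [`AsyncRead`].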
    #[cfg(feature = "async-tokio")]
    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
        self,
        reader: R,
    ) -> TokioAsyncReaderIterator<R, RR> {
        TokioAsyncReaderIterator {
            reader,
            parser: self,
        }
    }
}

impl<'a, RR: RuleRecognizer> IntoIterator for Parser<&'a [u8], RR> {
    type Item = Result<RR::Output, TurtleSyntaxError>;
    type IntoIter = SliceIterator<'a, RR>;

    fn into_iter(self) -> Self::IntoIter {
        SliceIterator { parser: self }
    }
}

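/// Iterator adapter feeding a [`Parser`] from a [`Read`] implementation, reading more
/// bytes from it whenever the parser runs out of buffered input.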
#[allow(clippy::partial_pub_fields)]
pub struct ReaderIterator<R: Read, RR: RuleRecognizer> {
    reader: R,
    pub parser: Parser<Vec<u8>, RR>,
}

impl<R: Read, RR: RuleRecognizer> Iterator for ReaderIterator<R, RR> {
    type Item = Result<RR::Output, TurtleParseError>;

    fn next(&mut self) -> Option<Self::Item> {
        while !self.parser.is_end() {
            if let Some(result) = self.parser.parse_next() {
                return Some(result.map_err(TurtleParseError::Syntax));
            }
            if let Err(e) = self.parser.lexer.extend_from_reader(&mut self.reader) {
                return Some(Err(e.into()));
            }
        }
        None
    }
}

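/// Async counterpart of [`ReaderIterator`], feeding the parser from a Tokio [`AsyncRead`].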
#[cfg(feature = "async-tokio")]
pub struct TokioAsyncReaderIterator<R: AsyncRead + Unpin, RR: RuleRecognizer> {
    pub reader: R,
    pub parser: Parser<Vec<u8>, RR>,
}

#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin, RR: RuleRecognizer> TokioAsyncReaderIterator<R, RR> {
    pub async fn next(&mut self) -> Option<Result<RR::Output, TurtleParseError>> {
        while !self.parser.is_end() {
            if let Some(result) = self.parser.parse_next() {
                return Some(result.map_err(TurtleParseError::Syntax));
            }
            if let Err(e) = self
                .parser
                .lexer
                .extend_from_tokio_async_read(&mut self.reader)
                .await
            {
                return Some(Err(e.into()));
            }
        }
        None
    }
}

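/// Iterator over the outputs of a [`Parser`] reading from an in-memory byte slice.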
pub struct SliceIterator<'a, RR: RuleRecognizer> {
    pub parser: Parser<&'a [u8], RR>,
}

impl<RR: RuleRecognizer> Iterator for SliceIterator<'_, RR> {
    type Item = Result<RR::Output, TurtleSyntaxError>;

    fn next(&mut self) -> Option<Self::Item> {
        self.parser.parse_next()
    }
}