oxttl/
line_formats.rs

1//! Shared parser implementation for N-Triples and N-Quads.
2
3use crate::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
4use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError, TokenOrLineJump};
5use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
6#[cfg(feature = "rdf-star")]
7use oxrdf::Triple;
8use oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term};
9
10pub struct NQuadsRecognizer {
11    stack: Vec<NQuadsState>,
12    subjects: Vec<Subject>,
13    predicates: Vec<NamedNode>,
14    objects: Vec<Term>,
15}
16
17pub struct NQuadsRecognizerContext {
18    with_graph_name: bool,
19    #[cfg(feature = "rdf-star")]
20    with_quoted_triples: bool,
21    lexer_options: N3LexerOptions,
22}
23
/// States of the [`NQuadsRecognizer`] pushdown automaton.
enum NQuadsState {
    /// The next token must start a subject: an IRI, a blank node label or,
    /// when quoted triples are enabled, `<<`.
    ExpectSubject,
    /// The next token must be the predicate IRI.
    ExpectPredicate,
    /// The next token must start an object: an IRI, a blank node label, a
    /// string or, when quoted triples are enabled, `<<`.
    ExpectedObject,
    /// The object is complete. At the top level an optional graph name may
    /// follow; inside a quoted triple the closing `>>` is required.
    ExpectPossibleGraphOrEndOfQuotedTriple,
    /// The quad has been emitted and the terminating `.` is required.
    ExpectDot,
    /// A string literal was read; a language tag or `^^` may follow, otherwise
    /// the literal is a simple literal.
    ExpectLiteralAnnotationOrGraphNameOrDot {
        /// Lexical form of the literal being built.
        value: String,
    },
    /// `^^` was read after a string; the datatype IRI is required.
    ExpectLiteralDatatype {
        /// Lexical form of the literal being built.
        value: String,
    },
    /// The `.` was read; nothing but a line jump may follow on this line.
    ExpectLineJump,
    /// An error occurred; tokens are discarded until the next line jump.
    RecoverToLineJump,
    /// A quoted triple in subject position has just been closed and must be
    /// folded into a single subject.
    #[cfg(feature = "rdf-star")]
    AfterQuotedSubject,
    /// A quoted triple in object position has just been closed and must be
    /// folded into a single object.
    #[cfg(feature = "rdf-star")]
    AfterQuotedObject,
}
43
44impl RuleRecognizer for NQuadsRecognizer {
45    type TokenRecognizer = N3Lexer;
46    type Output = Quad;
47    type Context = NQuadsRecognizerContext;
48
49    fn error_recovery_state(mut self) -> Self {
50        self.stack.clear();
51        self.stack.push(NQuadsState::RecoverToLineJump);
52        self.subjects.clear();
53        self.predicates.clear();
54        self.objects.clear();
55        self
56    }
57
58    fn recognize_next(
59        mut self,
60        token: TokenOrLineJump<N3Token<'_>>,
61        context: &mut NQuadsRecognizerContext,
62        results: &mut Vec<Quad>,
63        errors: &mut Vec<RuleRecognizerError>,
64    ) -> Self {
65        match self.stack.pop().unwrap_or(NQuadsState::ExpectSubject) {
66            NQuadsState::ExpectSubject => {
67                let TokenOrLineJump::Token(token) = token else {
68                    return if self.stack.is_empty() {
69                        self
70                    } else {
71                        self.error(
72                            context,
73                            results,
74                            errors,
75                            token,
76                            "line jumps are not allowed inside of quoted triples",
77                        )
78                    };
79                };
80                match token {
81                    N3Token::IriRef(s) => {
82                        self.subjects.push(NamedNode::new_unchecked(s).into());
83                        self.stack.push(NQuadsState::ExpectPredicate);
84                        self
85                    }
86                    N3Token::BlankNodeLabel(s) => {
87                        self.subjects.push(BlankNode::new_unchecked(s).into());
88                        self.stack.push(NQuadsState::ExpectPredicate);
89                        self
90                    }
91                    #[cfg(feature = "rdf-star")]
92                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
93                        self.stack.push(NQuadsState::AfterQuotedSubject);
94                        self.stack.push(NQuadsState::ExpectSubject);
95                        self
96                    }
97                    _ => self.error(
98                        context,
99                        results,
100                        errors,
101                        TokenOrLineJump::Token(token),
102                        "The subject of a triple must be an IRI or a blank node",
103                    ),
104                }
105            }
106            NQuadsState::ExpectPredicate => {
107                let TokenOrLineJump::Token(token) = token else {
108                    return self.error(
109                        context,
110                        results,
111                        errors,
112                        token,
113                        "line jumps are not allowed in the middle of triples",
114                    );
115                };
116                match token {
117                    N3Token::IriRef(p) => {
118                        self.predicates.push(NamedNode::new_unchecked(p));
119                        self.stack.push(NQuadsState::ExpectedObject);
120                        self
121                    }
122                    _ => self.error(
123                        context,
124                        results,
125                        errors,
126                        TokenOrLineJump::Token(token),
127                        "The predicate of a triple must be an IRI",
128                    ),
129                }
130            }
131            NQuadsState::ExpectedObject => {
132                let TokenOrLineJump::Token(token) = token else {
133                    return self.error(
134                        context,
135                        results,
136                        errors,
137                        token,
138                        "line jumps are not allowed in the middle of triples",
139                    );
140                };
141                match token {
142                    N3Token::IriRef(o) => {
143                        self.objects.push(NamedNode::new_unchecked(o).into());
144                        self.stack
145                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
146                        self
147                    }
148                    N3Token::BlankNodeLabel(o) => {
149                        self.objects.push(BlankNode::new_unchecked(o).into());
150                        self.stack
151                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
152                        self
153                    }
154                    N3Token::String(value) => {
155                        self.stack
156                            .push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value });
157                        self
158                    }
159                    #[cfg(feature = "rdf-star")]
160                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
161                        self.stack.push(NQuadsState::AfterQuotedObject);
162                        self.stack.push(NQuadsState::ExpectSubject);
163                        self
164                    }
165                    _ => self.error(
166                        context,
167                        results,
168                        errors,
169                        TokenOrLineJump::Token(token),
170                        "The object of a triple must be an IRI, a blank node or a literal",
171                    ),
172                }
173            }
174            NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
175                TokenOrLineJump::Token(N3Token::LangTag(lang_tag)) => {
176                    self.objects.push(
177                        Literal::new_language_tagged_literal_unchecked(
178                            value,
179                            lang_tag.to_ascii_lowercase(),
180                        )
181                        .into(),
182                    );
183                    self.stack
184                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
185                    self
186                }
187                TokenOrLineJump::Token(N3Token::Punctuation("^^")) => {
188                    self.stack
189                        .push(NQuadsState::ExpectLiteralDatatype { value });
190                    self
191                }
192                _ => {
193                    self.objects.push(Literal::new_simple_literal(value).into());
194                    self.stack
195                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
196                    self.recognize_next(token, context, results, errors)
197                }
198            },
199            NQuadsState::ExpectLiteralDatatype { value } => {
200                let TokenOrLineJump::Token(token) = token else {
201                    return self.error(
202                        context,
203                        results,
204                        errors,
205                        token,
206                        "line jumps are not allowed in the middle of triples",
207                    );
208                };
209                match token {
210                    N3Token::IriRef(d) => {
211                        self.objects.push(
212                            Literal::new_typed_literal(value, NamedNode::new_unchecked(d)).into(),
213                        );
214                        self.stack
215                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
216                        self
217                    }
218                    _ => self.error(
219                        context,
220                        results,
221                        errors,
222                        TokenOrLineJump::Token(token),
223                        "A literal datatype must be an IRI",
224                    ),
225                }
226            }
227            NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
228                if self.stack.is_empty() {
229                    match token {
230                        TokenOrLineJump::Token(N3Token::IriRef(g)) if context.with_graph_name => {
231                            self.emit_quad(results, NamedNode::new_unchecked(g).into());
232                            self.stack.push(NQuadsState::ExpectDot);
233                            self
234                        }
235                        TokenOrLineJump::Token(N3Token::BlankNodeLabel(g))
236                            if context.with_graph_name =>
237                        {
238                            self.emit_quad(results, BlankNode::new_unchecked(g).into());
239                            self.stack.push(NQuadsState::ExpectDot);
240                            self
241                        }
242                        _ => {
243                            self.emit_quad(results, GraphName::DefaultGraph);
244                            self.stack.push(NQuadsState::ExpectDot);
245                            self.recognize_next(token, context, results, errors)
246                        }
247                    }
248                } else if token == TokenOrLineJump::Token(N3Token::Punctuation(">>")) {
249                    self
250                } else {
251                    self.error(
252                        context,
253                        results,
254                        errors,
255                        token,
256                        "Expecting the end of a quoted triple '>>'",
257                    )
258                }
259            }
260            NQuadsState::ExpectDot => {
261                let TokenOrLineJump::Token(token) = token else {
262                    return self
263                        .error(
264                            context,
265                            results,
266                            errors,
267                            token,
268                            "Quads must be followed by a dot",
269                        )
270                        .recognize_next(TokenOrLineJump::LineJump, context, results, errors);
271                };
272                if let N3Token::Punctuation(".") = token {
273                    self.stack.push(NQuadsState::ExpectLineJump);
274                    self
275                } else {
276                    errors.push("Quads must be followed by a dot".into());
277                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
278                }
279            }
280            NQuadsState::ExpectLineJump => {
281                let TokenOrLineJump::Token(token) = token else {
282                    return self;
283                };
284                errors.push(
285                    format!(
286                        "Only a single triple or quad can be written in a line, found {token:?}"
287                    )
288                    .into(),
289                );
290                self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
291            }
292            #[cfg(feature = "rdf-star")]
293            NQuadsState::AfterQuotedSubject => {
294                let triple = Triple {
295                    subject: self.subjects.pop().unwrap(),
296                    predicate: self.predicates.pop().unwrap(),
297                    object: self.objects.pop().unwrap(),
298                };
299                self.subjects.push(triple.into());
300                self.stack.push(NQuadsState::ExpectPredicate);
301                self.recognize_next(token, context, results, errors)
302            }
303            #[cfg(feature = "rdf-star")]
304            NQuadsState::AfterQuotedObject => {
305                let triple = Triple {
306                    subject: self.subjects.pop().unwrap(),
307                    predicate: self.predicates.pop().unwrap(),
308                    object: self.objects.pop().unwrap(),
309                };
310                self.objects.push(triple.into());
311                self.stack
312                    .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
313                self.recognize_next(token, context, results, errors)
314            }
315            NQuadsState::RecoverToLineJump => {
316                if token != TokenOrLineJump::LineJump {
317                    self.stack.push(NQuadsState::RecoverToLineJump);
318                }
319                self
320            }
321        }
322    }
323
324    fn recognize_end(
325        mut self,
326        _context: &mut NQuadsRecognizerContext,
327        results: &mut Vec<Quad>,
328        errors: &mut Vec<RuleRecognizerError>,
329    ) {
330        match &*self.stack {
331            [NQuadsState::ExpectSubject | NQuadsState::ExpectLineJump] | [] => (),
332            [NQuadsState::ExpectDot] => errors.push("Triples must be followed by a dot".into()),
333            [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => {
334                self.emit_quad(results, GraphName::DefaultGraph);
335                errors.push("Triples must be followed by a dot".into())
336            }
337            [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => {
338                self.objects.push(Literal::new_simple_literal(value).into());
339                self.emit_quad(results, GraphName::DefaultGraph);
340                errors.push("Triples must be followed by a dot".into())
341            }
342            _ => errors.push("Unexpected end".into()), // TODO
343        }
344    }
345
346    fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions {
347        &context.lexer_options
348    }
349}
350
351impl NQuadsRecognizer {
352    #[allow(clippy::fn_params_excessive_bools)]
353    pub fn new_parser<B>(
354        data: B,
355        is_ending: bool,
356        with_graph_name: bool,
357        #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
358        unchecked: bool,
359    ) -> Parser<B, Self> {
360        Parser::new(
361            Lexer::new(
362                N3Lexer::new(N3LexerMode::NTriples, unchecked),
363                data,
364                is_ending,
365                MIN_BUFFER_SIZE,
366                MAX_BUFFER_SIZE,
367                Some(b"#"),
368            ),
369            Self {
370                stack: vec![NQuadsState::ExpectSubject],
371                subjects: Vec::new(),
372                predicates: Vec::new(),
373                objects: Vec::new(),
374            },
375            NQuadsRecognizerContext {
376                with_graph_name,
377                #[cfg(feature = "rdf-star")]
378                with_quoted_triples,
379                lexer_options: N3LexerOptions::default(),
380            },
381        )
382    }
383
384    #[must_use]
385    fn error(
386        self,
387        context: &mut NQuadsRecognizerContext,
388        results: &mut Vec<Quad>,
389        errors: &mut Vec<RuleRecognizerError>,
390        token: TokenOrLineJump<N3Token<'_>>,
391        msg: impl Into<RuleRecognizerError>,
392    ) -> Self {
393        errors.push(msg.into());
394        let this = self.error_recovery_state();
395        match token {
396            TokenOrLineJump::Token(_) => this,
397            TokenOrLineJump::LineJump => this.recognize_next(token, context, results, errors), /* We immediately recover */
398        }
399    }
400
401    fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) {
402        results.push(Quad {
403            subject: self.subjects.pop().unwrap(),
404            predicate: self.predicates.pop().unwrap(),
405            object: self.objects.pop().unwrap(),
406            graph_name,
407        })
408    }
409}