oxttl/
terse.rs

1//! Shared parser implementation for Turtle and TriG.
2
3use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
4use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError, TokenOrLineJump};
5use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
6use oxiri::Iri;
7use oxrdf::vocab::{rdf, xsd};
8#[cfg(feature = "rdf-star")]
9use oxrdf::Triple;
10use oxrdf::{BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term};
11use std::collections::hash_map::Iter;
12use std::collections::HashMap;
13
14pub struct TriGRecognizer {
15    stack: Vec<TriGState>,
16    cur_subject: Vec<Subject>,
17    cur_predicate: Vec<NamedNode>,
18    cur_object: Vec<Term>,
19    cur_graph: GraphName,
20}
21
22#[allow(clippy::partial_pub_fields)]
23pub struct TriGRecognizerContext {
24    pub lexer_options: N3LexerOptions,
25    pub with_graph_name: bool,
26    #[cfg(feature = "rdf-star")]
27    pub with_quoted_triples: bool,
28    prefixes: HashMap<String, Iri<String>>,
29}
30
31impl TriGRecognizerContext {
32    pub fn prefixes(&self) -> Iter<'_, String, Iri<String>> {
33        self.prefixes.iter()
34    }
35}
36
37impl RuleRecognizer for TriGRecognizer {
38    type TokenRecognizer = N3Lexer;
39    type Output = Quad;
40    type Context = TriGRecognizerContext;
41
42    fn error_recovery_state(mut self) -> Self {
43        self.stack.clear();
44        self.cur_subject.clear();
45        self.cur_predicate.clear();
46        self.cur_object.clear();
47        self.cur_graph = GraphName::DefaultGraph;
48        self
49    }
50
51    fn recognize_next(
52        mut self,
53        token: TokenOrLineJump<N3Token<'_>>,
54        context: &mut TriGRecognizerContext,
55        results: &mut Vec<Quad>,
56        errors: &mut Vec<RuleRecognizerError>,
57    ) -> Self {
58        let TokenOrLineJump::Token(token) = token else {
59            return self;
60        };
61        if let Some(rule) = self.stack.pop() {
62            match rule {
63                // [1g] trigDoc      ::=  (directive | block)*
64                // [2g] block        ::=  triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
65                // [3]  directive    ::=  prefixID | base | sparqlPrefix | sparqlBase
66                // [4]  prefixID     ::=  '@prefix' PNAME_NS IRIREF '.'
67                // [5]  base         ::=  '@base' IRIREF '.'
68                // [5s] sparqlPrefix ::=  "PREFIX" PNAME_NS IRIREF
69                // [6s] sparqlBase   ::=  "BASE" IRIREF
70                TriGState::TriGDoc => {
71                    self.cur_graph = GraphName::DefaultGraph;
72                    self.stack.push(TriGState::TriGDoc);
73                    match token {
74                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
75                            self.stack.push(TriGState::BaseExpectIri);
76                            self
77                        }
78                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
79                            self.stack.push(TriGState::PrefixExpectPrefix);
80                            self
81                        }
82                        N3Token::LangTag("prefix") => {
83                            self.stack.push(TriGState::ExpectDot);
84                            self.stack.push(TriGState::PrefixExpectPrefix);
85                            self
86                        }
87                        N3Token::LangTag("base") => {
88                            self.stack.push(TriGState::ExpectDot);
89                            self.stack.push(TriGState::BaseExpectIri);
90                            self
91                        }
92                        N3Token::PlainKeyword(k)
93                            if k.eq_ignore_ascii_case("graph") && context.with_graph_name =>
94                        {
95                            self.stack.push(TriGState::WrappedGraph);
96                            self.stack.push(TriGState::GraphName);
97                            self
98                        }
99                        N3Token::Punctuation("{") if context.with_graph_name => {
100                            self.stack.push(TriGState::WrappedGraph);
101                            self.recognize_next(
102                                TokenOrLineJump::Token(token),
103                                context,
104                                results,
105                                errors,
106                            )
107                        }
108                        _ => {
109                            self.stack.push(TriGState::TriplesOrGraph);
110                            self.recognize_next(
111                                TokenOrLineJump::Token(token),
112                                context,
113                                results,
114                                errors,
115                            )
116                        }
117                    }
118                }
119                TriGState::ExpectDot => {
120                    self.cur_subject.pop();
121                    if token == N3Token::Punctuation(".") {
122                        self
123                    } else {
124                        errors.push("A dot is expected at the end of statements".into());
125                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
126                    }
127                }
128                TriGState::BaseExpectIri => {
129                    if let N3Token::IriRef(iri) = token {
130                        context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri));
131                        self
132                    } else {
133                        self.error(errors, "The BASE keyword should be followed by an IRI")
134                    }
135                }
136                TriGState::PrefixExpectPrefix => match token {
137                    N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
138                        self.stack.push(TriGState::PrefixExpectIri {
139                            name: prefix.to_owned(),
140                        });
141                        self
142                    }
143                    _ => self.error(
144                        errors,
145                        "The PREFIX keyword should be followed by a prefix like 'ex:'",
146                    ),
147                },
148                TriGState::PrefixExpectIri { name } => {
149                    if let N3Token::IriRef(iri) = token {
150                        context.prefixes.insert(name, Iri::parse_unchecked(iri));
151                        self
152                    } else {
153                        self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI")
154                    }
155                }
156                // [3g]  triplesOrGraph  ::=  labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | quotedTriple predicateObjectList '.'
157                // [4g]  triples2        ::=  blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
158                TriGState::TriplesOrGraph => match token {
159                    N3Token::IriRef(iri) => {
160                        self.stack
161                            .push(TriGState::WrappedGraphOrPredicateObjectList {
162                                term: NamedNode::new_unchecked(iri).into(),
163                            });
164                        self
165                    }
166                    N3Token::PrefixedName {
167                        prefix,
168                        local,
169                        might_be_invalid_iri,
170                    } => match resolve_local_name(
171                        prefix,
172                        &local,
173                        might_be_invalid_iri,
174                        &context.prefixes,
175                    ) {
176                        Ok(t) => {
177                            self.stack
178                                .push(TriGState::WrappedGraphOrPredicateObjectList {
179                                    term: t.into(),
180                                });
181                            self
182                        }
183                        Err(e) => self.error(errors, e),
184                    },
185                    N3Token::BlankNodeLabel(label) => {
186                        self.stack
187                            .push(TriGState::WrappedGraphOrPredicateObjectList {
188                                term: BlankNode::new_unchecked(label).into(),
189                            });
190                        self
191                    }
192                    N3Token::Punctuation("[") => {
193                        self.stack
194                            .push(TriGState::WrappedGraphBlankNodePropertyListCurrent);
195                        self
196                    }
197                    N3Token::Punctuation("(") => {
198                        self.stack.push(TriGState::ExpectDot);
199                        self.stack.push(TriGState::PredicateObjectList);
200                        self.stack.push(TriGState::SubjectCollectionBeginning);
201                        self
202                    }
203                    #[cfg(feature = "rdf-star")]
204                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
205                        self.stack.push(TriGState::ExpectDot);
206                        self.stack.push(TriGState::PredicateObjectList);
207                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
208                        self.stack.push(TriGState::QuotedObject);
209                        self.stack.push(TriGState::Verb);
210                        self.stack.push(TriGState::QuotedSubject);
211                        self
212                    }
213                    _ => self.error(errors, "TOKEN is not a valid subject or graph name"),
214                },
215                TriGState::WrappedGraphOrPredicateObjectList { term } => {
216                    if token == N3Token::Punctuation("{") && context.with_graph_name {
217                        self.cur_graph = term.into();
218                        self.stack.push(TriGState::WrappedGraph);
219                    } else {
220                        self.cur_subject.push(term.into());
221                        self.stack.push(TriGState::ExpectDot);
222                        self.stack.push(TriGState::PredicateObjectList);
223                    }
224                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
225                }
226                TriGState::WrappedGraphBlankNodePropertyListCurrent => {
227                    if token == N3Token::Punctuation("]") {
228                        self.stack
229                            .push(TriGState::WrappedGraphOrPredicateObjectList {
230                                term: BlankNode::default().into(),
231                            });
232                        self
233                    } else {
234                        self.cur_subject.push(BlankNode::default().into());
235                        self.stack.push(TriGState::ExpectDot);
236                        self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
237                        self.stack.push(TriGState::PredicateObjectList);
238                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
239                    }
240                }
241                TriGState::SubjectBlankNodePropertyListEnd => {
242                    if token == N3Token::Punctuation("]") {
243                        self.stack
244                            .push(TriGState::SubjectBlankNodePropertyListAfter);
245                        self
246                    } else {
247                        errors.push("blank node property lists should end with a ']'".into());
248                        self.stack
249                            .push(TriGState::SubjectBlankNodePropertyListAfter);
250                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
251                    }
252                }
253                TriGState::SubjectBlankNodePropertyListAfter => {
254                    if matches!(token, N3Token::Punctuation("." | "}")) {
255                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
256                    } else {
257                        self.stack.push(TriGState::PredicateObjectList);
258                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
259                    }
260                }
261                TriGState::SubjectCollectionBeginning => {
262                    if let N3Token::Punctuation(")") = token {
263                        self.cur_subject.push(rdf::NIL.into());
264                        self
265                    } else {
266                        let root = BlankNode::default();
267                        self.cur_subject.push(root.clone().into());
268                        self.cur_subject.push(root.into());
269                        self.cur_predicate.push(rdf::FIRST.into());
270                        self.stack.push(TriGState::SubjectCollectionPossibleEnd);
271                        self.stack.push(TriGState::Object);
272                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
273                    }
274                }
275                TriGState::SubjectCollectionPossibleEnd => {
276                    let old = self.cur_subject.pop().unwrap();
277                    self.cur_object.pop();
278                    if let N3Token::Punctuation(")") = token {
279                        self.cur_predicate.pop();
280                        results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
281                        self
282                    } else {
283                        let new = BlankNode::default();
284                        results.push(Quad::new(
285                            old,
286                            rdf::REST,
287                            new.clone(),
288                            self.cur_graph.clone(),
289                        ));
290                        self.cur_subject.push(new.into());
291                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
292                        self.stack.push(TriGState::Object);
293                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
294                    }
295                }
296                // [5g]  wrappedGraph  ::=  '{' triplesBlock? '}'
297                // [6g]  triplesBlock  ::=  triples ('.' triplesBlock?)?
298                TriGState::WrappedGraph => {
299                    if token == N3Token::Punctuation("{") {
300                        self.stack.push(TriGState::WrappedGraphPossibleEnd);
301                        self.stack.push(TriGState::Triples);
302                        self
303                    } else {
304                        self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'")
305                    }
306                }
307                TriGState::WrappedGraphPossibleEnd => {
308                    self.cur_subject.pop();
309                    match token {
310                        N3Token::Punctuation("}") => self,
311                        N3Token::Punctuation(".") => {
312                            self.stack.push(TriGState::WrappedGraphPossibleEnd);
313                            self.stack.push(TriGState::Triples);
314                            self
315                        }
316                        _ => {
317                            errors.push(
318                                "A '}' or a '.' is expected at the end of a graph block".into(),
319                            );
320                            self.recognize_next(
321                                TokenOrLineJump::Token(token),
322                                context,
323                                results,
324                                errors,
325                            )
326                        }
327                    }
328                }
329                // [6]  triples   ::=  subject predicateObjectList | blankNodePropertyList predicateObjectList?
330                // [10]  subject  ::=  iri | BlankNode | collection | quotedTriple
331                TriGState::Triples => match token {
332                    N3Token::Punctuation("}") => {
333                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
334                        // Early end
335                    }
336                    N3Token::Punctuation("[") => {
337                        self.cur_subject.push(BlankNode::default().into());
338                        self.stack
339                            .push(TriGState::TriplesBlankNodePropertyListCurrent);
340                        self
341                    }
342                    N3Token::IriRef(iri) => {
343                        self.cur_subject.push(NamedNode::new_unchecked(iri).into());
344                        self.stack.push(TriGState::PredicateObjectList);
345                        self
346                    }
347                    N3Token::PrefixedName {
348                        prefix,
349                        local,
350                        might_be_invalid_iri,
351                    } => match resolve_local_name(
352                        prefix,
353                        &local,
354                        might_be_invalid_iri,
355                        &context.prefixes,
356                    ) {
357                        Ok(t) => {
358                            self.cur_subject.push(t.into());
359                            self.stack.push(TriGState::PredicateObjectList);
360                            self
361                        }
362                        Err(e) => self.error(errors, e),
363                    },
364                    N3Token::BlankNodeLabel(label) => {
365                        self.cur_subject
366                            .push(BlankNode::new_unchecked(label).into());
367                        self.stack.push(TriGState::PredicateObjectList);
368                        self
369                    }
370                    N3Token::Punctuation("(") => {
371                        self.stack.push(TriGState::PredicateObjectList);
372                        self.stack.push(TriGState::SubjectCollectionBeginning);
373                        self
374                    }
375                    #[cfg(feature = "rdf-star")]
376                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
377                        self.stack.push(TriGState::PredicateObjectList);
378                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
379                        self.stack.push(TriGState::QuotedObject);
380                        self.stack.push(TriGState::Verb);
381                        self.stack.push(TriGState::QuotedSubject);
382                        self
383                    }
384                    _ => self.error(errors, "TOKEN is not a valid RDF subject"),
385                },
386                TriGState::TriplesBlankNodePropertyListCurrent => {
387                    if token == N3Token::Punctuation("]") {
388                        self.stack.push(TriGState::PredicateObjectList);
389                        self
390                    } else {
391                        self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
392                        self.stack.push(TriGState::PredicateObjectList);
393                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
394                    }
395                }
396                // [7g]  labelOrSubject  ::=  iri | BlankNode
397                TriGState::GraphName => match token {
398                    N3Token::IriRef(iri) => {
399                        self.cur_graph = NamedNode::new_unchecked(iri).into();
400                        self
401                    }
402                    N3Token::PrefixedName {
403                        prefix,
404                        local,
405                        might_be_invalid_iri,
406                    } => match resolve_local_name(
407                        prefix,
408                        &local,
409                        might_be_invalid_iri,
410                        &context.prefixes,
411                    ) {
412                        Ok(t) => {
413                            self.cur_graph = t.into();
414                            self
415                        }
416                        Err(e) => self.error(errors, e),
417                    },
418                    N3Token::BlankNodeLabel(label) => {
419                        self.cur_graph = BlankNode::new_unchecked(label).into();
420                        self
421                    }
422                    N3Token::Punctuation("[") => {
423                        self.stack.push(TriGState::GraphNameAnonEnd);
424                        self
425                    }
426                    _ => self.error(errors, "TOKEN is not a valid graph name"),
427                },
428                TriGState::GraphNameAnonEnd => {
429                    if token == N3Token::Punctuation("]") {
430                        self.cur_graph = BlankNode::default().into();
431                        self
432                    } else {
433                        self.error(errors, "Anonymous blank node with a property list are not allowed as graph name")
434                    }
435                }
436                // [7]  predicateObjectList  ::=  verb objectList (';' (verb objectList)?)*
437                TriGState::PredicateObjectList => {
438                    self.stack.push(TriGState::PredicateObjectListEnd);
439                    self.stack.push(TriGState::ObjectsList);
440                    self.stack.push(TriGState::Verb);
441                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
442                }
443                TriGState::PredicateObjectListEnd => {
444                    self.cur_predicate.pop();
445                    if token == N3Token::Punctuation(";") {
446                        self.stack
447                            .push(TriGState::PredicateObjectListPossibleContinuation);
448                        self
449                    } else {
450                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
451                    }
452                }
453                TriGState::PredicateObjectListPossibleContinuation => {
454                    if token == N3Token::Punctuation(";") {
455                        self.stack
456                            .push(TriGState::PredicateObjectListPossibleContinuation);
457                        self
458                    } else if matches!(token, N3Token::Punctuation("." | "}" | "]")) {
459                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
460                    } else {
461                        self.stack.push(TriGState::PredicateObjectListEnd);
462                        self.stack.push(TriGState::ObjectsList);
463                        self.stack.push(TriGState::Verb);
464                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
465                    }
466                }
467                // [8]   objectList  ::=  object annotation? ( ',' object annotation? )*
468                // [30t] annotation  ::=  '{|' predicateObjectList '|}'
469                TriGState::ObjectsList => {
470                    self.stack.push(TriGState::ObjectsListEnd);
471                    self.stack.push(TriGState::Object);
472                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
473                }
474                TriGState::ObjectsListEnd => match token {
475                    N3Token::Punctuation(",") => {
476                        self.cur_object.pop();
477                        self.stack.push(TriGState::ObjectsListEnd);
478                        self.stack.push(TriGState::Object);
479                        self
480                    }
481                    #[cfg(feature = "rdf-star")]
482                    N3Token::Punctuation("{|") => {
483                        let triple = Triple::new(
484                            self.cur_subject.last().unwrap().clone(),
485                            self.cur_predicate.last().unwrap().clone(),
486                            self.cur_object.pop().unwrap(),
487                        );
488                        self.cur_subject.push(triple.into());
489                        self.stack.push(TriGState::AnnotationEnd);
490                        self.stack.push(TriGState::PredicateObjectList);
491                        self
492                    }
493                    _ => {
494                        self.cur_object.pop();
495                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
496                    }
497                },
498                #[cfg(feature = "rdf-star")]
499                TriGState::AnnotationEnd => {
500                    self.cur_subject.pop();
501                    self.stack.push(TriGState::ObjectsListAfterAnnotation);
502                    if token == N3Token::Punctuation("|}") {
503                        self
504                    } else {
505                        self.error(errors, "Annotations should end with '|}'")
506                    }
507                }
508                #[cfg(feature = "rdf-star")]
509                TriGState::ObjectsListAfterAnnotation => {
510                    if token == N3Token::Punctuation(",") {
511                        self.stack.push(TriGState::ObjectsListEnd);
512                        self.stack.push(TriGState::Object);
513                        self
514                    } else {
515                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
516                    }
517                }
518                // [9]   verb       ::=  predicate | 'a'
519                // [11]  predicate  ::=  iri
520                TriGState::Verb => match token {
521                    N3Token::PlainKeyword("a") => {
522                        self.cur_predicate.push(rdf::TYPE.into());
523                        self
524                    }
525                    N3Token::IriRef(iri) => {
526                        self.cur_predicate.push(NamedNode::new_unchecked(iri));
527                        self
528                    }
529                    N3Token::PrefixedName {
530                        prefix,
531                        local,
532                        might_be_invalid_iri,
533                    } => match resolve_local_name(
534                        prefix,
535                        &local,
536                        might_be_invalid_iri,
537                        &context.prefixes,
538                    ) {
539                        Ok(t) => {
540                            self.cur_predicate.push(t);
541                            self
542                        }
543                        Err(e) => self.error(errors, e),
544                    },
545                    _ => self.error(errors, "TOKEN is not a valid predicate"),
546                },
547                // [12]    object                 ::=  iri | BlankNode | collection | blankNodePropertyList | literal | quotedTriple
548                // [13]    literal                ::=  RDFLiteral | NumericLiteral | BooleanLiteral
549                // [14]    blank                  ::=  BlankNode | collection
550                // [15]    blankNodePropertyList  ::=  '[' predicateObjectList ']'
551                // [16]    collection             ::=  '(' object* ')'
552                // [17]    NumericLiteral         ::=  INTEGER | DECIMAL | DOUBLE
553                // [128s]  RDFLiteral             ::=  String (LANGTAG | '^^' iri)?
554                // [133s]  BooleanLiteral         ::=  'true' | 'false'
555                // [18]    String                 ::=  STRING_LITERAL_QUOTE | STRING_LITERAL_SINGLE_QUOTE | STRING_LITERAL_LONG_SINGLE_QUOTE | STRING_LITERAL_LONG_QUOTE
556                // [135s]  iri                    ::=  IRIREF | PrefixedName
557                // [136s]  PrefixedName           ::=  PNAME_LN | PNAME_NS
558                // [137s]  BlankNode              ::=  BLANK_NODE_LABEL | ANON
559                TriGState::Object => match token {
560                    N3Token::IriRef(iri) => {
561                        self.cur_object.push(NamedNode::new_unchecked(iri).into());
562                        self.emit_quad(results);
563                        self
564                    }
565                    N3Token::PrefixedName {
566                        prefix,
567                        local,
568                        might_be_invalid_iri,
569                    } => match resolve_local_name(
570                        prefix,
571                        &local,
572                        might_be_invalid_iri,
573                        &context.prefixes,
574                    ) {
575                        Ok(t) => {
576                            self.cur_object.push(t.into());
577                            self.emit_quad(results);
578                            self
579                        }
580                        Err(e) => self.error(errors, e),
581                    },
582                    N3Token::BlankNodeLabel(label) => {
583                        self.cur_object.push(BlankNode::new_unchecked(label).into());
584                        self.emit_quad(results);
585                        self
586                    }
587                    N3Token::Punctuation("[") => {
588                        self.stack
589                            .push(TriGState::ObjectBlankNodePropertyListCurrent);
590                        self
591                    }
592                    N3Token::Punctuation("(") => {
593                        self.stack.push(TriGState::ObjectCollectionBeginning);
594                        self
595                    }
596                    N3Token::String(value) => {
597                        self.stack
598                            .push(TriGState::LiteralPossibleSuffix { value, emit: true });
599                        self
600                    }
601                    N3Token::Integer(v) => {
602                        self.cur_object
603                            .push(Literal::new_typed_literal(v, xsd::INTEGER).into());
604                        self.emit_quad(results);
605                        self
606                    }
607                    N3Token::Decimal(v) => {
608                        self.cur_object
609                            .push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
610                        self.emit_quad(results);
611                        self
612                    }
613                    N3Token::Double(v) => {
614                        self.cur_object
615                            .push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
616                        self.emit_quad(results);
617                        self
618                    }
619                    N3Token::PlainKeyword("true") => {
620                        self.cur_object
621                            .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
622                        self.emit_quad(results);
623                        self
624                    }
625                    N3Token::PlainKeyword("false") => {
626                        self.cur_object
627                            .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
628                        self.emit_quad(results);
629                        self
630                    }
631                    #[cfg(feature = "rdf-star")]
632                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
633                        self.stack
634                            .push(TriGState::ObjectQuotedTripleEnd { emit: true });
635                        self.stack.push(TriGState::QuotedObject);
636                        self.stack.push(TriGState::Verb);
637                        self.stack.push(TriGState::QuotedSubject);
638                        self
639                    }
640                    _ => self.error(errors, "TOKEN is not a valid RDF object"),
641                },
642                TriGState::ObjectBlankNodePropertyListCurrent => {
643                    if token == N3Token::Punctuation("]") {
644                        self.cur_object.push(BlankNode::default().into());
645                        self.emit_quad(results);
646                        self
647                    } else {
648                        self.cur_subject.push(BlankNode::default().into());
649                        self.stack.push(TriGState::ObjectBlankNodePropertyListEnd);
650                        self.stack.push(TriGState::PredicateObjectList);
651                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
652                    }
653                }
654                TriGState::ObjectBlankNodePropertyListEnd => {
655                    if token == N3Token::Punctuation("]") {
656                        self.cur_object.push(self.cur_subject.pop().unwrap().into());
657                        self.emit_quad(results);
658                        self
659                    } else {
660                        self.error(errors, "blank node property lists should end with a ']'")
661                    }
662                }
663                TriGState::ObjectCollectionBeginning => {
664                    if let N3Token::Punctuation(")") = token {
665                        self.cur_object.push(rdf::NIL.into());
666                        self.emit_quad(results);
667                        self
668                    } else {
669                        let root = BlankNode::default();
670                        self.cur_object.push(root.clone().into());
671                        self.emit_quad(results);
672                        self.cur_subject.push(root.into());
673                        self.cur_predicate.push(rdf::FIRST.into());
674                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
675                        self.stack.push(TriGState::Object);
676                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
677                    }
678                }
679                TriGState::ObjectCollectionPossibleEnd => {
680                    let old = self.cur_subject.pop().unwrap();
681                    self.cur_object.pop();
682                    if let N3Token::Punctuation(")") = token {
683                        self.cur_predicate.pop();
684                        results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
685                        self
686                    } else {
687                        let new = BlankNode::default();
688                        results.push(Quad::new(
689                            old,
690                            rdf::REST,
691                            new.clone(),
692                            self.cur_graph.clone(),
693                        ));
694                        self.cur_subject.push(new.into());
695                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
696                        self.stack.push(TriGState::Object);
697                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
698                    }
699                }
700                TriGState::LiteralPossibleSuffix { value, emit } => match token {
701                    N3Token::LangTag(lang) => {
702                        self.cur_object.push(
703                            Literal::new_language_tagged_literal_unchecked(
704                                value,
705                                lang.to_ascii_lowercase(),
706                            )
707                            .into(),
708                        );
709                        if emit {
710                            self.emit_quad(results);
711                        }
712                        self
713                    }
714                    N3Token::Punctuation("^^") => {
715                        self.stack
716                            .push(TriGState::LiteralExpectDatatype { value, emit });
717                        self
718                    }
719                    _ => {
720                        self.cur_object
721                            .push(Literal::new_simple_literal(value).into());
722                        if emit {
723                            self.emit_quad(results);
724                        }
725                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
726                    }
727                },
728                TriGState::LiteralExpectDatatype { value, emit } => match token {
729                    N3Token::IriRef(datatype) => {
730                        self.cur_object.push(
731                            Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype))
732                                .into(),
733                        );
734                        if emit {
735                            self.emit_quad(results);
736                        }
737                        self
738                    }
739                    N3Token::PrefixedName {
740                        prefix,
741                        local,
742                        might_be_invalid_iri,
743                    } => match resolve_local_name(
744                        prefix,
745                        &local,
746                        might_be_invalid_iri,
747                        &context.prefixes,
748                    ) {
749                        Ok(t) => {
750                            self.cur_object
751                                .push(Literal::new_typed_literal(value, t).into());
752                            if emit {
753                                self.emit_quad(results);
754                            }
755                            self
756                        }
757                        Err(e) => self.error(errors, e),
758                    },
759                    _ => self
760                        .error(errors, "Expecting a datatype IRI after ^^, found TOKEN")
761                        .recognize_next(TokenOrLineJump::Token(token), context, results, errors),
762                },
763                // [27t]  quotedTriple  ::=  '<<' qtSubject verb qtObject '>>'
764                #[cfg(feature = "rdf-star")]
765                TriGState::SubjectQuotedTripleEnd => {
766                    let triple = Triple::new(
767                        self.cur_subject.pop().unwrap(),
768                        self.cur_predicate.pop().unwrap(),
769                        self.cur_object.pop().unwrap(),
770                    );
771                    self.cur_subject.push(triple.into());
772                    if token == N3Token::Punctuation(">>") {
773                        self
774                    } else {
775                        self.error(
776                            errors,
777                            "Expecting '>>' to close a quoted triple, found TOKEN",
778                        )
779                    }
780                }
781                #[cfg(feature = "rdf-star")]
782                TriGState::ObjectQuotedTripleEnd { emit } => {
783                    let triple = Triple::new(
784                        self.cur_subject.pop().unwrap(),
785                        self.cur_predicate.pop().unwrap(),
786                        self.cur_object.pop().unwrap(),
787                    );
788                    self.cur_object.push(triple.into());
789                    if emit {
790                        self.emit_quad(results);
791                    }
792                    if token == N3Token::Punctuation(">>") {
793                        self
794                    } else {
795                        self.error(
796                            errors,
797                            "Expecting '>>' to close a quoted triple, found TOKEN",
798                        )
799                    }
800                }
801                // [28t]  qtSubject  ::=  iri | BlankNode | quotedTriple
802                #[cfg(feature = "rdf-star")]
803                TriGState::QuotedSubject => match token {
804                    N3Token::Punctuation("[") => {
805                        self.cur_subject.push(BlankNode::default().into());
806                        self.stack.push(TriGState::QuotedAnonEnd);
807                        self
808                    }
809                    N3Token::IriRef(iri) => {
810                        self.cur_subject.push(NamedNode::new_unchecked(iri).into());
811                        self
812                    }
813                    N3Token::PrefixedName {
814                        prefix,
815                        local,
816                        might_be_invalid_iri,
817                    } => match resolve_local_name(
818                        prefix,
819                        &local,
820                        might_be_invalid_iri,
821                        &context.prefixes,
822                    ) {
823                        Ok(t) => {
824                            self.cur_subject.push(t.into());
825                            self
826                        }
827                        Err(e) => self.error(errors, e),
828                    },
829                    N3Token::BlankNodeLabel(label) => {
830                        self.cur_subject
831                            .push(BlankNode::new_unchecked(label).into());
832                        self
833                    }
834                    N3Token::Punctuation("<<") => {
835                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
836                        self.stack.push(TriGState::QuotedObject);
837                        self.stack.push(TriGState::Verb);
838                        self.stack.push(TriGState::QuotedSubject);
839                        self
840                    }
841                    _ => self.error(
842                        errors,
843                        "TOKEN is not a valid RDF quoted triple subject: TOKEN",
844                    ),
845                },
846                // [29t]  qtObject  ::=  iri | BlankNode | literal | quotedTriple
847                #[cfg(feature = "rdf-star")]
848                TriGState::QuotedObject => match token {
849                    N3Token::Punctuation("[") => {
850                        self.cur_object.push(BlankNode::default().into());
851                        self.stack.push(TriGState::QuotedAnonEnd);
852                        self
853                    }
854                    N3Token::IriRef(iri) => {
855                        self.cur_object.push(NamedNode::new_unchecked(iri).into());
856                        self
857                    }
858                    N3Token::PrefixedName {
859                        prefix,
860                        local,
861                        might_be_invalid_iri,
862                    } => match resolve_local_name(
863                        prefix,
864                        &local,
865                        might_be_invalid_iri,
866                        &context.prefixes,
867                    ) {
868                        Ok(t) => {
869                            self.cur_object.push(t.into());
870                            self
871                        }
872                        Err(e) => self.error(errors, e),
873                    },
874                    N3Token::BlankNodeLabel(label) => {
875                        self.cur_object.push(BlankNode::new_unchecked(label).into());
876                        self
877                    }
878                    N3Token::String(value) => {
879                        self.stack
880                            .push(TriGState::LiteralPossibleSuffix { value, emit: false });
881                        self
882                    }
883                    N3Token::Integer(v) => {
884                        self.cur_object
885                            .push(Literal::new_typed_literal(v, xsd::INTEGER).into());
886                        self
887                    }
888                    N3Token::Decimal(v) => {
889                        self.cur_object
890                            .push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
891                        self
892                    }
893                    N3Token::Double(v) => {
894                        self.cur_object
895                            .push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
896                        self
897                    }
898                    N3Token::PlainKeyword("true") => {
899                        self.cur_object
900                            .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
901                        self
902                    }
903                    N3Token::PlainKeyword("false") => {
904                        self.cur_object
905                            .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
906                        self
907                    }
908                    N3Token::Punctuation("<<") => {
909                        self.stack
910                            .push(TriGState::ObjectQuotedTripleEnd { emit: false });
911                        self.stack.push(TriGState::QuotedObject);
912                        self.stack.push(TriGState::Verb);
913                        self.stack.push(TriGState::QuotedSubject);
914                        self
915                    }
916                    _ => self.error(errors, "TOKEN is not a valid RDF quoted triple object"),
917                },
918                #[cfg(feature = "rdf-star")]
919                TriGState::QuotedAnonEnd => {
920                    if token == N3Token::Punctuation("]") {
921                        self
922                    } else {
923                        self.error(errors, "Anonymous blank node with a property list are not allowed in quoted triples")
924                    }
925                }
926            }
927        } else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") {
928            // TODO: be smarter depending if we are in '{' or not
929            self.stack.push(TriGState::TriGDoc);
930            self
931        } else {
932            self
933        }
934    }
935
936    fn recognize_end(
937        mut self,
938        _context: &mut TriGRecognizerContext,
939        results: &mut Vec<Self::Output>,
940        errors: &mut Vec<RuleRecognizerError>,
941    ) {
942        match &*self.stack {
943            [] | [TriGState::TriGDoc] => {
944                debug_assert!(
945                    self.cur_subject.is_empty(),
946                    "The cur_subject stack must be empty if the state stack is empty"
947                );
948                debug_assert!(
949                    self.cur_predicate.is_empty(),
950                    "The cur_predicate stack must be empty if the state stack is empty"
951                );
952                debug_assert!(
953                    self.cur_object.is_empty(),
954                    "The cur_object stack must be empty if the state stack is empty"
955                );
956            }
957            [.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
958                self.cur_object
959                    .push(Literal::new_simple_literal(value).into());
960                self.emit_quad(results);
961                errors.push("Triples should be followed by a dot".into())
962            }
963            _ => errors.push("Unexpected end".into()), // TODO
964        }
965    }
966
967    fn lexer_options(context: &TriGRecognizerContext) -> &N3LexerOptions {
968        &context.lexer_options
969    }
970}
971
972impl TriGRecognizer {
973    #[allow(clippy::fn_params_excessive_bools)]
974    pub fn new_parser<B>(
975        data: B,
976        is_ending: bool,
977        with_graph_name: bool,
978        #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
979        unchecked: bool,
980        base_iri: Option<Iri<String>>,
981        prefixes: HashMap<String, Iri<String>>,
982    ) -> Parser<B, Self> {
983        Parser::new(
984            Lexer::new(
985                N3Lexer::new(N3LexerMode::Turtle, unchecked),
986                data,
987                is_ending,
988                MIN_BUFFER_SIZE,
989                MAX_BUFFER_SIZE,
990                Some(b"#"),
991            ),
992            Self {
993                stack: vec![TriGState::TriGDoc],
994                cur_subject: Vec::new(),
995                cur_predicate: Vec::new(),
996                cur_object: Vec::new(),
997                cur_graph: GraphName::DefaultGraph,
998            },
999            TriGRecognizerContext {
1000                with_graph_name,
1001                #[cfg(feature = "rdf-star")]
1002                with_quoted_triples,
1003                prefixes,
1004                lexer_options: N3LexerOptions { base_iri },
1005            },
1006        )
1007    }
1008
1009    #[must_use]
1010    fn error(
1011        mut self,
1012        errors: &mut Vec<RuleRecognizerError>,
1013        msg: impl Into<RuleRecognizerError>,
1014    ) -> Self {
1015        errors.push(msg.into());
1016        self.stack.clear();
1017        self.cur_subject.clear();
1018        self.cur_predicate.clear();
1019        self.cur_object.clear();
1020        self.cur_graph = GraphName::DefaultGraph;
1021        self
1022    }
1023
1024    fn emit_quad(&mut self, results: &mut Vec<Quad>) {
1025        results.push(Quad::new(
1026            self.cur_subject.last().unwrap().clone(),
1027            self.cur_predicate.last().unwrap().clone(),
1028            self.cur_object.last().unwrap().clone(),
1029            self.cur_graph.clone(),
1030        ));
1031    }
1032}
1033
1034#[derive(Debug)]
1035enum TriGState {
1036    TriGDoc,
1037    ExpectDot,
1038    BaseExpectIri,
1039    PrefixExpectPrefix,
1040    PrefixExpectIri {
1041        name: String,
1042    },
1043    TriplesOrGraph,
1044    WrappedGraphBlankNodePropertyListCurrent,
1045    SubjectBlankNodePropertyListEnd,
1046    SubjectBlankNodePropertyListAfter,
1047    SubjectCollectionBeginning,
1048    SubjectCollectionPossibleEnd,
1049    WrappedGraphOrPredicateObjectList {
1050        term: NamedOrBlankNode,
1051    },
1052    WrappedGraph,
1053    WrappedGraphPossibleEnd,
1054    GraphName,
1055    GraphNameAnonEnd,
1056    Triples,
1057    TriplesBlankNodePropertyListCurrent,
1058    PredicateObjectList,
1059    PredicateObjectListEnd,
1060    PredicateObjectListPossibleContinuation,
1061    ObjectsList,
1062    ObjectsListEnd,
1063    #[cfg(feature = "rdf-star")]
1064    AnnotationEnd,
1065    #[cfg(feature = "rdf-star")]
1066    ObjectsListAfterAnnotation,
1067    Verb,
1068    Object,
1069    ObjectBlankNodePropertyListCurrent,
1070    ObjectBlankNodePropertyListEnd,
1071    ObjectCollectionBeginning,
1072    ObjectCollectionPossibleEnd,
1073    LiteralPossibleSuffix {
1074        value: String,
1075        emit: bool,
1076    },
1077    LiteralExpectDatatype {
1078        value: String,
1079        emit: bool,
1080    },
1081    #[cfg(feature = "rdf-star")]
1082    SubjectQuotedTripleEnd,
1083    #[cfg(feature = "rdf-star")]
1084    ObjectQuotedTripleEnd {
1085        emit: bool,
1086    },
1087    #[cfg(feature = "rdf-star")]
1088    QuotedSubject,
1089    #[cfg(feature = "rdf-star")]
1090    QuotedObject,
1091    #[cfg(feature = "rdf-star")]
1092    QuotedAnonEnd,
1093}