1use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
4use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError, TokenOrLineJump};
5use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
6use oxiri::Iri;
7use oxrdf::vocab::{rdf, xsd};
8#[cfg(feature = "rdf-star")]
9use oxrdf::Triple;
10use oxrdf::{BlankNode, GraphName, Literal, NamedNode, NamedOrBlankNode, Quad, Subject, Term};
11use std::collections::hash_map::Iter;
12use std::collections::HashMap;
13
/// Push-down recognizer that turns a stream of [`N3Token`]s into [`Quad`]s.
///
/// The recognizer keeps an explicit stack of pending [`TriGState`]s plus three
/// parallel stacks for the subject, predicate and object currently being built.
/// Nested constructs (blank node property lists, collections, quoted triples,
/// annotations) push extra entries on these stacks and pop them when they close.
pub struct TriGRecognizer {
    /// States still to be processed; the last element is handled first.
    stack: Vec<TriGState>,
    /// Subjects of the statements being parsed, innermost last.
    cur_subject: Vec<Subject>,
    /// Predicates of the statements being parsed, innermost last.
    cur_predicate: Vec<NamedNode>,
    /// Objects of the statements being parsed, innermost last.
    cur_object: Vec<Term>,
    /// Graph name attached to every emitted quad; reset to the default graph
    /// each time the `TriGDoc` state is entered and on error recovery.
    cur_graph: GraphName,
}
21
/// Mutable parsing context shared between the lexer and [`TriGRecognizer`].
// The prefix map is intentionally private (read access goes through
// `prefixes()`), hence the mix of public and private fields.
#[allow(clippy::partial_pub_fields)]
pub struct TriGRecognizerContext {
    /// Options handed to the lexer; `base_iri` is updated by BASE declarations.
    pub lexer_options: N3LexerOptions,
    /// When `true`, the `GRAPH` keyword and `{ ... }` graph blocks are recognized.
    pub with_graph_name: bool,
    /// When `true`, `<<` opens a quoted triple in subject and object position.
    #[cfg(feature = "rdf-star")]
    pub with_quoted_triples: bool,
    /// Prefix name to IRI mapping, filled by PREFIX declarations and used to
    /// resolve prefixed names.
    prefixes: HashMap<String, Iri<String>>,
}
30
31impl TriGRecognizerContext {
32 pub fn prefixes(&self) -> Iter<'_, String, Iri<String>> {
33 self.prefixes.iter()
34 }
35}
36
37impl RuleRecognizer for TriGRecognizer {
38 type TokenRecognizer = N3Lexer;
39 type Output = Quad;
40 type Context = TriGRecognizerContext;
41
42 fn error_recovery_state(mut self) -> Self {
43 self.stack.clear();
44 self.cur_subject.clear();
45 self.cur_predicate.clear();
46 self.cur_object.clear();
47 self.cur_graph = GraphName::DefaultGraph;
48 self
49 }
50
    /// Feeds one token to the recognizer and returns its updated state.
    ///
    /// The state on top of `stack` is popped and decides what to do with the
    /// token. A state either consumes the token (and returns `self`) or pushes
    /// follow-up states and calls `recognize_next` again with the *same* token
    /// so that the new top of the stack can look at it. States are pushed in
    /// reverse order of execution: the last one pushed runs first.
    ///
    /// Complete quads are appended to `results`, problems to `errors`.
    /// Line jumps are ignored. If the stack is empty (it is cleared by `error`
    /// and `error_recovery_state`), tokens are skipped until a `.` or `}` is
    /// seen, after which parsing restarts from `TriGDoc`.
    ///
    /// State names appear to mirror the productions of the TriG grammar
    /// (NOTE(review): confirm against the W3C TriG specification).
    fn recognize_next(
        mut self,
        token: TokenOrLineJump<N3Token<'_>>,
        context: &mut TriGRecognizerContext,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self {
        // Line jumps carry no information for this recognizer.
        let TokenOrLineJump::Token(token) = token else {
            return self;
        };
        if let Some(rule) = self.stack.pop() {
            match rule {
                // Top level of the document: a directive, a graph block or a triple statement.
                TriGState::TriGDoc => {
                    // Each top-level item starts in the default graph, and the
                    // state re-pushes itself so that it is found again once the
                    // item has been fully processed.
                    self.cur_graph = GraphName::DefaultGraph;
                    self.stack.push(TriGState::TriGDoc);
                    match token {
                        // Keyword forms (case-insensitive): no trailing dot is expected.
                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
                            self.stack.push(TriGState::BaseExpectIri);
                            self
                        }
                        N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
                            self.stack.push(TriGState::PrefixExpectPrefix);
                            self
                        }
                        // Presumably the `@prefix` / `@base` forms, reported by the
                        // lexer as language tags; these must be closed by a dot.
                        N3Token::LangTag("prefix") => {
                            self.stack.push(TriGState::ExpectDot);
                            self.stack.push(TriGState::PrefixExpectPrefix);
                            self
                        }
                        N3Token::LangTag("base") => {
                            self.stack.push(TriGState::ExpectDot);
                            self.stack.push(TriGState::BaseExpectIri);
                            self
                        }
                        // `GRAPH <name> { ... }`: read the name, then the block.
                        N3Token::PlainKeyword(k)
                            if k.eq_ignore_ascii_case("graph") && context.with_graph_name =>
                        {
                            self.stack.push(TriGState::WrappedGraph);
                            self.stack.push(TriGState::GraphName);
                            self
                        }
                        // Anonymous `{ ... }` block: `WrappedGraph` consumes the brace itself.
                        N3Token::Punctuation("{") if context.with_graph_name => {
                            self.stack.push(TriGState::WrappedGraph);
                            self.recognize_next(
                                TokenOrLineJump::Token(token),
                                context,
                                results,
                                errors,
                            )
                        }
                        // Anything else starts either a triple statement or a named graph block.
                        _ => {
                            self.stack.push(TriGState::TriplesOrGraph);
                            self.recognize_next(
                                TokenOrLineJump::Token(token),
                                context,
                                results,
                                errors,
                            )
                        }
                    }
                }
                // End of a top-level statement.
                TriGState::ExpectDot => {
                    // The statement is over: drop its subject. This is a no-op
                    // when no subject was pushed (e.g. after a directive).
                    self.cur_subject.pop();
                    if token == N3Token::Punctuation(".") {
                        self
                    } else {
                        // Report the missing dot but keep going with the same token.
                        errors.push("A dot is expected at the end of statements".into());
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // The IRI following a base declaration becomes the lexer's base IRI.
                TriGState::BaseExpectIri => {
                    if let N3Token::IriRef(iri) = token {
                        context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri));
                        self
                    } else {
                        self.error(errors, "The BASE keyword should be followed by an IRI")
                    }
                }
                // The prefix name of a prefix declaration: a prefixed name with an empty local part.
                TriGState::PrefixExpectPrefix => match token {
                    N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
                        self.stack.push(TriGState::PrefixExpectIri {
                            name: prefix.to_owned(),
                        });
                        self
                    }
                    _ => self.error(
                        errors,
                        "The PREFIX keyword should be followed by a prefix like 'ex:'",
                    ),
                },
                // The IRI of a prefix declaration: register (or overwrite) the mapping.
                TriGState::PrefixExpectIri { name } => {
                    if let N3Token::IriRef(iri) = token {
                        context.prefixes.insert(name, Iri::parse_unchecked(iri));
                        self
                    } else {
                        self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI")
                    }
                }
                // First token of a top-level item that is either a triple
                // statement or a graph block introduced by its name.
                TriGState::TriplesOrGraph => match token {
                    // IRIs and blank nodes may be a subject or a graph name:
                    // the decision is deferred to the next token.
                    N3Token::IriRef(iri) => {
                        self.stack
                            .push(TriGState::WrappedGraphOrPredicateObjectList {
                                term: NamedNode::new_unchecked(iri).into(),
                            });
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.stack
                                .push(TriGState::WrappedGraphOrPredicateObjectList {
                                    term: t.into(),
                                });
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.stack
                            .push(TriGState::WrappedGraphOrPredicateObjectList {
                                term: BlankNode::new_unchecked(label).into(),
                            });
                        self
                    }
                    // `[`: either `[]` (anonymous node) or a blank node property list.
                    N3Token::Punctuation("[") => {
                        self.stack
                            .push(TriGState::WrappedGraphBlankNodePropertyListCurrent);
                        self
                    }
                    // A collection used as subject: collection, then its predicates, then the dot.
                    N3Token::Punctuation("(") => {
                        self.stack.push(TriGState::ExpectDot);
                        self.stack.push(TriGState::PredicateObjectList);
                        self.stack.push(TriGState::SubjectCollectionBeginning);
                        self
                    }
                    // A quoted triple used as subject: inner subject, verb and
                    // object are read first, then `>>`, then the outer predicates.
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        self.stack.push(TriGState::ExpectDot);
                        self.stack.push(TriGState::PredicateObjectList);
                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
                        self.stack.push(TriGState::QuotedObject);
                        self.stack.push(TriGState::Verb);
                        self.stack.push(TriGState::QuotedSubject);
                        self
                    }
                    _ => self.error(errors, "TOKEN is not a valid subject or graph name"),
                },
                // A term was read; the current token tells whether it names a
                // graph block (`{`) or is the subject of a triple statement.
                TriGState::WrappedGraphOrPredicateObjectList { term } => {
                    if token == N3Token::Punctuation("{") && context.with_graph_name {
                        self.cur_graph = term.into();
                        self.stack.push(TriGState::WrappedGraph);
                    } else {
                        self.cur_subject.push(term.into());
                        self.stack.push(TriGState::ExpectDot);
                        self.stack.push(TriGState::PredicateObjectList);
                    }
                    // The token itself is handled by the state just pushed.
                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                }
                // Just after a top-level `[`.
                TriGState::WrappedGraphBlankNodePropertyListCurrent => {
                    if token == N3Token::Punctuation("]") {
                        // `[]`: a fresh blank node that may be a subject or a graph name.
                        self.stack
                            .push(TriGState::WrappedGraphOrPredicateObjectList {
                                term: BlankNode::default().into(),
                            });
                        self
                    } else {
                        // `[ p o ... ]`: a fresh blank node is the subject of the inner list.
                        self.cur_subject.push(BlankNode::default().into());
                        self.stack.push(TriGState::ExpectDot);
                        self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
                        self.stack.push(TriGState::PredicateObjectList);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // Closing `]` of a blank node property list in subject position.
                TriGState::SubjectBlankNodePropertyListEnd => {
                    if token == N3Token::Punctuation("]") {
                        self.stack
                            .push(TriGState::SubjectBlankNodePropertyListAfter);
                        self
                    } else {
                        // Report the missing `]` and continue as if it had been there.
                        errors.push("blank node property lists should end with a ']'".into());
                        self.stack
                            .push(TriGState::SubjectBlankNodePropertyListAfter);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // After `[ ... ]` in subject position a predicate-object list is optional.
                TriGState::SubjectBlankNodePropertyListAfter => {
                    if matches!(token, N3Token::Punctuation("." | "}")) {
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    } else {
                        self.stack.push(TriGState::PredicateObjectList);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // Just after the `(` of a collection in subject position.
                TriGState::SubjectCollectionBeginning => {
                    if let N3Token::Punctuation(")") = token {
                        // `()` is rdf:nil.
                        self.cur_subject.push(rdf::NIL.into());
                        self
                    } else {
                        // The root node is pushed twice: the lower entry stays as
                        // the statement subject, the upper one is the list cell
                        // that is popped and replaced while walking the list.
                        let root = BlankNode::default();
                        self.cur_subject.push(root.clone().into());
                        self.cur_subject.push(root.into());
                        self.cur_predicate.push(rdf::FIRST.into());
                        self.stack.push(TriGState::SubjectCollectionPossibleEnd);
                        self.stack.push(TriGState::Object);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // After one element of a subject collection.
                TriGState::SubjectCollectionPossibleEnd => {
                    // Drop the finished list cell and the element just emitted.
                    let old = self.cur_subject.pop().unwrap();
                    self.cur_object.pop();
                    if let N3Token::Punctuation(")") = token {
                        // End of list: drop the rdf:first predicate and close with rdf:nil.
                        self.cur_predicate.pop();
                        results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
                        self
                    } else {
                        // Link to a new cell and read the next element. From here on
                        // the object-collection state is reused for the rest of the list.
                        let new = BlankNode::default();
                        results.push(Quad::new(
                            old,
                            rdf::REST,
                            new.clone(),
                            self.cur_graph.clone(),
                        ));
                        self.cur_subject.push(new.into());
                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
                        self.stack.push(TriGState::Object);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // Opening `{` of a graph block.
                TriGState::WrappedGraph => {
                    if token == N3Token::Punctuation("{") {
                        self.stack.push(TriGState::WrappedGraphPossibleEnd);
                        self.stack.push(TriGState::Triples);
                        self
                    } else {
                        self.error(errors, "The GRAPH keyword should be followed by a graph name and a value in '{'")
                    }
                }
                // After a statement inside a graph block.
                TriGState::WrappedGraphPossibleEnd => {
                    // The statement (if any) is over: drop its subject.
                    self.cur_subject.pop();
                    match token {
                        N3Token::Punctuation("}") => self,
                        // A dot may be followed by another statement or by `}`.
                        N3Token::Punctuation(".") => {
                            self.stack.push(TriGState::WrappedGraphPossibleEnd);
                            self.stack.push(TriGState::Triples);
                            self
                        }
                        _ => {
                            errors.push(
                                "A '}' or a '.' is expected at the end of a graph block".into(),
                            );
                            self.recognize_next(
                                TokenOrLineJump::Token(token),
                                context,
                                results,
                                errors,
                            )
                        }
                    }
                }
                // A statement inside a graph block: reads the subject.
                TriGState::Triples => match token {
                    // No statement at all: let `WrappedGraphPossibleEnd` consume the `}`.
                    N3Token::Punctuation("}") => {
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                    // `[`: the subject is a fresh blank node, with or without inner properties.
                    N3Token::Punctuation("[") => {
                        self.cur_subject.push(BlankNode::default().into());
                        self.stack
                            .push(TriGState::TriplesBlankNodePropertyListCurrent);
                        self
                    }
                    N3Token::IriRef(iri) => {
                        self.cur_subject.push(NamedNode::new_unchecked(iri).into());
                        self.stack.push(TriGState::PredicateObjectList);
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_subject.push(t.into());
                            self.stack.push(TriGState::PredicateObjectList);
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.cur_subject
                            .push(BlankNode::new_unchecked(label).into());
                        self.stack.push(TriGState::PredicateObjectList);
                        self
                    }
                    N3Token::Punctuation("(") => {
                        self.stack.push(TriGState::PredicateObjectList);
                        self.stack.push(TriGState::SubjectCollectionBeginning);
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        self.stack.push(TriGState::PredicateObjectList);
                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
                        self.stack.push(TriGState::QuotedObject);
                        self.stack.push(TriGState::Verb);
                        self.stack.push(TriGState::QuotedSubject);
                        self
                    }
                    _ => self.error(errors, "TOKEN is not a valid RDF subject"),
                },
                // Just after a `[` in subject position inside a graph block
                // (the blank node subject has already been pushed).
                TriGState::TriplesBlankNodePropertyListCurrent => {
                    if token == N3Token::Punctuation("]") {
                        self.stack.push(TriGState::PredicateObjectList);
                        self
                    } else {
                        self.stack.push(TriGState::SubjectBlankNodePropertyListEnd);
                        self.stack.push(TriGState::PredicateObjectList);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // The graph name following the GRAPH keyword.
                TriGState::GraphName => match token {
                    N3Token::IriRef(iri) => {
                        self.cur_graph = NamedNode::new_unchecked(iri).into();
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_graph = t.into();
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.cur_graph = BlankNode::new_unchecked(label).into();
                        self
                    }
                    N3Token::Punctuation("[") => {
                        self.stack.push(TriGState::GraphNameAnonEnd);
                        self
                    }
                    _ => self.error(errors, "TOKEN is not a valid graph name"),
                },
                // After `[` in graph name position: only `[]` is accepted.
                TriGState::GraphNameAnonEnd => {
                    if token == N3Token::Punctuation("]") {
                        self.cur_graph = BlankNode::default().into();
                        self
                    } else {
                        self.error(errors, "Anonymous blank node with a property list are not allowed as graph name")
                    }
                }
                // A verb followed by its objects, possibly repeated after `;`.
                TriGState::PredicateObjectList => {
                    self.stack.push(TriGState::PredicateObjectListEnd);
                    self.stack.push(TriGState::ObjectsList);
                    self.stack.push(TriGState::Verb);
                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                }
                // After the objects of one verb.
                TriGState::PredicateObjectListEnd => {
                    // The verb pushed by `Verb` is no longer needed.
                    self.cur_predicate.pop();
                    if token == N3Token::Punctuation(";") {
                        self.stack
                            .push(TriGState::PredicateObjectListPossibleContinuation);
                        self
                    } else {
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // After a `;`: repeated `;` are tolerated, and a terminator
                // (`.`, `}` or `]`) may follow directly without another verb.
                TriGState::PredicateObjectListPossibleContinuation => {
                    if token == N3Token::Punctuation(";") {
                        self.stack
                            .push(TriGState::PredicateObjectListPossibleContinuation);
                        self
                    } else if matches!(token, N3Token::Punctuation("." | "}" | "]")) {
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    } else {
                        self.stack.push(TriGState::PredicateObjectListEnd);
                        self.stack.push(TriGState::ObjectsList);
                        self.stack.push(TriGState::Verb);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // One or more objects separated by `,`.
                TriGState::ObjectsList => {
                    self.stack.push(TriGState::ObjectsListEnd);
                    self.stack.push(TriGState::Object);
                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                }
                // After one object: the object is still on top of `cur_object`.
                TriGState::ObjectsListEnd => match token {
                    N3Token::Punctuation(",") => {
                        self.cur_object.pop();
                        self.stack.push(TriGState::ObjectsListEnd);
                        self.stack.push(TriGState::Object);
                        self
                    }
                    // Annotation block: the triple just emitted becomes the
                    // subject of the predicate-object list inside `{| ... |}`.
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("{|") => {
                        let triple = Triple::new(
                            self.cur_subject.last().unwrap().clone(),
                            self.cur_predicate.last().unwrap().clone(),
                            self.cur_object.pop().unwrap(),
                        );
                        self.cur_subject.push(triple.into());
                        self.stack.push(TriGState::AnnotationEnd);
                        self.stack.push(TriGState::PredicateObjectList);
                        self
                    }
                    _ => {
                        self.cur_object.pop();
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                },
                // Closing `|}` of an annotation block.
                #[cfg(feature = "rdf-star")]
                TriGState::AnnotationEnd => {
                    // Drop the quoted-triple subject pushed when the block was opened.
                    self.cur_subject.pop();
                    self.stack.push(TriGState::ObjectsListAfterAnnotation);
                    if token == N3Token::Punctuation("|}") {
                        self
                    } else {
                        self.error(errors, "Annotations should end with '|}'")
                    }
                }
                // After an annotation block, further objects may follow a `,`.
                // The annotated object was already popped when the block was opened.
                #[cfg(feature = "rdf-star")]
                TriGState::ObjectsListAfterAnnotation => {
                    if token == N3Token::Punctuation(",") {
                        self.stack.push(TriGState::ObjectsListEnd);
                        self.stack.push(TriGState::Object);
                        self
                    } else {
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // A predicate: the keyword `a` (rdf:type), an IRI or a prefixed name.
                TriGState::Verb => match token {
                    N3Token::PlainKeyword("a") => {
                        self.cur_predicate.push(rdf::TYPE.into());
                        self
                    }
                    N3Token::IriRef(iri) => {
                        self.cur_predicate.push(NamedNode::new_unchecked(iri));
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_predicate.push(t);
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    _ => self.error(errors, "TOKEN is not a valid predicate"),
                },
                // An object. Simple terms are pushed on `cur_object` and the
                // quad is emitted right away; composite objects delegate to a
                // dedicated state that emits once the term is known.
                TriGState::Object => match token {
                    N3Token::IriRef(iri) => {
                        self.cur_object.push(NamedNode::new_unchecked(iri).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_object.push(t.into());
                            self.emit_quad(results);
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.cur_object.push(BlankNode::new_unchecked(label).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::Punctuation("[") => {
                        self.stack
                            .push(TriGState::ObjectBlankNodePropertyListCurrent);
                        self
                    }
                    N3Token::Punctuation("(") => {
                        self.stack.push(TriGState::ObjectCollectionBeginning);
                        self
                    }
                    // A string may still be followed by a language tag or a
                    // datatype, so the literal is built by the next state.
                    N3Token::String(value) => {
                        self.stack
                            .push(TriGState::LiteralPossibleSuffix { value, emit: true });
                        self
                    }
                    N3Token::Integer(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::INTEGER).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::Decimal(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::Double(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::PlainKeyword("true") => {
                        self.cur_object
                            .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
                        self.emit_quad(results);
                        self
                    }
                    N3Token::PlainKeyword("false") => {
                        self.cur_object
                            .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
                        self.emit_quad(results);
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        self.stack
                            .push(TriGState::ObjectQuotedTripleEnd { emit: true });
                        self.stack.push(TriGState::QuotedObject);
                        self.stack.push(TriGState::Verb);
                        self.stack.push(TriGState::QuotedSubject);
                        self
                    }
                    _ => self.error(errors, "TOKEN is not a valid RDF object"),
                },
                // Just after a `[` in object position.
                TriGState::ObjectBlankNodePropertyListCurrent => {
                    if token == N3Token::Punctuation("]") {
                        // `[]`: a fresh blank node is the object.
                        self.cur_object.push(BlankNode::default().into());
                        self.emit_quad(results);
                        self
                    } else {
                        // `[ p o ... ]`: the fresh blank node is first the
                        // subject of the inner predicate-object list.
                        self.cur_subject.push(BlankNode::default().into());
                        self.stack.push(TriGState::ObjectBlankNodePropertyListEnd);
                        self.stack.push(TriGState::PredicateObjectList);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // Closing `]` of a blank node property list in object position.
                TriGState::ObjectBlankNodePropertyListEnd => {
                    if token == N3Token::Punctuation("]") {
                        // The inner subject becomes the object of the enclosing triple.
                        self.cur_object.push(self.cur_subject.pop().unwrap().into());
                        self.emit_quad(results);
                        self
                    } else {
                        self.error(errors, "blank node property lists should end with a ']'")
                    }
                }
                // Just after the `(` of a collection in object position.
                TriGState::ObjectCollectionBeginning => {
                    if let N3Token::Punctuation(")") = token {
                        // `()` is rdf:nil.
                        self.cur_object.push(rdf::NIL.into());
                        self.emit_quad(results);
                        self
                    } else {
                        // Emit the triple pointing at the list root, then use the
                        // root as subject of the first rdf:first triple.
                        let root = BlankNode::default();
                        self.cur_object.push(root.clone().into());
                        self.emit_quad(results);
                        self.cur_subject.push(root.into());
                        self.cur_predicate.push(rdf::FIRST.into());
                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
                        self.stack.push(TriGState::Object);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // After one element of a collection.
                TriGState::ObjectCollectionPossibleEnd => {
                    // Drop the finished list cell and the element just emitted.
                    let old = self.cur_subject.pop().unwrap();
                    self.cur_object.pop();
                    if let N3Token::Punctuation(")") = token {
                        // End of list: drop the rdf:first predicate and close with rdf:nil.
                        self.cur_predicate.pop();
                        results.push(Quad::new(old, rdf::REST, rdf::NIL, self.cur_graph.clone()));
                        self
                    } else {
                        // Link to a new cell and read the next element.
                        let new = BlankNode::default();
                        results.push(Quad::new(
                            old,
                            rdf::REST,
                            new.clone(),
                            self.cur_graph.clone(),
                        ));
                        self.cur_subject.push(new.into());
                        self.stack.push(TriGState::ObjectCollectionPossibleEnd);
                        self.stack.push(TriGState::Object);
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                }
                // After a string: an optional language tag or `^^` datatype.
                // `emit` tells whether the quad must be emitted once the literal
                // is complete (it is `false` inside quoted triples).
                TriGState::LiteralPossibleSuffix { value, emit } => match token {
                    N3Token::LangTag(lang) => {
                        // The language tag is stored in ASCII lowercase.
                        self.cur_object.push(
                            Literal::new_language_tagged_literal_unchecked(
                                value,
                                lang.to_ascii_lowercase(),
                            )
                            .into(),
                        );
                        if emit {
                            self.emit_quad(results);
                        }
                        self
                    }
                    N3Token::Punctuation("^^") => {
                        self.stack
                            .push(TriGState::LiteralExpectDatatype { value, emit });
                        self
                    }
                    _ => {
                        // No suffix: a simple literal; the token belongs to the next state.
                        self.cur_object
                            .push(Literal::new_simple_literal(value).into());
                        if emit {
                            self.emit_quad(results);
                        }
                        self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                    }
                },
                // The datatype IRI following `^^`.
                TriGState::LiteralExpectDatatype { value, emit } => match token {
                    N3Token::IriRef(datatype) => {
                        self.cur_object.push(
                            Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype))
                                .into(),
                        );
                        if emit {
                            self.emit_quad(results);
                        }
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_object
                                .push(Literal::new_typed_literal(value, t).into());
                            if emit {
                                self.emit_quad(results);
                            }
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    // `error` empties the stack; the token is then re-submitted
                    // so that a `.` or `}` can immediately act as recovery point.
                    _ => self
                        .error(errors, "Expecting a datatype IRI after ^^, found TOKEN")
                        .recognize_next(TokenOrLineJump::Token(token), context, results, errors),
                },
                // Closing `>>` of a quoted triple in subject position: the three
                // inner terms are popped and replaced by the triple as subject.
                #[cfg(feature = "rdf-star")]
                TriGState::SubjectQuotedTripleEnd => {
                    let triple = Triple::new(
                        self.cur_subject.pop().unwrap(),
                        self.cur_predicate.pop().unwrap(),
                        self.cur_object.pop().unwrap(),
                    );
                    self.cur_subject.push(triple.into());
                    if token == N3Token::Punctuation(">>") {
                        self
                    } else {
                        self.error(
                            errors,
                            "Expecting '>>' to close a quoted triple, found TOKEN",
                        )
                    }
                }
                // Closing `>>` of a quoted triple in object position: the three
                // inner terms are popped and replaced by the triple as object.
                #[cfg(feature = "rdf-star")]
                TriGState::ObjectQuotedTripleEnd { emit } => {
                    let triple = Triple::new(
                        self.cur_subject.pop().unwrap(),
                        self.cur_predicate.pop().unwrap(),
                        self.cur_object.pop().unwrap(),
                    );
                    self.cur_object.push(triple.into());
                    if emit {
                        self.emit_quad(results);
                    }
                    if token == N3Token::Punctuation(">>") {
                        self
                    } else {
                        self.error(
                            errors,
                            "Expecting '>>' to close a quoted triple, found TOKEN",
                        )
                    }
                }
                // Subject of a quoted triple: nothing is emitted, terms are only stacked.
                #[cfg(feature = "rdf-star")]
                TriGState::QuotedSubject => match token {
                    N3Token::Punctuation("[") => {
                        self.cur_subject.push(BlankNode::default().into());
                        self.stack.push(TriGState::QuotedAnonEnd);
                        self
                    }
                    N3Token::IriRef(iri) => {
                        self.cur_subject.push(NamedNode::new_unchecked(iri).into());
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_subject.push(t.into());
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.cur_subject
                            .push(BlankNode::new_unchecked(label).into());
                        self
                    }
                    // Nested quoted triple as subject.
                    N3Token::Punctuation("<<") => {
                        self.stack.push(TriGState::SubjectQuotedTripleEnd);
                        self.stack.push(TriGState::QuotedObject);
                        self.stack.push(TriGState::Verb);
                        self.stack.push(TriGState::QuotedSubject);
                        self
                    }
                    _ => self.error(
                        errors,
                        "TOKEN is not a valid RDF quoted triple subject: TOKEN",
                    ),
                },
                // Object of a quoted triple: same terms as `Object` minus
                // property lists and collections, and without emitting a quad.
                #[cfg(feature = "rdf-star")]
                TriGState::QuotedObject => match token {
                    N3Token::Punctuation("[") => {
                        self.cur_object.push(BlankNode::default().into());
                        self.stack.push(TriGState::QuotedAnonEnd);
                        self
                    }
                    N3Token::IriRef(iri) => {
                        self.cur_object.push(NamedNode::new_unchecked(iri).into());
                        self
                    }
                    N3Token::PrefixedName {
                        prefix,
                        local,
                        might_be_invalid_iri,
                    } => match resolve_local_name(
                        prefix,
                        &local,
                        might_be_invalid_iri,
                        &context.prefixes,
                    ) {
                        Ok(t) => {
                            self.cur_object.push(t.into());
                            self
                        }
                        Err(e) => self.error(errors, e),
                    },
                    N3Token::BlankNodeLabel(label) => {
                        self.cur_object.push(BlankNode::new_unchecked(label).into());
                        self
                    }
                    N3Token::String(value) => {
                        self.stack
                            .push(TriGState::LiteralPossibleSuffix { value, emit: false });
                        self
                    }
                    N3Token::Integer(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::INTEGER).into());
                        self
                    }
                    N3Token::Decimal(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
                        self
                    }
                    N3Token::Double(v) => {
                        self.cur_object
                            .push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
                        self
                    }
                    N3Token::PlainKeyword("true") => {
                        self.cur_object
                            .push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
                        self
                    }
                    N3Token::PlainKeyword("false") => {
                        self.cur_object
                            .push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
                        self
                    }
                    // Nested quoted triple as object.
                    N3Token::Punctuation("<<") => {
                        self.stack
                            .push(TriGState::ObjectQuotedTripleEnd { emit: false });
                        self.stack.push(TriGState::QuotedObject);
                        self.stack.push(TriGState::Verb);
                        self.stack.push(TriGState::QuotedSubject);
                        self
                    }
                    _ => self.error(errors, "TOKEN is not a valid RDF quoted triple object"),
                },
                // After `[` inside a quoted triple: only `[]` is accepted.
                #[cfg(feature = "rdf-star")]
                TriGState::QuotedAnonEnd => {
                    if token == N3Token::Punctuation("]") {
                        self
                    } else {
                        self.error(errors, "Anonymous blank node with a property list are not allowed in quoted triples")
                    }
                }
            }
        } else if token == N3Token::Punctuation(".") || token == N3Token::Punctuation("}") {
            // Empty stack (error recovery): a `.` or `}` is taken as the end of
            // the broken statement or block, whichever construct it was in.
            self.stack.push(TriGState::TriGDoc);
            self
        } else {
            // Still recovering: skip the token.
            self
        }
    }
935
936 fn recognize_end(
937 mut self,
938 _context: &mut TriGRecognizerContext,
939 results: &mut Vec<Self::Output>,
940 errors: &mut Vec<RuleRecognizerError>,
941 ) {
942 match &*self.stack {
943 [] | [TriGState::TriGDoc] => {
944 debug_assert!(
945 self.cur_subject.is_empty(),
946 "The cur_subject stack must be empty if the state stack is empty"
947 );
948 debug_assert!(
949 self.cur_predicate.is_empty(),
950 "The cur_predicate stack must be empty if the state stack is empty"
951 );
952 debug_assert!(
953 self.cur_object.is_empty(),
954 "The cur_object stack must be empty if the state stack is empty"
955 );
956 }
957 [.., TriGState::LiteralPossibleSuffix { value, emit: true }] => {
958 self.cur_object
959 .push(Literal::new_simple_literal(value).into());
960 self.emit_quad(results);
961 errors.push("Triples should be followed by a dot".into())
962 }
963 _ => errors.push("Unexpected end".into()), }
965 }
966
967 fn lexer_options(context: &TriGRecognizerContext) -> &N3LexerOptions {
968 &context.lexer_options
969 }
970}
971
972impl TriGRecognizer {
973 #[allow(clippy::fn_params_excessive_bools)]
974 pub fn new_parser<B>(
975 data: B,
976 is_ending: bool,
977 with_graph_name: bool,
978 #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
979 unchecked: bool,
980 base_iri: Option<Iri<String>>,
981 prefixes: HashMap<String, Iri<String>>,
982 ) -> Parser<B, Self> {
983 Parser::new(
984 Lexer::new(
985 N3Lexer::new(N3LexerMode::Turtle, unchecked),
986 data,
987 is_ending,
988 MIN_BUFFER_SIZE,
989 MAX_BUFFER_SIZE,
990 Some(b"#"),
991 ),
992 Self {
993 stack: vec![TriGState::TriGDoc],
994 cur_subject: Vec::new(),
995 cur_predicate: Vec::new(),
996 cur_object: Vec::new(),
997 cur_graph: GraphName::DefaultGraph,
998 },
999 TriGRecognizerContext {
1000 with_graph_name,
1001 #[cfg(feature = "rdf-star")]
1002 with_quoted_triples,
1003 prefixes,
1004 lexer_options: N3LexerOptions { base_iri },
1005 },
1006 )
1007 }
1008
1009 #[must_use]
1010 fn error(
1011 mut self,
1012 errors: &mut Vec<RuleRecognizerError>,
1013 msg: impl Into<RuleRecognizerError>,
1014 ) -> Self {
1015 errors.push(msg.into());
1016 self.stack.clear();
1017 self.cur_subject.clear();
1018 self.cur_predicate.clear();
1019 self.cur_object.clear();
1020 self.cur_graph = GraphName::DefaultGraph;
1021 self
1022 }
1023
1024 fn emit_quad(&mut self, results: &mut Vec<Quad>) {
1025 results.push(Quad::new(
1026 self.cur_subject.last().unwrap().clone(),
1027 self.cur_predicate.last().unwrap().clone(),
1028 self.cur_object.last().unwrap().clone(),
1029 self.cur_graph.clone(),
1030 ));
1031 }
1032}
1033
/// States of the push-down automaton driven by `TriGRecognizer::recognize_next`.
///
/// Each variant names what the recognizer expects next; the exact behaviour of
/// a state is defined by its arm in `recognize_next`.
#[derive(Debug)]
enum TriGState {
    // Document level and directives.
    TriGDoc,
    ExpectDot,
    BaseExpectIri,
    PrefixExpectPrefix,
    PrefixExpectIri {
        /// Prefix name read just before, to be bound to the coming IRI.
        name: String,
    },
    // Top-level statements: triples or graph blocks, subject handling.
    TriplesOrGraph,
    WrappedGraphBlankNodePropertyListCurrent,
    SubjectBlankNodePropertyListEnd,
    SubjectBlankNodePropertyListAfter,
    SubjectCollectionBeginning,
    SubjectCollectionPossibleEnd,
    WrappedGraphOrPredicateObjectList {
        /// Term already read that is either a graph name or a subject.
        term: NamedOrBlankNode,
    },
    // Graph blocks and their content.
    WrappedGraph,
    WrappedGraphPossibleEnd,
    GraphName,
    GraphNameAnonEnd,
    Triples,
    TriplesBlankNodePropertyListCurrent,
    // Predicate and object lists.
    PredicateObjectList,
    PredicateObjectListEnd,
    PredicateObjectListPossibleContinuation,
    ObjectsList,
    ObjectsListEnd,
    #[cfg(feature = "rdf-star")]
    AnnotationEnd,
    #[cfg(feature = "rdf-star")]
    ObjectsListAfterAnnotation,
    // Individual terms.
    Verb,
    Object,
    ObjectBlankNodePropertyListCurrent,
    ObjectBlankNodePropertyListEnd,
    ObjectCollectionBeginning,
    ObjectCollectionPossibleEnd,
    LiteralPossibleSuffix {
        /// Lexical value of the string just read.
        value: String,
        /// Whether a quad must be emitted once the literal is complete.
        emit: bool,
    },
    LiteralExpectDatatype {
        /// Lexical value of the string read before `^^`.
        value: String,
        /// Whether a quad must be emitted once the literal is complete.
        emit: bool,
    },
    // Quoted triples (`<< ... >>`).
    #[cfg(feature = "rdf-star")]
    SubjectQuotedTripleEnd,
    #[cfg(feature = "rdf-star")]
    ObjectQuotedTripleEnd {
        /// Whether a quad must be emitted once the quoted triple is complete.
        emit: bool,
    },
    #[cfg(feature = "rdf-star")]
    QuotedSubject,
    #[cfg(feature = "rdf-star")]
    QuotedObject,
    #[cfg(feature = "rdf-star")]
    QuotedAnonEnd,
}