1use crate::error::*;
4use crate::gnquads::parse_variable;
5use crate::gtriple_allocator::GeneralizedTripleAllocator;
6use crate::shared::*;
7use crate::turtle::*;
8use crate::utils::*;
9use oxiri::Iri;
10use rio_api::model::*;
11use rio_api::parser::GeneralizedQuadsParser;
12use std::collections::HashMap;
13use std::io::BufRead;
14use std::mem::swap;
15
/// Parser for the generalized TriG syntax, reading its input from any [`BufRead`].
///
/// Quads are delivered through the `GeneralizedQuadsParser` implementation of this type.
pub struct GTriGParser<R: BufRead> {
    /// Look-ahead reader over the raw input bytes.
    read: LookAheadByteReader<R>,
    /// Current base IRI, if any; replaced by `@base` / `BASE` directives and
    /// passed along when IRI references are parsed.
    base_iri: Option<Iri<String>>,
    /// Prefix bindings (prefix name -> IRI) recorded by `@prefix` / `PREFIX` directives.
    prefixes: HashMap<String, String>,
    /// Source of fresh blank node identifiers for `[...]` and `(...)` constructs.
    bnode_id_generator: BlankNodeIdGenerator,
    /// Holds the terms of the triple(s) currently being built.
    triple_alloc: GeneralizedTripleAllocator,
    /// Holds the graph name: its current subject is used as graph name of emitted quads.
    graph_name_alloc: GeneralizedTripleAllocator,
    /// Scratch buffer handed to the IRI and literal parsing helpers.
    temp_buf: String,
}
57
58impl<R: BufRead> GTriGParser<R> {
59 pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
61 Self {
62 read: LookAheadByteReader::new(reader),
63 base_iri,
64 prefixes: HashMap::default(),
65 bnode_id_generator: BlankNodeIdGenerator::default(),
66 triple_alloc: GeneralizedTripleAllocator::new(),
67 graph_name_alloc: GeneralizedTripleAllocator::new(),
68 temp_buf: String::with_capacity(64),
69 }
70 }
71
72 fn make_quad(&self) -> GeneralizedQuad<'_> {
73 self.triple_alloc
74 .top_quad(self.graph_name_alloc.current_subject())
75 }
76}
77
impl<R: BufRead> GeneralizedQuadsParser for GTriGParser<R> {
    /// Errors raised by this parser are [`TurtleError`]s.
    type Error = TurtleError;

    /// Parses the next top-level construct (directive, graph block or triples
    /// statement) and passes every quad it yields to `on_quad`.
    fn parse_step<E: From<TurtleError>>(
        &mut self,
        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
    ) -> Result<(), E> {
        self.parse_generalized_block_or_directive(on_quad)
    }

    /// Returns `true` once the reader has no current byte left.
    fn is_end(&self) -> bool {
        self.read.current().is_none()
    }
}
92
93impl<R: BufRead> GTriGParser<R> {
94 fn parse_generalized_block_or_directive<E: From<TurtleError>>(
95 &mut self,
96 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
97 ) -> Result<(), E> {
98 skip_whitespace(&mut self.read)?;
101
102 if self.read.current().is_none() {
103 Ok(())
104 } else if self.read.starts_with(b"@prefix") {
105 self.parse_generalized_prefix_id()?;
106 Ok(())
107 } else if self.read.starts_with(b"@base") {
108 self.base_iri = Some(parse_base(
109 &mut self.read,
110 &mut self.temp_buf,
111 &self.base_iri,
112 )?);
113 Ok(())
114 } else if self.read.starts_with_ignore_ascii_case(b"BASE")
115 && self
116 .read
117 .ahead(4)?
118 .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
119 {
120 self.base_iri = Some(parse_sparql_base(
121 &mut self.read,
122 &mut self.temp_buf,
123 &self.base_iri,
124 )?);
125 Ok(())
126 } else if self.read.starts_with_ignore_ascii_case(b"PREFIX")
127 && self
128 .read
129 .ahead(6)?
130 .map_or(true, |c| c.is_ascii_whitespace())
131 {
132 self.parse_generalized_sparql_prefix()?;
133 Ok(())
134 } else if self.read.starts_with_ignore_ascii_case(b"GRAPH")
135 && self
136 .read
137 .ahead(5)?
138 .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
139 {
140 self.read.consume_many("GRAPH".len())?;
141 skip_whitespace(&mut self.read)?;
142 self.graph_name_alloc.push_triple_start();
143 self.parse_generalized_term(0, true)?;
144 skip_whitespace(&mut self.read)?;
145 self.parse_generalized_wrapped_graph(on_quad)?;
146 self.graph_name_alloc.pop_term(0);
147 self.graph_name_alloc.pop_top_empty_triple();
148
149 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
150 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
151 debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
152 debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
153
154 Ok(())
155 } else if self.read.current() == Some(b'{') {
156 self.parse_generalized_wrapped_graph(on_quad)?;
157
158 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
159 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
160 debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
161 debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
162
163 Ok(())
164 } else if self.read.current() == Some(b'[')
165 && !is_followed_by_space_and_closing_bracket(&mut self.read)?
166 || self.read.current() == Some(b'(')
167 {
168 self.parse_generalized_triples2(on_quad)?;
169
170 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
171 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
172 debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
173 debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
174
175 Ok(())
176 } else {
177 self.parse_generalized_triples_or_graph(on_quad)?;
178
179 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
180 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
181 debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
182 debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
183
184 Ok(())
185 }
186 }
187
188 fn parse_generalized_prefix_id(&mut self) -> Result<(), TurtleError> {
189 self.read.consume_many("@prefix".len())?;
191 skip_whitespace(&mut self.read)?;
192
193 let mut prefix = String::default();
194 parse_pname_ns(&mut self.read, &mut prefix)?;
195 skip_whitespace(&mut self.read)?;
196
197 let mut value = String::default();
198 parse_generalized_iriref(
199 &mut self.read,
200 &mut value,
201 &mut self.temp_buf,
202 self.base_iri.as_ref(),
203 )?;
204 skip_whitespace(&mut self.read)?;
205
206 self.read.check_is_current(b'.')?;
207 self.read.consume()?;
208
209 self.prefixes.insert(prefix, value);
210 Ok(())
211 }
212
213 fn parse_generalized_sparql_prefix(&mut self) -> Result<(), TurtleError> {
214 self.read.consume_many("PREFIX".len())?;
216 skip_whitespace(&mut self.read)?;
217
218 let mut prefix = String::default();
219 parse_pname_ns(&mut self.read, &mut prefix)?;
220 skip_whitespace(&mut self.read)?;
221
222 let mut value = String::default();
223 parse_generalized_iriref(
224 &mut self.read,
225 &mut value,
226 &mut self.temp_buf,
227 self.base_iri.as_ref(),
228 )?;
229 skip_whitespace(&mut self.read)?;
230
231 self.prefixes.insert(prefix, value);
232 Ok(())
233 }
234
235 fn parse_generalized_wrapped_graph<E: From<TurtleError>>(
236 &mut self,
237 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
238 ) -> Result<(), E> {
239 self.read.check_is_current(b'{')?;
242 self.read.consume()?;
243 skip_whitespace(&mut self.read)?;
244
245 loop {
246 if self.read.current() == Some(b'}') {
247 self.read.consume()?;
248 break;
249 }
250
251 self.parse_generalized_triples(on_quad)?;
252 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
253 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
254 match self.read.required_current()? {
255 b'.' => {
256 self.read.consume()?;
257 skip_whitespace(&mut self.read)?;
258 }
259 b'}' => {
260 self.read.consume()?;
261 break;
262 }
263 _ => self.read.unexpected_char_error()?,
264 }
265 }
266 Ok(())
267 }
268
269 fn parse_generalized_triples<E: From<TurtleError>>(
270 &mut self,
271 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
272 ) -> Result<(), E> {
273 match self.read.current() {
275 Some(b'[') if !is_followed_by_space_and_closing_bracket(&mut self.read)? => {
276 let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
277 skip_whitespace(&mut self.read)?;
278 if self.read.current() != Some(b'.') && self.read.current() != Some(b'}') {
279 self.triple_alloc.push_triple_start();
280 self.triple_alloc.try_push_atom(0, |b, _| {
281 b.push_str(bn.as_ref());
282 Ok(GeneralizedTerm::from(BlankNode { id: b }))
283 })?;
284 self.parse_generalized_predicate_object_list(on_quad)?;
285 self.triple_alloc.pop_term(0);
286 self.triple_alloc.pop_top_empty_triple();
287 }
288 }
289 _ => {
290 self.triple_alloc.push_triple_start();
291 self.parse_generalized_node(0, on_quad)?;
292 skip_whitespace(&mut self.read)?;
293 self.parse_generalized_predicate_object_list(on_quad)?;
294 self.triple_alloc.pop_term(0);
295 self.triple_alloc.pop_top_empty_triple();
296 }
297 }
298 Ok(())
299 }
300
301 fn parse_generalized_triples2<E: From<TurtleError>>(
302 &mut self,
303 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
304 ) -> Result<(), E> {
305 match self.read.current() {
307 Some(b'[') => {
308 let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
309 skip_whitespace(&mut self.read)?;
310 if self.read.current() != Some(b'.') {
311 self.triple_alloc.push_triple_start();
312 self.triple_alloc.try_push_atom(0, |b, _| {
313 b.push_str(bn.as_ref());
314 Ok(GeneralizedTerm::from(BlankNode { id: b }))
315 })?;
316 self.parse_generalized_predicate_object_list(on_quad)?;
317 self.triple_alloc.pop_term(0);
318 self.triple_alloc.pop_top_empty_triple();
319 }
320 }
321 _ => {
322 let collec = self.parse_generalized_collection(on_quad)?;
323 self.triple_alloc.push_triple_start();
324 self.triple_alloc
325 .try_push_atom(0, |b, _| allocate_collection(collec, b))?;
326 skip_whitespace(&mut self.read)?;
327 self.parse_generalized_predicate_object_list(on_quad)?;
328 self.triple_alloc.pop_term(0);
329 self.triple_alloc.pop_top_empty_triple();
330 }
331 }
332 self.read.check_is_current(b'.')?;
333 self.read.consume()?;
334 Ok(())
335 }
336
337 fn parse_generalized_triples_or_graph<E: From<TurtleError>>(
338 &mut self,
339 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
340 ) -> Result<(), E> {
341 self.triple_alloc.push_triple_start();
343 self.parse_generalized_node(0, on_quad)?;
344 skip_whitespace(&mut self.read)?;
345
346 if self.read.current() == Some(b'{') {
347 swap(&mut self.triple_alloc, &mut self.graph_name_alloc);
349 self.parse_generalized_wrapped_graph(on_quad)?;
350 self.graph_name_alloc.pop_term(0);
351 self.graph_name_alloc.pop_top_empty_triple();
352 } else {
353 self.parse_generalized_predicate_object_list(on_quad)?;
354 self.triple_alloc.pop_term(0);
355 self.triple_alloc.pop_top_empty_triple();
356
357 self.read.check_is_current(b'.')?;
358 self.read.consume()?;
359 }
360 Ok(())
361 }
362
363 fn parse_generalized_blank_node_property_list<E: From<TurtleError>>(
364 &mut self,
365 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
366 ) -> Result<BlankNodeId, E> {
367 self.read.check_is_current(b'[')?;
368 self.read.consume()?;
369 skip_whitespace(&mut self.read)?;
370
371 let id = self.bnode_id_generator.generate();
372 if self.read.current() == Some(b']') {
373 self.read.consume()?;
374 return Ok(id);
375 }
376
377 self.triple_alloc.push_triple_start();
378 self.triple_alloc.try_push_atom(0, |b, _| {
379 b.push_str(id.as_ref());
380 Ok(GeneralizedTerm::from(BlankNode { id: b }))
381 })?;
382
383 loop {
384 self.parse_generalized_predicate_object_list(on_quad)?;
385 skip_whitespace(&mut self.read)?;
386
387 if self.read.current() == Some(b']') {
388 break;
389 }
390 }
391 self.read.consume()?;
392 self.triple_alloc.pop_term(0);
393 self.triple_alloc.pop_top_empty_triple();
394 Ok(id)
395 }
396
/// Parses a `( ... )` collection and emits the `rdf:first` / `rdf:rest` quads
/// that chain its items together.
///
/// Returns the identifier of the first list node, or `None` when the
/// collection is empty (callers then use `rdf:nil`, see `allocate_collection`).
fn parse_generalized_collection<E: From<TurtleError>>(
    &mut self,
    on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
) -> Result<Option<BlankNodeId>, E> {
    self.read.check_is_current(b'(')?;
    self.read.consume()?;
    // Identifier of the first list node; stays `None` for an empty collection.
    let mut root: Option<BlankNodeId> = None;
    loop {
        skip_whitespace(&mut self.read)?;

        if self.read.current().is_none() {
            // Input ended before the closing parenthesis.
            self.read.unexpected_char_error()?;
            // The call above is expected to always return an error.
            unreachable!();
        } else if self.read.current() != Some(b')') {
            // One more item: it gets its own fresh list node.
            let new = self.bnode_id_generator.generate();
            if root.is_none() {
                root = Some(new);
                self.triple_alloc.push_triple_start();
            } else {
                // The previous list node is still at position 0:
                // emit `<previous> rdf:rest <new>`, then clear the triple.
                self.triple_alloc.try_push_atom(1, |_, _| {
                    Ok(GeneralizedTerm::from(NamedNode { iri: RDF_REST }))
                })?;
                self.triple_alloc.try_push_atom(2, |b, _| {
                    b.push_str(new.as_ref());
                    Ok(GeneralizedTerm::from(BlankNode { id: b }))
                })?;
                on_quad(self.make_quad())?;
                self.triple_alloc.pop_term(2);
                self.triple_alloc.pop_term(1);
                self.triple_alloc.pop_term(0);
            }

            // Emit `<new> rdf:first <item>`.
            self.triple_alloc.try_push_atom(0, |b, _| {
                b.push_str(new.as_ref());
                Ok(GeneralizedTerm::from(BlankNode { id: b }))
            })?;
            self.triple_alloc.try_push_atom(1, |_, _| {
                Ok(GeneralizedTerm::from(NamedNode { iri: RDF_FIRST }))
            })?;
            self.parse_generalized_node(2, on_quad)?;
            on_quad(self.make_quad())?;
            // Position 0 is kept: the next iteration (or the final `rdf:rest`
            // below) still needs the current list node as subject.
            self.triple_alloc.pop_term(2);
            self.triple_alloc.pop_term(1);
        } else {
            // Closing parenthesis reached.
            break;
        }
    }
    // Consume the closing parenthesis.
    self.read.consume()?;
    if root.is_some() {
        // Terminate the list: `<last> rdf:rest rdf:nil`.
        self.triple_alloc.try_push_atom(1, |_, _| {
            Ok(GeneralizedTerm::from(NamedNode { iri: RDF_REST }))
        })?;
        self.triple_alloc.try_push_atom(2, |_, _| {
            Ok(GeneralizedTerm::from(NamedNode { iri: RDF_NIL }))
        })?;
        on_quad(self.make_quad())?;
        self.triple_alloc.pop_top_triple();
    }
    Ok(root)
}
459
460 fn parse_generalized_predicate_object_list<E: From<TurtleError>>(
461 &mut self,
462 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
463 ) -> Result<(), E> {
464 loop {
466 self.parse_generalized_verb(on_quad)?;
467 skip_whitespace(&mut self.read)?;
468
469 self.parse_generalized_object_list(on_quad)?;
470 skip_whitespace(&mut self.read)?;
471
472 self.triple_alloc.pop_term(1);
473 if self.read.current() != Some(b';') {
474 return Ok(());
475 }
476 while self.read.current() == Some(b';') {
477 self.read.consume()?;
478 skip_whitespace(&mut self.read)?;
479 }
480 match self.read.current() {
481 Some(b'.') | Some(b']') | Some(b'}') | None => return Ok(()),
482 Some(b'|') => return Ok(()),
483 _ => (), }
485 }
486 }
487
488 fn parse_generalized_verb<E: From<TurtleError>>(
489 &mut self,
490 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
491 ) -> Result<(), E> {
492 if self.read.current() == Some(b'a') {
494 match self.read.next()? {
495 Some(c)
497 if is_possible_pn_chars_ascii(c) || c == b'.' || c == b':' || c > MAX_ASCII =>
498 {
499 self.parse_generalized_node(1, on_quad)
500 }
501 _ => {
502 self.read.consume()?;
503 self.triple_alloc.try_push_atom(1, |_, _| {
504 Ok(GeneralizedTerm::from(NamedNode { iri: RDF_TYPE }))
505 })
506 }
507 }
508 } else {
509 self.parse_generalized_node(1, on_quad)
510 }
511 }
512
/// Parses a `,`-separated list of objects for the subject and predicate
/// currently held by `triple_alloc`, emitting one quad per object.
///
/// Each object may be followed by an annotation block `{| ... |}` containing
/// a predicate-object list.
fn parse_generalized_object_list<E: From<TurtleError>>(
    &mut self,
    on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
) -> Result<(), E> {
    loop {
        // Parse the object into position 2 and emit the completed quad.
        self.parse_generalized_node(2, on_quad)?;
        on_quad(self.make_quad())?;

        skip_whitespace(&mut self.read)?;
        if self.read.current() == Some(b'{') {
            // Here a `{` can only open an annotation block, so `|` must follow.
            self.read.check_is_next(b'|')?;
            self.read.consume_many(2)?;
            skip_whitespace(&mut self.read)?;

            // Start a new triple with a quoted triple at position 0.
            // NOTE(review): presumably the quoted triple is the one just
            // emitted -- confirm against `push_quoted_triple`.
            self.triple_alloc.push_triple_start();
            self.triple_alloc.push_quoted_triple(0);
            self.parse_generalized_predicate_object_list(on_quad)?;

            // The block must be closed by `|}`.
            self.read.check_is_current(b'|')?;
            self.read.check_is_next(b'}')?;
            self.read.consume_many(2)?;
            skip_whitespace(&mut self.read)?;
            self.triple_alloc.pop_annotation_triple();
        }

        // Done with this object; continue only if a comma follows.
        self.triple_alloc.pop_term(2);
        if self.read.current() != Some(b',') {
            return Ok(());
        }
        self.read.consume()?;
        skip_whitespace(&mut self.read)?;
    }
}
547
548 fn parse_generalized_node<E: From<TurtleError>>(
549 &mut self,
550 pos: usize,
551 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
552 ) -> Result<(), E> {
553 match self.read.current() {
555 Some(b'[') => {
556 let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
557 self.triple_alloc.try_push_atom(pos, |b, _| {
558 b.push_str(bn.as_ref());
559 Ok(GeneralizedTerm::from(BlankNode { id: b }))
560 })
561 }
562 Some(b'(') => {
563 let collec = self.parse_generalized_collection(on_quad)?;
564 self.triple_alloc
565 .try_push_atom(pos, |b, _| allocate_collection(collec, b))?;
566 Ok(())
567 }
568 _ => {
569 self.parse_generalized_term(pos, false)?;
570 Ok(())
571 }
572 }
573 }
574
/// Parses a single term into position `pos` of one of the allocators.
///
/// The term goes to `graph_name_alloc` when `graph_name` is `true`, to
/// `triple_alloc` otherwise. The kind of term is decided from the first byte:
/// quoted triple (`<<`), IRI reference (`<`), blank node (`_` or `[`),
/// literal, variable (`?` or `$`), boolean literal or prefixed name.
fn parse_generalized_term(&mut self, pos: usize, graph_name: bool) -> Result<(), TurtleError> {
    let read = &mut self.read;
    let alloc = if graph_name {
        &mut self.graph_name_alloc
    } else {
        &mut self.triple_alloc
    };
    match read.required_current()? {
        b'<' => {
            if read.required_next()? == b'<' {
                // `<< s p o >>`: quoted triple.
                read.consume_many(2)?;
                skip_whitespace(read)?;

                alloc.push_triple_start();
                for i in 0..3 {
                    // The three components go to the same allocator as the enclosing term.
                    self.parse_generalized_term(i, graph_name)?;
                    skip_whitespace(&mut self.read)?;
                }
                self.read.check_is_current(b'>')?;
                self.read.check_is_next(b'>')?;
                self.read.consume_many(2)?;
                // The allocator is selected again: the recursive calls above
                // needed `self` as a whole, which ended the earlier borrow.
                let alloc = if graph_name {
                    &mut self.graph_name_alloc
                } else {
                    &mut self.triple_alloc
                };
                alloc.push_quoted_triple(pos);
                Ok(())
            } else {
                // `<...>`: IRI reference, parsed with the current base IRI.
                let temp_buf = &mut &mut self.temp_buf;
                let base_iri = self.base_iri.as_ref();
                alloc.try_push_atom(pos, |b, _| {
                    parse_generalized_iriref(read, b, temp_buf, base_iri)?;
                    Ok(GeneralizedTerm::from(NamedNode { iri: b }))
                })
            }
        }
        b'_' | b'[' => {
            // Blank node, handled by `parse_blank_node`.
            let bnode_id_generator = &mut self.bnode_id_generator;
            alloc.try_push_atom(pos, |b, _| {
                parse_blank_node(read, b, bnode_id_generator).map(GeneralizedTerm::from)
            })
        }
        b'"' | b'\'' | b'+' | b'-' | b'.' | b'0'..=b'9' => {
            // Quoted or numeric literal.
            let temp_buf = &mut &mut self.temp_buf;
            let base_iri = &self.base_iri;
            let prefixes = &self.prefixes;
            alloc.try_push_atom(pos, |b1, b2| {
                parse_literal(read, b1, b2, temp_buf, base_iri, prefixes)
                    .map(GeneralizedTerm::from)
            })
        }
        // Variable.
        b'?' | b'$' => alloc.try_push_atom(pos, |b, _| {
            parse_variable(read, b).map(GeneralizedTerm::from)
        }),
        _ => {
            let base_iri = &self.base_iri;
            let prefixes = &self.prefixes;
            // `true` / `false` are boolean literals only when the byte after
            // them is absent, or is an ASCII byte that can neither continue a
            // prefixed name nor be `:`.
            if read.starts_with(b"true")
                && read.ahead(4)?.map_or(true, |c| {
                    c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                })
                || read.starts_with(b"false")
                    && read.ahead(5)?.map_or(true, |c| {
                        c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                    })
            {
                let temp_buf = &mut &mut self.temp_buf;
                alloc.try_push_atom(pos, |b1, b2| {
                    parse_literal(read, b1, b2, temp_buf, base_iri, prefixes)
                        .map(GeneralizedTerm::from)
                })
            } else {
                // Anything else is parsed as a prefixed name.
                alloc.try_push_atom(pos, |b, _| {
                    parse_prefixed_name(read, b, prefixes).map(GeneralizedTerm::from)
                })
            }
        }
    }
}
657}
658
659pub fn parse_generalized_iriref(
660 read: &mut LookAheadByteReader<impl BufRead>,
661 buffer: &mut String,
662 temp_buf: &mut String,
663 base_iri: Option<&Iri<String>>,
664) -> Result<(), TurtleError> {
665 if let Some(base_iri) = base_iri {
666 parse_iriref(read, temp_buf)?;
667 let result = base_iri.resolve_into(temp_buf, buffer).map_err(|error| {
668 read.parse_error(TurtleErrorKind::InvalidIri {
669 iri: temp_buf.to_owned(),
670 error,
671 })
672 });
673 temp_buf.clear();
674 result
675 } else {
676 parse_iriref(read, buffer)
677 }
678}
679
680fn parse_literal<'a>(
681 read: &mut LookAheadByteReader<impl BufRead>,
682 buffer: &'a mut String,
683 annotation_buffer: &'a mut String,
684 temp_buf: &mut String,
685 base_iri: &Option<Iri<String>>,
686 prefixes: &HashMap<String, String>,
687) -> Result<Literal<'a>, TurtleError> {
688 match read.required_current()? {
690 b'"' | b'\'' => {
691 match parse_rdf_literal(
692 read,
693 buffer,
694 annotation_buffer,
695 temp_buf,
696 base_iri,
697 prefixes,
698 )? {
699 Literal::LanguageTaggedString { .. } => Ok(Literal::LanguageTaggedString {
700 value: buffer,
701 language: annotation_buffer,
702 }),
703 Literal::Simple { .. } => Ok(Literal::Simple { value: buffer }),
704 Literal::Typed { .. } => Ok(Literal::Typed {
705 value: buffer,
706 datatype: NamedNode {
707 iri: annotation_buffer,
708 },
709 }),
710 }
711 }
712 b'+' | b'-' | b'.' | b'0'..=b'9' => {
713 match parse_numeric_literal(read, buffer)? {
714 Literal::Typed { datatype, .. } => {
715 annotation_buffer.push_str(datatype.iri);
716 }
717 _ => unreachable!(),
718 }
719 Ok(Literal::Typed {
720 value: buffer,
721 datatype: NamedNode {
722 iri: annotation_buffer,
723 },
724 })
725 }
726 _ => {
727 match parse_boolean_literal(read, buffer)? {
728 Literal::Typed { datatype, .. } => {
729 annotation_buffer.push_str(datatype.iri);
730 }
731 _ => unreachable!(),
732 }
733 Ok(Literal::Typed {
734 value: buffer,
735 datatype: NamedNode {
736 iri: annotation_buffer,
737 },
738 })
739 }
740 }
741}
742
743#[allow(clippy::unnecessary_wraps)]
744fn allocate_collection(
745 collection: Option<BlankNodeId>,
746 buffer: &mut String,
747) -> Result<GeneralizedTerm<'_>, TurtleError> {
748 match collection {
749 Some(id) => {
750 buffer.push_str(id.as_ref());
751 Ok(BlankNode { id: buffer }.into())
752 }
753 None => Ok(NamedNode { iri: RDF_NIL }.into()),
754 }
755}
756
// Unit tests: most of them parse a generalized TriG document and compare the
// resulting quads with those of an equivalent generalized N-Quads document.
#[cfg(test)]
mod test {
    use super::*;
    use std::io::Cursor;

    /// Typed `Ok(())`, used as return value of the quad callbacks below.
    const OK_TURTLE_ERROR: Result<(), TurtleError> = Ok(());

    /// Owned copy of a quad: subject, predicate, object and optional graph name.
    type OwnedQuad = (OwnedTerm, OwnedTerm, OwnedTerm, Option<OwnedTerm>);

    /// Without a base IRI, relative IRI references are accepted in every
    /// position and yield the same quads as the N-Quads document below.
    #[test]
    fn relative_iri_references() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"
            <../s1> <#p1> </o1>.
            { <../s2> <#p2> </o2> }
            <//g3> { <../s3> <#p3> </o3> }
            GRAPH <//g4> { <../s4> <#p4> </o4> }
            "#,
        )?;

        let expected = parse_gnq(
            r#"
            <../s1> <#p1> </o1>.
            <../s2> <#p2> </o2>.
            <../s3> <#p3> </o3> <//g3>.
            <../s4> <#p4> </o4> <//g4>.
            "#,
        )?;

        assert_eq!(expected, got);
        Ok(())
    }

    /// Prefixes bound to relative IRIs expand to the corresponding relative IRIs.
    #[test]
    fn relative_prefixes() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"
            @prefix s: <../>.
            PREFIX p: <#>
            PREFIX o: </>
            PREFIX g: <//>

            s:s1 p:p1 o:o1.
            { s:s2 p:p2 o:o2 }
            g:g3 { s:s3 p:p3 o:o3 }
            GRAPH g:g4 { s:s4 p:p4 o:o4 }
            "#,
        )?;

        let expected = parse_gnq(
            r#"
            <../s1> <#p1> </o1>.
            <../s2> <#p2> </o2>.
            <../s3> <#p3> </o3> <//g3>.
            <../s4> <#p4> </o4> <//g4>.
            "#,
        )?;

        assert_eq!(expected, got);
        Ok(())
    }

    /// Variables are accepted as subject, predicate, object and graph name.
    #[test]
    fn all_variables() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"
            ?s1 ?p1 ?o1.
            { ?s2 ?p2 ?o2 }
            ?g3 { ?s3 ?p3 ?o3 }
            GRAPH ?g4 { ?s4 ?p4 ?o4 }
            "#,
        )?;

        let expected = parse_gnq(
            r#"
            ?s1 ?p1 ?o1.
            ?s2 ?p2 ?o2.
            ?s3 ?p3 ?o3 ?g3.
            ?s4 ?p4 ?o4 ?g4.
            "#,
        )
        .unwrap();

        assert_eq!(expected, got);
        Ok(())
    }

    /// Literals are accepted as subject, predicate, object and graph name.
    #[test]
    fn all_literals() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"
            "s1" "p1" "o1".
            { "s2" "p2" "o2" }
            "g3" { "s3" "p3" "o3" }
            GRAPH "g4" { "s4" "p4" "o4" }
            "#,
        )?;

        let expected = parse_gnq(
            r#"
            "s1" "p1" "o1".
            "s2" "p2" "o2".
            "s3" "p3" "o3" "g3".
            "s4" "p4" "o4" "g4".
            "#,
        )
        .unwrap();

        assert_eq!(expected, got);
        Ok(())
    }

    /// Quoted triples are accepted as subject, predicate, object and graph name.
    #[test]
    fn all_quoted_triples() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"@prefix : <#>.
            << :ss1 _:ps1 "os1" >> << _:sp1 "pp1" ?op1 >> << "so1" ?po1 :oo1 >>.
            { << ?ss2 :ps2 _:os2 >> << :sp2 "pp2" _:op2 >> << "so2" _:po2 ?oo2 >> }
            << _:sg3 ?pg3 :og3 >> { << ?ss3 :ps3 ?os3 >> << :sp3 ?pp3 _:op3 >> << ?so3 _:po3 "oo3" >> }
            GRAPH << _:sg4 "pg4" :og4 >> { << "ss4" :ps4 _:os4 >> << :sp4 _:pp4 ?op4 >> << _:so4 ?po4 "oo4" >> }
            "#,
        )?;

        let expected = parse_gnq(r#"
            << <#ss1> _:ps1 "os1" >> << _:sp1 "pp1" ?op1 >> << "so1" ?po1 <#oo1> >>.
            << ?ss2 <#ps2> _:os2 >> << <#sp2> "pp2" _:op2 >> << "so2" _:po2 ?oo2 >>.
            << ?ss3 <#ps3> ?os3 >> << <#sp3> ?pp3 _:op3 >> << ?so3 _:po3 "oo3" >> << _:sg3 ?pg3 <#og3> >>.
            << "ss4" <#ps4> _:os4 >> << <#sp4> _:pp4 ?op4 >> << _:so4 ?po4 "oo4" >> << _:sg4 "pg4" <#og4> >>.
        "#).unwrap();

        assert_eq!(expected, got);
        Ok(())
    }

    /// Quoted triples whose components are themselves quoted triples, in all
    /// four positions of a single quad.
    #[test]
    fn deeply_nested_triple() -> Result<(), TurtleError> {
        let got = parse_gtrig(
            r#"@prefix : <#>.
            << << :a :b :c >> << :d :e :f >> << :g :h :i >> >> {
                << << :j :k :l >> << :m :n :o >> << :p :q :r >> >>
                << << :s :t :u >> << :v :w :x >> << :y :z :A >> >>
                << << :B :C :D >> << :E :F :G >> << :H :I :J >> >>
            }"#,
        )?;
        let expected = parse_gnq(r#"
            << << <#j> <#k> <#l> >> << <#m> <#n> <#o> >> << <#p> <#q> <#r> >> >> << << <#s> <#t> <#u> >> << <#v> <#w> <#x> >> << <#y> <#z> <#A> >> >> << << <#B> <#C> <#D> >> << <#E> <#F> <#G> >> << <#H> <#I> <#J> >> >> << << <#a> <#b> <#c> >> << <#d> <#e> <#f> >> << <#g> <#h> <#i> >> >>.
        "#).unwrap();

        assert_eq!(expected, got);
        Ok(())
    }

    /// A blank node property list used as predicate: the quad describing the
    /// blank node comes first, and its subject is the predicate of the second quad.
    #[test]
    fn composite_predicate() -> Result<(), TurtleError> {
        let gtrig = r#"
            ?s [ ?p ?o1 ] ?o2 .
        "#;

        let mut got: Vec<OwnedQuad> = Vec::with_capacity(2);

        GTriGParser::new(
            Cursor::new(gtrig),
            Some(Iri::parse("http://example.org/base/".to_owned()).unwrap()),
        )
        .parse_all(&mut |quad| {
            got.push((
                quad.subject.into(),
                quad.predicate.into(),
                quad.object.into(),
                quad.graph_name.map(OwnedTerm::from),
            ));
            OK_TURTLE_ERROR
        })?;

        assert_eq!(v("p"), got[0].1);
        assert_eq!(v("o1"), got[0].2);
        assert_eq!(v("s"), got[1].0);
        assert_eq!(v("o2"), got[1].2);
        // The generated blank node is shared between the two quads.
        assert_eq!(got[0].0, got[1].1);
        Ok(())
    }

    /// Parses `txt` as generalized TriG (no base IRI) and collects the quads.
    fn parse_gtrig(txt: &str) -> Result<Vec<OwnedQuad>, TurtleError> {
        let mut got = Vec::new();
        GTriGParser::new(Cursor::new(txt), None).parse_all(&mut |quad| {
            got.push((
                quad.subject.into(),
                quad.predicate.into(),
                quad.object.into(),
                quad.graph_name.map(OwnedTerm::from),
            ));
            OK_TURTLE_ERROR
        })?;
        Ok(got)
    }

    /// Parses `txt` as generalized N-Quads and collects the quads.
    fn parse_gnq(txt: &str) -> Result<Vec<OwnedQuad>, TurtleError> {
        let mut got = Vec::new();
        crate::GeneralizedNQuadsParser::new(Cursor::new(txt)).parse_all(&mut |quad| {
            got.push((
                quad.subject.into(),
                quad.predicate.into(),
                quad.object.into(),
                quad.graph_name.map(OwnedTerm::from),
            ));
            OK_TURTLE_ERROR
        })?;
        Ok(got)
    }

    /// Shorthand for an owned variable term named `value`.
    fn v(value: &str) -> OwnedTerm {
        OwnedTerm::Variable(value.to_string())
    }

    /// Deep copy of a borrowed term, so that it can outlive the parser callback.
    impl<'a> From<GeneralizedTerm<'a>> for OwnedTerm {
        fn from(other: GeneralizedTerm<'a>) -> OwnedTerm {
            match other {
                GeneralizedTerm::NamedNode(n) => OwnedTerm::NamedNode(n.iri.to_string()),
                GeneralizedTerm::BlankNode(n) => OwnedTerm::BlankNode(n.id.to_string()),
                GeneralizedTerm::Literal(Literal::Simple { value }) => {
                    OwnedTerm::LiteralSimple(value.to_string())
                }
                GeneralizedTerm::Literal(Literal::LanguageTaggedString { value, language }) => {
                    OwnedTerm::LiteralLanguage(value.to_string(), language.to_string())
                }
                GeneralizedTerm::Literal(Literal::Typed { value, datatype }) => {
                    OwnedTerm::LiteralDatatype(value.to_string(), datatype.iri.to_string())
                }
                GeneralizedTerm::Variable(n) => OwnedTerm::Variable(n.name.to_string()),
                // Quoted triples are converted recursively, component by component.
                GeneralizedTerm::Triple(t) => {
                    OwnedTerm::Triple(Box::new([t[0].into(), t[1].into(), t[2].into()]))
                }
            }
        }
    }

    /// Owned counterpart of `GeneralizedTerm`, used to store and compare parsed quads.
    #[derive(Clone, Debug, PartialEq)]
    enum OwnedTerm {
        NamedNode(String),
        BlankNode(String),
        LiteralSimple(String),
        LiteralLanguage(String, String),
        LiteralDatatype(String, String),
        Variable(String),
        Triple(Box<[OwnedTerm; 3]>),
    }

    /// Borrowing view of an owned term; quoted triples are not supported.
    impl<'a> From<&'a OwnedTerm> for GeneralizedTerm<'a> {
        fn from(other: &'a OwnedTerm) -> GeneralizedTerm<'a> {
            match other {
                OwnedTerm::NamedNode(iri) => GeneralizedTerm::NamedNode(NamedNode { iri }),
                OwnedTerm::BlankNode(id) => GeneralizedTerm::BlankNode(BlankNode { id }),
                OwnedTerm::LiteralSimple(value) => {
                    GeneralizedTerm::Literal(Literal::Simple { value })
                }
                OwnedTerm::LiteralLanguage(value, language) => {
                    GeneralizedTerm::Literal(Literal::LanguageTaggedString { value, language })
                }
                OwnedTerm::LiteralDatatype(value, iri) => {
                    GeneralizedTerm::Literal(Literal::Typed {
                        value,
                        datatype: NamedNode { iri },
                    })
                }
                OwnedTerm::Variable(name) => GeneralizedTerm::Variable(Variable { name }),
                // Panics: this conversion is not implemented for quoted triples.
                OwnedTerm::Triple(_) => {
                    unimplemented!()
                }
            }
        }
    }
}