rio_turtle/
gtrig.rs

//! Implementation of a generalized RDF / RDF-star version of the TriG syntax
2
3use crate::error::*;
4use crate::gnquads::parse_variable;
5use crate::gtriple_allocator::GeneralizedTripleAllocator;
6use crate::shared::*;
7use crate::turtle::*;
8use crate::utils::*;
9use oxiri::Iri;
10use rio_api::model::*;
11use rio_api::parser::GeneralizedQuadsParser;
12use std::collections::HashMap;
13use std::io::BufRead;
14use std::mem::swap;
15
/// A [TriG](https://www.w3.org/TR/trig/) streaming parser parsing generalized quads.
///
/// It implements the `GeneralizedQuadsParser` trait.
/// Using it requires enabling the `generalized` feature.
///
/// In addition to the usual TriG terms, variables (such as `?name` in the example below)
/// are accepted.
///
/// Count the number of people using the `GeneralizedQuadsParser` API:
/// ```
/// use rio_turtle::{GTriGParser, TurtleError};
/// use rio_api::parser::GeneralizedQuadsParser;
/// use rio_api::model::NamedNode;
///
/// let file = b"@prefix schema: <http://schema.org/> .
/// <http://example/> {
///     <http://example.com/foo> a schema:Person ;
///         schema:name  ?name .
///     <http://example.com/bar> a schema:Person ;
///         schema:name  ?name .
/// }";
///
/// let rdf_type = NamedNode { iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" };
/// let schema_person = NamedNode { iri: "http://schema.org/Person" };
/// let mut count = 0;
/// GTriGParser::new(file.as_ref(), None).parse_all(&mut |t| {
///     if t.predicate == rdf_type.into() && t.object == schema_person.into() {
///         count += 1;
///     }
///     Ok(()) as Result<(), TurtleError>
/// })?;
/// assert_eq!(2, count);
/// # Result::<_,rio_turtle::TurtleError>::Ok(())
/// ```
pub struct GTriGParser<R: BufRead> {
    /// Look-ahead byte reader over the input.
    read: LookAheadByteReader<R>,
    /// Base IRI used to resolve relative IRI references; replaced by `@base` / `BASE` directives.
    base_iri: Option<Iri<String>>,
    /// Prefix-to-IRI mappings recorded from `@prefix` / `PREFIX` directives.
    prefixes: HashMap<String, String>,
    /// Source of identifiers for blank nodes created by `[...]` and by collection cells.
    bnode_id_generator: BlankNodeIdGenerator,
    /// Holds the terms of the triple(s) currently being parsed.
    triple_alloc: GeneralizedTripleAllocator,
    /// Holds the graph name: its current subject is used as the graph name of emitted quads.
    graph_name_alloc: GeneralizedTripleAllocator,
    /// Scratch buffer shared by the IRI and literal parsing helpers.
    temp_buf: String,
}
57
58impl<R: BufRead> GTriGParser<R> {
59    /// Builds the parser from a `BufRead` implementation, and a base IRI for relative IRI resolution.
60    pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
61        Self {
62            read: LookAheadByteReader::new(reader),
63            base_iri,
64            prefixes: HashMap::default(),
65            bnode_id_generator: BlankNodeIdGenerator::default(),
66            triple_alloc: GeneralizedTripleAllocator::new(),
67            graph_name_alloc: GeneralizedTripleAllocator::new(),
68            temp_buf: String::with_capacity(64),
69        }
70    }
71
72    fn make_quad(&self) -> GeneralizedQuad<'_> {
73        self.triple_alloc
74            .top_quad(self.graph_name_alloc.current_subject())
75    }
76}
77
78impl<R: BufRead> GeneralizedQuadsParser for GTriGParser<R> {
79    type Error = TurtleError;
80
81    fn parse_step<E: From<TurtleError>>(
82        &mut self,
83        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
84    ) -> Result<(), E> {
85        self.parse_generalized_block_or_directive(on_quad)
86    }
87
88    fn is_end(&self) -> bool {
89        self.read.current().is_none()
90    }
91}
92
93impl<R: BufRead> GTriGParser<R> {
94    fn parse_generalized_block_or_directive<E: From<TurtleError>>(
95        &mut self,
96        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
97    ) -> Result<(), E> {
98        // [1g] 	trigDoc 	::= 	(directive | block)*
99        // [2g] 	block 	::= 	triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
100        skip_whitespace(&mut self.read)?;
101
102        if self.read.current().is_none() {
103            Ok(())
104        } else if self.read.starts_with(b"@prefix") {
105            self.parse_generalized_prefix_id()?;
106            Ok(())
107        } else if self.read.starts_with(b"@base") {
108            self.base_iri = Some(parse_base(
109                &mut self.read,
110                &mut self.temp_buf,
111                &self.base_iri,
112            )?);
113            Ok(())
114        } else if self.read.starts_with_ignore_ascii_case(b"BASE")
115            && self
116                .read
117                .ahead(4)?
118                .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
119        {
120            self.base_iri = Some(parse_sparql_base(
121                &mut self.read,
122                &mut self.temp_buf,
123                &self.base_iri,
124            )?);
125            Ok(())
126        } else if self.read.starts_with_ignore_ascii_case(b"PREFIX")
127            && self
128                .read
129                .ahead(6)?
130                .map_or(true, |c| c.is_ascii_whitespace())
131        {
132            self.parse_generalized_sparql_prefix()?;
133            Ok(())
134        } else if self.read.starts_with_ignore_ascii_case(b"GRAPH")
135            && self
136                .read
137                .ahead(5)?
138                .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
139        {
140            self.read.consume_many("GRAPH".len())?;
141            skip_whitespace(&mut self.read)?;
142            self.graph_name_alloc.push_triple_start();
143            self.parse_generalized_term(0, true)?;
144            skip_whitespace(&mut self.read)?;
145            self.parse_generalized_wrapped_graph(on_quad)?;
146            self.graph_name_alloc.pop_term(0);
147            self.graph_name_alloc.pop_top_empty_triple();
148
149            debug_assert_eq!(self.triple_alloc.complete_len(), 0);
150            debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
151            debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
152            debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
153
154            Ok(())
155        } else if self.read.current() == Some(b'{') {
156            self.parse_generalized_wrapped_graph(on_quad)?;
157
158            debug_assert_eq!(self.triple_alloc.complete_len(), 0);
159            debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
160            debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
161            debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
162
163            Ok(())
164        } else if self.read.current() == Some(b'[')
165            && !is_followed_by_space_and_closing_bracket(&mut self.read)?
166            || self.read.current() == Some(b'(')
167        {
168            self.parse_generalized_triples2(on_quad)?;
169
170            debug_assert_eq!(self.triple_alloc.complete_len(), 0);
171            debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
172            debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
173            debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
174
175            Ok(())
176        } else {
177            self.parse_generalized_triples_or_graph(on_quad)?;
178
179            debug_assert_eq!(self.triple_alloc.complete_len(), 0);
180            debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
181            debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
182            debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
183
184            Ok(())
185        }
186    }
187
188    fn parse_generalized_prefix_id(&mut self) -> Result<(), TurtleError> {
189        // [4] 	prefixID 	::= 	'@prefix' PNAME_NS IRIREF '.'
190        self.read.consume_many("@prefix".len())?;
191        skip_whitespace(&mut self.read)?;
192
193        let mut prefix = String::default();
194        parse_pname_ns(&mut self.read, &mut prefix)?;
195        skip_whitespace(&mut self.read)?;
196
197        let mut value = String::default();
198        parse_generalized_iriref(
199            &mut self.read,
200            &mut value,
201            &mut self.temp_buf,
202            self.base_iri.as_ref(),
203        )?;
204        skip_whitespace(&mut self.read)?;
205
206        self.read.check_is_current(b'.')?;
207        self.read.consume()?;
208
209        self.prefixes.insert(prefix, value);
210        Ok(())
211    }
212
213    fn parse_generalized_sparql_prefix(&mut self) -> Result<(), TurtleError> {
214        // [6s] 	sparqlPrefix 	::= 	"PREFIX" PNAME_NS IRIREF
215        self.read.consume_many("PREFIX".len())?;
216        skip_whitespace(&mut self.read)?;
217
218        let mut prefix = String::default();
219        parse_pname_ns(&mut self.read, &mut prefix)?;
220        skip_whitespace(&mut self.read)?;
221
222        let mut value = String::default();
223        parse_generalized_iriref(
224            &mut self.read,
225            &mut value,
226            &mut self.temp_buf,
227            self.base_iri.as_ref(),
228        )?;
229        skip_whitespace(&mut self.read)?;
230
231        self.prefixes.insert(prefix, value);
232        Ok(())
233    }
234
235    fn parse_generalized_wrapped_graph<E: From<TurtleError>>(
236        &mut self,
237        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
238    ) -> Result<(), E> {
239        // [5g] 	wrappedGraph 	::= 	'{' triplesBlock? '}'
240        // [6g] 	triplesBlock 	::= 	triples ('.' triplesBlock?)?
241        self.read.check_is_current(b'{')?;
242        self.read.consume()?;
243        skip_whitespace(&mut self.read)?;
244
245        loop {
246            if self.read.current() == Some(b'}') {
247                self.read.consume()?;
248                break;
249            }
250
251            self.parse_generalized_triples(on_quad)?;
252            debug_assert_eq!(self.triple_alloc.complete_len(), 0);
253            debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
254            match self.read.required_current()? {
255                b'.' => {
256                    self.read.consume()?;
257                    skip_whitespace(&mut self.read)?;
258                }
259                b'}' => {
260                    self.read.consume()?;
261                    break;
262                }
263                _ => self.read.unexpected_char_error()?,
264            }
265        }
266        Ok(())
267    }
268
269    fn parse_generalized_triples<E: From<TurtleError>>(
270        &mut self,
271        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
272    ) -> Result<(), E> {
273        // [6] 	triples 	::= 	subject predicateObjectList | blankNodePropertyList predicateObjectList?
274        match self.read.current() {
275            Some(b'[') if !is_followed_by_space_and_closing_bracket(&mut self.read)? => {
276                let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
277                skip_whitespace(&mut self.read)?;
278                if self.read.current() != Some(b'.') && self.read.current() != Some(b'}') {
279                    self.triple_alloc.push_triple_start();
280                    self.triple_alloc.try_push_atom(0, |b, _| {
281                        b.push_str(bn.as_ref());
282                        Ok(GeneralizedTerm::from(BlankNode { id: b }))
283                    })?;
284                    self.parse_generalized_predicate_object_list(on_quad)?;
285                    self.triple_alloc.pop_term(0);
286                    self.triple_alloc.pop_top_empty_triple();
287                }
288            }
289            _ => {
290                self.triple_alloc.push_triple_start();
291                self.parse_generalized_node(0, on_quad)?;
292                skip_whitespace(&mut self.read)?;
293                self.parse_generalized_predicate_object_list(on_quad)?;
294                self.triple_alloc.pop_term(0);
295                self.triple_alloc.pop_top_empty_triple();
296            }
297        }
298        Ok(())
299    }
300
301    fn parse_generalized_triples2<E: From<TurtleError>>(
302        &mut self,
303        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
304    ) -> Result<(), E> {
305        // [4g] 	triples2 	::= 	blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
306        match self.read.current() {
307            Some(b'[') => {
308                let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
309                skip_whitespace(&mut self.read)?;
310                if self.read.current() != Some(b'.') {
311                    self.triple_alloc.push_triple_start();
312                    self.triple_alloc.try_push_atom(0, |b, _| {
313                        b.push_str(bn.as_ref());
314                        Ok(GeneralizedTerm::from(BlankNode { id: b }))
315                    })?;
316                    self.parse_generalized_predicate_object_list(on_quad)?;
317                    self.triple_alloc.pop_term(0);
318                    self.triple_alloc.pop_top_empty_triple();
319                }
320            }
321            _ => {
322                let collec = self.parse_generalized_collection(on_quad)?;
323                self.triple_alloc.push_triple_start();
324                self.triple_alloc
325                    .try_push_atom(0, |b, _| allocate_collection(collec, b))?;
326                skip_whitespace(&mut self.read)?;
327                self.parse_generalized_predicate_object_list(on_quad)?;
328                self.triple_alloc.pop_term(0);
329                self.triple_alloc.pop_top_empty_triple();
330            }
331        }
332        self.read.check_is_current(b'.')?;
333        self.read.consume()?;
334        Ok(())
335    }
336
337    fn parse_generalized_triples_or_graph<E: From<TurtleError>>(
338        &mut self,
339        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
340    ) -> Result<(), E> {
341        // [3g] 	triplesOrGraph 	::= 	labelOrSubject (wrappedGraph | predicateObjectList '.')
342        self.triple_alloc.push_triple_start();
343        self.parse_generalized_node(0, on_quad)?;
344        skip_whitespace(&mut self.read)?;
345
346        if self.read.current() == Some(b'{') {
347            // what was supposed to be a subject is in fact a graph name
348            swap(&mut self.triple_alloc, &mut self.graph_name_alloc);
349            self.parse_generalized_wrapped_graph(on_quad)?;
350            self.graph_name_alloc.pop_term(0);
351            self.graph_name_alloc.pop_top_empty_triple();
352        } else {
353            self.parse_generalized_predicate_object_list(on_quad)?;
354            self.triple_alloc.pop_term(0);
355            self.triple_alloc.pop_top_empty_triple();
356
357            self.read.check_is_current(b'.')?;
358            self.read.consume()?;
359        }
360        Ok(())
361    }
362
363    fn parse_generalized_blank_node_property_list<E: From<TurtleError>>(
364        &mut self,
365        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
366    ) -> Result<BlankNodeId, E> {
367        self.read.check_is_current(b'[')?;
368        self.read.consume()?;
369        skip_whitespace(&mut self.read)?;
370
371        let id = self.bnode_id_generator.generate();
372        if self.read.current() == Some(b']') {
373            self.read.consume()?;
374            return Ok(id);
375        }
376
377        self.triple_alloc.push_triple_start();
378        self.triple_alloc.try_push_atom(0, |b, _| {
379            b.push_str(id.as_ref());
380            Ok(GeneralizedTerm::from(BlankNode { id: b }))
381        })?;
382
383        loop {
384            self.parse_generalized_predicate_object_list(on_quad)?;
385            skip_whitespace(&mut self.read)?;
386
387            if self.read.current() == Some(b']') {
388                break;
389            }
390        }
391        self.read.consume()?;
392        self.triple_alloc.pop_term(0);
393        self.triple_alloc.pop_top_empty_triple();
394        Ok(id)
395    }
396
397    fn parse_generalized_collection<E: From<TurtleError>>(
398        &mut self,
399        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
400    ) -> Result<Option<BlankNodeId>, E> {
401        // [15] 	collection 	::= 	'(' object* ')'
402        self.read.check_is_current(b'(')?;
403        self.read.consume()?;
404        let mut root: Option<BlankNodeId> = None;
405        loop {
406            skip_whitespace(&mut self.read)?;
407
408            if self.read.current().is_none() {
409                self.read.unexpected_char_error()?;
410                unreachable!(); // unexpected_char always errs
411            } else if self.read.current() != Some(b')') {
412                let new = self.bnode_id_generator.generate();
413                if root.is_none() {
414                    root = Some(new);
415                    self.triple_alloc.push_triple_start();
416                } else {
417                    self.triple_alloc.try_push_atom(1, |_, _| {
418                        Ok(GeneralizedTerm::from(NamedNode { iri: RDF_REST }))
419                    })?;
420                    self.triple_alloc.try_push_atom(2, |b, _| {
421                        b.push_str(new.as_ref());
422                        Ok(GeneralizedTerm::from(BlankNode { id: b }))
423                    })?;
424                    on_quad(self.make_quad())?;
425                    self.triple_alloc.pop_term(2);
426                    self.triple_alloc.pop_term(1);
427                    self.triple_alloc.pop_term(0);
428                }
429
430                self.triple_alloc.try_push_atom(0, |b, _| {
431                    b.push_str(new.as_ref());
432                    Ok(GeneralizedTerm::from(BlankNode { id: b }))
433                })?;
434                self.triple_alloc.try_push_atom(1, |_, _| {
435                    Ok(GeneralizedTerm::from(NamedNode { iri: RDF_FIRST }))
436                })?;
437                self.parse_generalized_node(2, on_quad)?;
438                on_quad(self.make_quad())?;
439                self.triple_alloc.pop_term(2);
440                self.triple_alloc.pop_term(1);
441            } else {
442                // trailing ')'
443                break;
444            }
445        }
446        self.read.consume()?;
447        if root.is_some() {
448            self.triple_alloc.try_push_atom(1, |_, _| {
449                Ok(GeneralizedTerm::from(NamedNode { iri: RDF_REST }))
450            })?;
451            self.triple_alloc.try_push_atom(2, |_, _| {
452                Ok(GeneralizedTerm::from(NamedNode { iri: RDF_NIL }))
453            })?;
454            on_quad(self.make_quad())?;
455            self.triple_alloc.pop_top_triple();
456        }
457        Ok(root)
458    }
459
460    fn parse_generalized_predicate_object_list<E: From<TurtleError>>(
461        &mut self,
462        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
463    ) -> Result<(), E> {
464        // [7] 	predicateObjectList 	::= 	verb objectList (';' (verb objectList)?)*
465        loop {
466            self.parse_generalized_verb(on_quad)?;
467            skip_whitespace(&mut self.read)?;
468
469            self.parse_generalized_object_list(on_quad)?;
470            skip_whitespace(&mut self.read)?;
471
472            self.triple_alloc.pop_term(1);
473            if self.read.current() != Some(b';') {
474                return Ok(());
475            }
476            while self.read.current() == Some(b';') {
477                self.read.consume()?;
478                skip_whitespace(&mut self.read)?;
479            }
480            match self.read.current() {
481                Some(b'.') | Some(b']') | Some(b'}') | None => return Ok(()),
482                Some(b'|') => return Ok(()),
483                _ => (), //continue
484            }
485        }
486    }
487
488    fn parse_generalized_verb<E: From<TurtleError>>(
489        &mut self,
490        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
491    ) -> Result<(), E> {
492        // [9] 	verb 	::= 	predicate | 'a'
493        if self.read.current() == Some(b'a') {
494            match self.read.next()? {
495                // We check that it is not a prefixed URI
496                Some(c)
497                    if is_possible_pn_chars_ascii(c) || c == b'.' || c == b':' || c > MAX_ASCII =>
498                {
499                    self.parse_generalized_node(1, on_quad)
500                }
501                _ => {
502                    self.read.consume()?;
503                    self.triple_alloc.try_push_atom(1, |_, _| {
504                        Ok(GeneralizedTerm::from(NamedNode { iri: RDF_TYPE }))
505                    })
506                }
507            }
508        } else {
509            self.parse_generalized_node(1, on_quad)
510        }
511    }
512
513    fn parse_generalized_object_list<E: From<TurtleError>>(
514        &mut self,
515        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
516    ) -> Result<(), E> {
517        // [8] 	objectList 	::= 	object (',' object)*
518        loop {
519            self.parse_generalized_node(2, on_quad)?;
520            on_quad(self.make_quad())?;
521
522            skip_whitespace(&mut self.read)?;
523            if self.read.current() == Some(b'{') {
524                self.read.check_is_next(b'|')?;
525                self.read.consume_many(2)?;
526                skip_whitespace(&mut self.read)?;
527
528                self.triple_alloc.push_triple_start();
529                self.triple_alloc.push_quoted_triple(0);
530                self.parse_generalized_predicate_object_list(on_quad)?;
531
532                self.read.check_is_current(b'|')?;
533                self.read.check_is_next(b'}')?;
534                self.read.consume_many(2)?;
535                skip_whitespace(&mut self.read)?;
536                self.triple_alloc.pop_annotation_triple();
537            }
538
539            self.triple_alloc.pop_term(2);
540            if self.read.current() != Some(b',') {
541                return Ok(());
542            }
543            self.read.consume()?;
544            skip_whitespace(&mut self.read)?;
545        }
546    }
547
548    fn parse_generalized_node<E: From<TurtleError>>(
549        &mut self,
550        pos: usize,
551        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
552    ) -> Result<(), E> {
553        //[10] 	subject 	::= 	iri | BlankNode | collection
554        match self.read.current() {
555            Some(b'[') => {
556                let bn = self.parse_generalized_blank_node_property_list(on_quad)?;
557                self.triple_alloc.try_push_atom(pos, |b, _| {
558                    b.push_str(bn.as_ref());
559                    Ok(GeneralizedTerm::from(BlankNode { id: b }))
560                })
561            }
562            Some(b'(') => {
563                let collec = self.parse_generalized_collection(on_quad)?;
564                self.triple_alloc
565                    .try_push_atom(pos, |b, _| allocate_collection(collec, b))?;
566                Ok(())
567            }
568            _ => {
569                self.parse_generalized_term(pos, false)?;
570                Ok(())
571            }
572        }
573    }
574
    /// Parses a single term and pushes it at position `pos` of the top triple of
    /// `graph_name_alloc` (when `graph_name` is true) or of `triple_alloc` (otherwise).
    ///
    /// The kind of term is chosen from the current byte: `<<` opens a quoted triple,
    /// `<` an IRI reference, `_` or `[` a blank node, a quote / sign / dot / digit a
    /// literal, `?` or `$` a variable; anything else is either a boolean literal or a
    /// prefixed name.
    fn parse_generalized_term(&mut self, pos: usize, graph_name: bool) -> Result<(), TurtleError> {
        let read = &mut self.read;
        // Select the allocator that receives the term.
        let alloc = if graph_name {
            &mut self.graph_name_alloc
        } else {
            &mut self.triple_alloc
        };
        match read.required_current()? {
            b'<' => {
                if read.required_next()? == b'<' {
                    // Quoted triple: "<<" term term term ">>"
                    read.consume_many(2)?;
                    skip_whitespace(read)?;

                    alloc.push_triple_start();
                    // at this point, we need to drop read and alloc to be able to use self in the loop
                    for i in 0..3 {
                        // The three components go through this same function, into the same allocator.
                        self.parse_generalized_term(i, graph_name)?;
                        skip_whitespace(&mut self.read)?;
                    }
                    self.read.check_is_current(b'>')?;
                    self.read.check_is_next(b'>')?;
                    self.read.consume_many(2)?;
                    // reassign alloc, since we had to drop it above (just before 'for' loop)
                    let alloc = if graph_name {
                        &mut self.graph_name_alloc
                    } else {
                        &mut self.triple_alloc
                    };
                    // The triple just completed becomes the term at `pos`.
                    alloc.push_quoted_triple(pos);
                    Ok(())
                } else {
                    // IRI reference, resolved against the base IRI when there is one.
                    let temp_buf = &mut &mut self.temp_buf;
                    let base_iri = self.base_iri.as_ref();
                    alloc.try_push_atom(pos, |b, _| {
                        parse_generalized_iriref(read, b, temp_buf, base_iri)?;
                        Ok(GeneralizedTerm::from(NamedNode { iri: b }))
                    })
                }
            }
            // Blank node
            b'_' | b'[' => {
                let bnode_id_generator = &mut self.bnode_id_generator;
                alloc.try_push_atom(pos, |b, _| {
                    parse_blank_node(read, b, bnode_id_generator).map(GeneralizedTerm::from)
                })
            }
            // String or numeric literal
            b'"' | b'\'' | b'+' | b'-' | b'.' | b'0'..=b'9' => {
                let temp_buf = &mut &mut self.temp_buf;
                let base_iri = &self.base_iri;
                let prefixes = &self.prefixes;
                alloc.try_push_atom(pos, |b1, b2| {
                    parse_literal(read, b1, b2, temp_buf, base_iri, prefixes)
                        .map(GeneralizedTerm::from)
                })
            }
            // Variable
            b'?' | b'$' => alloc.try_push_atom(pos, |b, _| {
                parse_variable(read, b).map(GeneralizedTerm::from)
            }),
            _ => {
                let base_iri = &self.base_iri;
                let prefixes = &self.prefixes;
                // `true` / `false` are boolean literals only when the byte that follows
                // (if any) is an ASCII byte that can neither continue a name nor be ':'.
                if read.starts_with(b"true")
                    && read.ahead(4)?.map_or(true, |c| {
                        c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                    })
                    || read.starts_with(b"false")
                        && read.ahead(5)?.map_or(true, |c| {
                            c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                        })
                {
                    let temp_buf = &mut &mut self.temp_buf;
                    alloc.try_push_atom(pos, |b1, b2| {
                        parse_literal(read, b1, b2, temp_buf, base_iri, prefixes)
                            .map(GeneralizedTerm::from)
                    })
                } else {
                    // Everything else is parsed as a prefixed name.
                    alloc.try_push_atom(pos, |b, _| {
                        parse_prefixed_name(read, b, prefixes).map(GeneralizedTerm::from)
                    })
                }
            }
        }
    }
657}
658
659pub fn parse_generalized_iriref(
660    read: &mut LookAheadByteReader<impl BufRead>,
661    buffer: &mut String,
662    temp_buf: &mut String,
663    base_iri: Option<&Iri<String>>,
664) -> Result<(), TurtleError> {
665    if let Some(base_iri) = base_iri {
666        parse_iriref(read, temp_buf)?;
667        let result = base_iri.resolve_into(temp_buf, buffer).map_err(|error| {
668            read.parse_error(TurtleErrorKind::InvalidIri {
669                iri: temp_buf.to_owned(),
670                error,
671            })
672        });
673        temp_buf.clear();
674        result
675    } else {
676        parse_iriref(read, buffer)
677    }
678}
679
680fn parse_literal<'a>(
681    read: &mut LookAheadByteReader<impl BufRead>,
682    buffer: &'a mut String,
683    annotation_buffer: &'a mut String,
684    temp_buf: &mut String,
685    base_iri: &Option<Iri<String>>,
686    prefixes: &HashMap<String, String>,
687) -> Result<Literal<'a>, TurtleError> {
688    // [13] 	literal 	::= 	RDFLiteral | NumericLiteral | BooleanLiteral
689    match read.required_current()? {
690        b'"' | b'\'' => {
691            match parse_rdf_literal(
692                read,
693                buffer,
694                annotation_buffer,
695                temp_buf,
696                base_iri,
697                prefixes,
698            )? {
699                Literal::LanguageTaggedString { .. } => Ok(Literal::LanguageTaggedString {
700                    value: buffer,
701                    language: annotation_buffer,
702                }),
703                Literal::Simple { .. } => Ok(Literal::Simple { value: buffer }),
704                Literal::Typed { .. } => Ok(Literal::Typed {
705                    value: buffer,
706                    datatype: NamedNode {
707                        iri: annotation_buffer,
708                    },
709                }),
710            }
711        }
712        b'+' | b'-' | b'.' | b'0'..=b'9' => {
713            match parse_numeric_literal(read, buffer)? {
714                Literal::Typed { datatype, .. } => {
715                    annotation_buffer.push_str(datatype.iri);
716                }
717                _ => unreachable!(),
718            }
719            Ok(Literal::Typed {
720                value: buffer,
721                datatype: NamedNode {
722                    iri: annotation_buffer,
723                },
724            })
725        }
726        _ => {
727            match parse_boolean_literal(read, buffer)? {
728                Literal::Typed { datatype, .. } => {
729                    annotation_buffer.push_str(datatype.iri);
730                }
731                _ => unreachable!(),
732            }
733            Ok(Literal::Typed {
734                value: buffer,
735                datatype: NamedNode {
736                    iri: annotation_buffer,
737                },
738            })
739        }
740    }
741}
742
743#[allow(clippy::unnecessary_wraps)]
744fn allocate_collection(
745    collection: Option<BlankNodeId>,
746    buffer: &mut String,
747) -> Result<GeneralizedTerm<'_>, TurtleError> {
748    match collection {
749        Some(id) => {
750            buffer.push_str(id.as_ref());
751            Ok(BlankNode { id: buffer }.into())
752        }
753        None => Ok(NamedNode { iri: RDF_NIL }.into()),
754    }
755}
756
757//
758
759#[cfg(test)]
760mod test {
761    use super::*;
762    use std::io::Cursor;
763
764    const OK_TURTLE_ERROR: Result<(), TurtleError> = Ok(());
765
766    type OwnedQuad = (OwnedTerm, OwnedTerm, OwnedTerm, Option<OwnedTerm>);
767
768    #[test]
769    fn relative_iri_references() -> Result<(), TurtleError> {
770        let got = parse_gtrig(
771            r#"
772          <../s1> <#p1> </o1>.
773          { <../s2> <#p2> </o2> }
774          <//g3> { <../s3> <#p3> </o3> }
775          GRAPH <//g4> { <../s4> <#p4> </o4> }
776        "#,
777        )?;
778
779        let expected = parse_gnq(
780            r#"
781          <../s1> <#p1> </o1>.
782          <../s2> <#p2> </o2>.
783          <../s3> <#p3> </o3> <//g3>.
784          <../s4> <#p4> </o4> <//g4>.
785        "#,
786        )?;
787
788        assert_eq!(expected, got);
789        Ok(())
790    }
791
792    #[test]
793    fn relative_prefixes() -> Result<(), TurtleError> {
794        let got = parse_gtrig(
795            r#"
796          @prefix s: <../>.
797          PREFIX p: <#>
798          PREFIX o: </>
799          PREFIX g: <//>
800
801          s:s1 p:p1 o:o1.
802          { s:s2 p:p2 o:o2 }
803          g:g3 { s:s3 p:p3 o:o3 }
804          GRAPH g:g4 { s:s4 p:p4 o:o4 }
805        "#,
806        )?;
807
808        let expected = parse_gnq(
809            r#"
810          <../s1> <#p1> </o1>.
811          <../s2> <#p2> </o2>.
812          <../s3> <#p3> </o3> <//g3>.
813          <../s4> <#p4> </o4> <//g4>.
814        "#,
815        )?;
816
817        assert_eq!(expected, got);
818        Ok(())
819    }
820
821    #[test]
822    fn all_variables() -> Result<(), TurtleError> {
823        let got = parse_gtrig(
824            r#"
825          ?s1 ?p1 ?o1.
826          { ?s2 ?p2 ?o2 }
827          ?g3 { ?s3 ?p3 ?o3 }
828          GRAPH ?g4 { ?s4 ?p4 ?o4 }
829        "#,
830        )?;
831
832        let expected = parse_gnq(
833            r#"
834          ?s1 ?p1 ?o1.
835          ?s2 ?p2 ?o2.
836          ?s3 ?p3 ?o3 ?g3.
837          ?s4 ?p4 ?o4 ?g4.
838        "#,
839        )
840        .unwrap();
841
842        assert_eq!(expected, got);
843        Ok(())
844    }
845
846    #[test]
847    fn all_literals() -> Result<(), TurtleError> {
848        let got = parse_gtrig(
849            r#"
850          "s1" "p1" "o1".
851          { "s2" "p2" "o2" }
852          "g3" { "s3" "p3" "o3" }
853          GRAPH "g4" { "s4" "p4" "o4" }
854        "#,
855        )?;
856
857        let expected = parse_gnq(
858            r#"
859          "s1" "p1" "o1".
860          "s2" "p2" "o2".
861          "s3" "p3" "o3" "g3".
862          "s4" "p4" "o4" "g4".
863        "#,
864        )
865        .unwrap();
866
867        assert_eq!(expected, got);
868        Ok(())
869    }
870
871    #[test]
872    fn all_quoted_triples() -> Result<(), TurtleError> {
873        let got = parse_gtrig(
874            r#"@prefix : <#>.
875          << :ss1 _:ps1 "os1" >> << _:sp1 "pp1" ?op1 >> << "so1" ?po1 :oo1 >>.
876          { << ?ss2 :ps2  _:os2 >> << :sp2 "pp2" _:op2 >> << "so2" _:po2 ?oo2 >> }
877          << _:sg3 ?pg3  :og3 >> { << ?ss3 :ps3 ?os3 >> << :sp3 ?pp3 _:op3 >> << ?so3 _:po3 "oo3" >> }
878          GRAPH << _:sg4 "pg4" :og4 >> { << "ss4" :ps4 _:os4 >> << :sp4 _:pp4 ?op4 >> << _:so4 ?po4 "oo4" >> }
879        "#,
880        )?;
881
882        let expected = parse_gnq(r#"
883          << <#ss1> _:ps1 "os1" >> << _:sp1 "pp1" ?op1 >> << "so1" ?po1 <#oo1> >>.
884          << ?ss2 <#ps2> _:os2 >> << <#sp2> "pp2" _:op2 >> << "so2" _:po2 ?oo2 >>.
885          << ?ss3 <#ps3> ?os3 >> << <#sp3> ?pp3 _:op3 >> << ?so3 _:po3 "oo3" >> << _:sg3 ?pg3  <#og3> >>.
886          << "ss4" <#ps4> _:os4 >> << <#sp4> _:pp4 ?op4 >> << _:so4 ?po4 "oo4" >> << _:sg4 "pg4" <#og4> >>.
887        "#).unwrap();
888
889        assert_eq!(expected, got);
890        Ok(())
891    }
892
893    #[test]
894    fn deeply_nested_triple() -> Result<(), TurtleError> {
895        let got = parse_gtrig(
896            r#"@prefix : <#>.
897          << << :a :b :c >> << :d :e :f >> << :g :h :i >> >> {
898            << << :j :k :l >> << :m :n :o >> << :p :q :r >> >>
899            << << :s :t :u >> << :v :w :x >> << :y :z :A >> >>
900            << << :B :C :D >> << :E :F :G >> << :H :I :J >> >>
901        }"#,
902        )?;
903        let expected = parse_gnq(r#"
904            << << <#j> <#k> <#l> >> << <#m> <#n> <#o> >> << <#p> <#q> <#r> >> >>    << << <#s> <#t> <#u> >> << <#v> <#w> <#x> >> << <#y> <#z> <#A> >> >>    << << <#B> <#C> <#D> >> << <#E> <#F> <#G> >> << <#H> <#I> <#J> >> >>    << << <#a> <#b> <#c> >> << <#d> <#e> <#f> >> << <#g> <#h> <#i> >> >>.
905        "#).unwrap();
906
907        assert_eq!(expected, got);
908        Ok(())
909    }
910
911    #[test]
912    fn composite_predicate() -> Result<(), TurtleError> {
913        let gtrig = r#"
914          ?s [ ?p ?o1 ] ?o2 .
915        "#;
916
917        let mut got: Vec<OwnedQuad> = Vec::with_capacity(2);
918
919        GTriGParser::new(
920            Cursor::new(gtrig),
921            Some(Iri::parse("http://example.org/base/".to_owned()).unwrap()),
922        )
923        .parse_all(&mut |quad| {
924            got.push((
925                quad.subject.into(),
926                quad.predicate.into(),
927                quad.object.into(),
928                quad.graph_name.map(OwnedTerm::from),
929            ));
930            OK_TURTLE_ERROR
931        })?;
932
933        assert_eq!(v("p"), got[0].1);
934        assert_eq!(v("o1"), got[0].2);
935        assert_eq!(v("s"), got[1].0);
936        assert_eq!(v("o2"), got[1].2);
937        assert_eq!(got[0].0, got[1].1);
938        Ok(())
939    }
940
941    fn parse_gtrig(txt: &str) -> Result<Vec<OwnedQuad>, TurtleError> {
942        let mut got = Vec::new();
943        GTriGParser::new(Cursor::new(txt), None).parse_all(&mut |quad| {
944            got.push((
945                quad.subject.into(),
946                quad.predicate.into(),
947                quad.object.into(),
948                quad.graph_name.map(OwnedTerm::from),
949            ));
950            OK_TURTLE_ERROR
951        })?;
952        Ok(got)
953    }
954
955    fn parse_gnq(txt: &str) -> Result<Vec<OwnedQuad>, TurtleError> {
956        let mut got = Vec::new();
957        crate::GeneralizedNQuadsParser::new(Cursor::new(txt)).parse_all(&mut |quad| {
958            got.push((
959                quad.subject.into(),
960                quad.predicate.into(),
961                quad.object.into(),
962                quad.graph_name.map(OwnedTerm::from),
963            ));
964            OK_TURTLE_ERROR
965        })?;
966        Ok(got)
967    }
968
969    fn v(value: &str) -> OwnedTerm {
970        OwnedTerm::Variable(value.to_string())
971    }
972
973    impl<'a> From<GeneralizedTerm<'a>> for OwnedTerm {
974        fn from(other: GeneralizedTerm<'a>) -> OwnedTerm {
975            match other {
976                GeneralizedTerm::NamedNode(n) => OwnedTerm::NamedNode(n.iri.to_string()),
977                GeneralizedTerm::BlankNode(n) => OwnedTerm::BlankNode(n.id.to_string()),
978                GeneralizedTerm::Literal(Literal::Simple { value }) => {
979                    OwnedTerm::LiteralSimple(value.to_string())
980                }
981                GeneralizedTerm::Literal(Literal::LanguageTaggedString { value, language }) => {
982                    OwnedTerm::LiteralLanguage(value.to_string(), language.to_string())
983                }
984                GeneralizedTerm::Literal(Literal::Typed { value, datatype }) => {
985                    OwnedTerm::LiteralDatatype(value.to_string(), datatype.iri.to_string())
986                }
987                GeneralizedTerm::Variable(n) => OwnedTerm::Variable(n.name.to_string()),
988                GeneralizedTerm::Triple(t) => {
989                    OwnedTerm::Triple(Box::new([t[0].into(), t[1].into(), t[2].into()]))
990                }
991            }
992        }
993    }
994
995    #[derive(Clone, Debug, PartialEq)]
996    enum OwnedTerm {
997        NamedNode(String),
998        BlankNode(String),
999        LiteralSimple(String),
1000        LiteralLanguage(String, String),
1001        LiteralDatatype(String, String),
1002        Variable(String),
1003        Triple(Box<[OwnedTerm; 3]>),
1004    }
1005
1006    impl<'a> From<&'a OwnedTerm> for GeneralizedTerm<'a> {
1007        fn from(other: &'a OwnedTerm) -> GeneralizedTerm<'a> {
1008            match other {
1009                OwnedTerm::NamedNode(iri) => GeneralizedTerm::NamedNode(NamedNode { iri }),
1010                OwnedTerm::BlankNode(id) => GeneralizedTerm::BlankNode(BlankNode { id }),
1011                OwnedTerm::LiteralSimple(value) => {
1012                    GeneralizedTerm::Literal(Literal::Simple { value })
1013                }
1014                OwnedTerm::LiteralLanguage(value, language) => {
1015                    GeneralizedTerm::Literal(Literal::LanguageTaggedString { value, language })
1016                }
1017                OwnedTerm::LiteralDatatype(value, iri) => {
1018                    GeneralizedTerm::Literal(Literal::Typed {
1019                        value,
1020                        datatype: NamedNode { iri },
1021                    })
1022                }
1023                OwnedTerm::Variable(name) => GeneralizedTerm::Variable(Variable { name }),
1024                OwnedTerm::Triple(_) => {
1025                    unimplemented!()
1026                }
1027            }
1028        }
1029    }
1030}