rio_turtle/
turtle.rs

1//! Implementation of Turtle and Trig RDF syntax
2
3use crate::error::*;
4use crate::shared::*;
5use crate::triple_allocator::TripleAllocator;
6use crate::utils::*;
7use oxiri::Iri;
8use rio_api::model::*;
9use rio_api::parser::{QuadsParser, TriplesParser};
10use std::collections::HashMap;
11use std::io::BufRead;
12use std::str;
13
/// A [Turtle](https://www.w3.org/TR/turtle/) and [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/#turtle-star) streaming parser.
///
/// It implements the [`TriplesParser`] trait.
///
///
/// Count the number of people using the [`TriplesParser`] API:
/// ```
/// use rio_turtle::{TurtleParser, TurtleError};
/// use rio_api::parser::TriplesParser;
/// use rio_api::model::NamedNode;
///
/// let file = b"@prefix schema: <http://schema.org/> .
/// <http://example.com/foo> a schema:Person ;
///     schema:name  \"Foo\" .
/// <http://example.com/bar> a schema:Person ;
///     schema:name  \"Bar\" .";
///
/// let rdf_type = NamedNode { iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" };
/// let schema_person = NamedNode { iri: "http://schema.org/Person" };
/// let mut count = 0;
/// TurtleParser::new(file.as_ref(), None).parse_all(&mut |t| {
///     if t.predicate == rdf_type && t.object == schema_person.into() {
///         count += 1;
///     }
///     Ok(()) as Result<(), TurtleError>
/// })?;
/// assert_eq!(2, count);
/// # Result::<_,rio_turtle::TurtleError>::Ok(())
/// ```
pub struct TurtleParser<R: BufRead> {
    /// Byte reader over the input; the parsing functions inspect it with
    /// `current()`, `ahead()` and `starts_with()` and advance it with `consume()`.
    read: LookAheadByteReader<R>,
    /// Base IRI handed to the IRI parsing helpers; replaced when a `@base` or
    /// `BASE` directive is parsed.
    base_iri: Option<Iri<String>>,
    /// Mapping from prefix name to prefix IRI, filled by `@prefix` / `PREFIX`
    /// directives and exposed through `prefixes()`.
    prefixes: HashMap<String, String>,
    /// Passed to `parse_blank_node`; presumably produces fresh blank node
    /// identifiers (the generator itself is defined outside this file).
    bnode_id_generator: BlankNodeIdGenerator,
    /// Storage for the subject / predicate / object of the triple(s) being built.
    /// One triple is started in `new`, and the statement-level parsers assert that
    /// exactly one incomplete triple remains after each statement.
    triple_alloc: TripleAllocator,
    /// Scratch string passed as a temporary buffer to the IRI parsing helpers.
    temp_buf: String,
}
51
52impl<R: BufRead> TurtleParser<R> {
53    /// Builds the parser from a `BufRead` implementation, and a base IRI for relative IRI resolution.
54    pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
55        let mut triple_alloc = TripleAllocator::new();
56        triple_alloc.push_triple_start();
57        Self {
58            read: LookAheadByteReader::new(reader),
59            base_iri,
60            prefixes: HashMap::default(),
61            bnode_id_generator: BlankNodeIdGenerator::default(),
62            triple_alloc,
63            temp_buf: String::default(),
64        }
65    }
66
67    /// The list of IRI prefixes considered at the current step of the parsing.
68    ///
69    /// This method returns the mapping from prefix name to prefix value.
70    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
71    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
72    ///
73    /// ```
74    /// use std::collections::HashMap;
75    /// use rio_api::model::NamedNode;
76    /// use rio_api::parser::TriplesParser;
77    /// use rio_turtle::{TurtleError, TurtleParser};
78    ///
79    /// let file = b"@prefix schema: <http://schema.org/> .
80    /// @prefix ex: <http://example.com/> .
81    /// ex: a schema:WebSite .
82    /// @prefix ex: <http://example.org/> .
83    /// ex: a schema:WebSite .";
84    ///
85    /// let mut parser = TurtleParser::new(file.as_ref(), None);
86    /// assert_eq!(parser.prefixes(), &HashMap::new()); // No prefix at the beginning
87    ///
88    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the first prefix"));
89    /// assert_eq!(parser.prefixes().len(), 1);
90    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
91    ///
92    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the second prefix"));
93    /// assert_eq!(parser.prefixes().len(), 2);
94    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
95    /// assert_eq!(parser.prefixes()["ex"], "http://example.com/");
96    ///
97    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |t| {
98    ///     assert_eq!(t.subject, NamedNode { iri: "http://example.com/" }.into()); // We read the first triple
99    ///     Ok(())
100    /// });
101    ///
102    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the new version of the ex: prefix"));
103    /// assert_eq!(parser.prefixes().len(), 2);
104    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
105    /// assert_eq!(parser.prefixes()["ex"], "http://example.org/");  
106    ///
107    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |t| {
108    ///     assert_eq!(t.subject, NamedNode { iri: "http://example.org/" }.into()); // We read the second triple
109    ///    Ok(())
110    /// });
111    ///
112    /// # Result::<_,TurtleError>::Ok(())
113    /// ```
114    pub fn prefixes(&self) -> &HashMap<String, String> {
115        &self.prefixes
116    }
117}
118
119impl<R: BufRead> TriplesParser for TurtleParser<R> {
120    type Error = TurtleError;
121
122    fn parse_step<E: From<TurtleError>>(
123        &mut self,
124        on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
125    ) -> Result<(), E> {
126        parse_statement(self, on_triple)
127    }
128
129    fn is_end(&self) -> bool {
130        self.read.current().is_none()
131    }
132}
133
/// A [TriG](https://www.w3.org/TR/trig/) and [TriG-star](https://w3c.github.io/rdf-star/cg-spec/#trig-star) streaming parser.
///
/// It implements the [`QuadsParser`] trait.
///
///
/// Count the number of people using the [`QuadsParser`] API:
/// ```
/// use rio_turtle::{TriGParser, TurtleError};
/// use rio_api::parser::QuadsParser;
/// use rio_api::model::NamedNode;
///
/// let file = b"@prefix schema: <http://schema.org/> .
/// <http://example/> {
///     <http://example.com/foo> a schema:Person ;
///         schema:name  \"Foo\" .
///     <http://example.com/bar> a schema:Person ;
///         schema:name  \"Bar\" .
/// }";
///
/// let rdf_type = NamedNode { iri: "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" };
/// let schema_person = NamedNode { iri: "http://schema.org/Person" };
/// let mut count = 0;
/// TriGParser::new(file.as_ref(), None).parse_all(&mut |t| {
///     if t.predicate == rdf_type && t.object == schema_person.into() {
///         count += 1;
///     }
///     Ok(()) as Result<(), TurtleError>
/// })?;
/// assert_eq!(2, count);
/// # Result::<_, TurtleError>::Ok(())
/// ```
pub struct TriGParser<R: BufRead> {
    /// Underlying Turtle parser; it owns the reader, the base IRI, the prefixes
    /// and the triple allocation state shared with the Turtle grammar.
    inner: TurtleParser<R>,
    /// Buffer the graph label (or leading subject) is parsed into; it is cleared
    /// once the corresponding graph block or statement has been handled.
    graph_name_buf: String,
}
169
170impl<R: BufRead> TriGParser<R> {
171    /// Builds the parser from a `BufRead` implementation, and a base IRI for relative IRI resolution.
172    pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
173        Self {
174            inner: TurtleParser::new(reader, base_iri),
175            graph_name_buf: String::default(),
176        }
177    }
178
179    /// The list of IRI prefixes considered at the current step of the parsing.
180    ///
181    /// This method returns the mapping from prefix name to prefix value.
182    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
183    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
184    ///
185    /// ```
186    /// use std::collections::HashMap;
187    /// use rio_api::model::NamedNode;
188    /// use rio_api::parser::QuadsParser;
189    /// use rio_turtle::{TurtleError, TriGParser};
190    ///
191    /// let file = b"@prefix schema: <http://schema.org/> .
192    /// @prefix ex: <http://example.com/> .
193    /// ex: a schema:WebSite .
194    /// @prefix ex: <http://example.org/> .
195    /// ex: a schema:WebSite .";
196    ///
197    /// let mut parser = TriGParser::new(file.as_ref(), None);
198    /// assert_eq!(parser.prefixes(), &HashMap::new()); // No prefix at the beginning
199    ///
200    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the first prefix"));
201    /// assert_eq!(parser.prefixes().len(), 1);
202    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
203    ///
204    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the second prefix"));
205    /// assert_eq!(parser.prefixes().len(), 2);
206    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
207    /// assert_eq!(parser.prefixes()["ex"], "http://example.com/");
208    ///
209    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |t| {
210    ///     assert_eq!(t.subject, NamedNode { iri: "http://example.com/" }.into()); // We read the first triple
211    ///     Ok(())
212    /// });
213    ///
214    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |_| panic!("We read the new version of the ex: prefix"));
215    /// assert_eq!(parser.prefixes().len(), 2);
216    /// assert_eq!(parser.prefixes()["schema"], "http://schema.org/");
217    /// assert_eq!(parser.prefixes()["ex"], "http://example.org/");  
218    ///
219    /// let _: Result<_, TurtleError> = parser.parse_step(&mut |t| {
220    ///     assert_eq!(t.subject, NamedNode { iri: "http://example.org/" }.into()); // We read the second triple
221    ///    Ok(())
222    /// });
223    ///
224    /// # Result::<_,TurtleError>::Ok(())
225    /// ```
226    pub fn prefixes(&self) -> &HashMap<String, String> {
227        &self.inner.prefixes
228    }
229}
230
231impl<R: BufRead> QuadsParser for TriGParser<R> {
232    type Error = TurtleError;
233
234    fn parse_step<E: From<TurtleError>>(
235        &mut self,
236        on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
237    ) -> Result<(), E> {
238        parse_block_or_directive(self, on_quad)
239    }
240
241    fn is_end(&self) -> bool {
242        self.inner.read.current().is_none()
243    }
244}
245
/// IRI of `rdf:type`; used as the predicate when the `a` keyword is parsed as a verb.
pub(crate) const RDF_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
/// IRI of `rdf:nil`.
pub(crate) const RDF_NIL: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil";
/// IRI of `rdf:first`.
pub(crate) const RDF_FIRST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first";
/// IRI of `rdf:rest`.
pub(crate) const RDF_REST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest";
/// IRI of the `xsd:boolean` datatype.
pub(crate) const XSD_BOOLEAN: &str = "http://www.w3.org/2001/XMLSchema#boolean";
/// IRI of the `xsd:decimal` datatype.
pub(crate) const XSD_DECIMAL: &str = "http://www.w3.org/2001/XMLSchema#decimal";
/// IRI of the `xsd:double` datatype.
pub(crate) const XSD_DOUBLE: &str = "http://www.w3.org/2001/XMLSchema#double";
/// IRI of the `xsd:integer` datatype.
pub(crate) const XSD_INTEGER: &str = "http://www.w3.org/2001/XMLSchema#integer";
254
255fn parse_statement<E: From<TurtleError>>(
256    parser: &mut TurtleParser<impl BufRead>,
257    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
258) -> Result<(), E> {
259    skip_whitespace(&mut parser.read)?;
260
261    if parser.read.current().is_none() {
262        Ok(())
263    } else if parser.read.starts_with(b"@prefix") {
264        parse_prefix_id(
265            &mut parser.read,
266            &mut parser.prefixes,
267            &parser.base_iri,
268            &mut parser.temp_buf,
269        )
270        .map_err(E::from)
271    } else if parser.read.starts_with(b"@base") {
272        parser.base_iri = Some(parse_base(
273            &mut parser.read,
274            &mut parser.temp_buf,
275            &parser.base_iri,
276        )?);
277        Ok(())
278    } else if parser.read.starts_with_ignore_ascii_case(b"BASE")
279        && parser
280            .read
281            .ahead(4)?
282            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
283    {
284        parser.base_iri = Some(parse_sparql_base(
285            &mut parser.read,
286            &mut parser.temp_buf,
287            &parser.base_iri,
288        )?);
289        Ok(())
290    } else if parser.read.starts_with_ignore_ascii_case(b"PREFIX")
291        && parser
292            .read
293            .ahead(6)?
294            .map_or(true, |c| c.is_ascii_whitespace())
295    {
296        parse_sparql_prefix(
297            &mut parser.read,
298            &mut parser.prefixes,
299            &parser.base_iri,
300            &mut parser.temp_buf,
301        )
302        .map_err(E::from)
303    } else {
304        parse_triples(parser, on_triple)?;
305        debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
306        debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);
307
308        parser.read.check_is_current(b'.')?;
309        parser.read.consume()?;
310        Ok(())
311    }
312}
313
314fn parse_block_or_directive<E: From<TurtleError>>(
315    parser: &mut TriGParser<impl BufRead>,
316    on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
317) -> Result<(), E> {
318    // [1g] 	trigDoc 	::= 	(directive | block)*
319    // [2g] 	block 	::= 	triplesOrGraph | wrappedGraph | triples2 | "GRAPH" labelOrSubject wrappedGraph
320    skip_whitespace(&mut parser.inner.read)?;
321
322    if parser.inner.read.current().is_none() {
323        Ok(())
324    } else if parser.inner.read.starts_with(b"@prefix") {
325        parse_prefix_id(
326            &mut parser.inner.read,
327            &mut parser.inner.prefixes,
328            &parser.inner.base_iri,
329            &mut parser.inner.temp_buf,
330        )?;
331        Ok(())
332    } else if parser.inner.read.starts_with(b"@base") {
333        parser.inner.base_iri = Some(parse_base(
334            &mut parser.inner.read,
335            &mut parser.inner.temp_buf,
336            &parser.inner.base_iri,
337        )?);
338        Ok(())
339    } else if parser.inner.read.starts_with_ignore_ascii_case(b"BASE")
340        && parser
341            .inner
342            .read
343            .ahead(4)?
344            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
345    {
346        parser.inner.base_iri = Some(parse_sparql_base(
347            &mut parser.inner.read,
348            &mut parser.inner.temp_buf,
349            &parser.inner.base_iri,
350        )?);
351        Ok(())
352    } else if parser.inner.read.starts_with_ignore_ascii_case(b"PREFIX")
353        && parser
354            .inner
355            .read
356            .ahead(6)?
357            .map_or(true, |c| c.is_ascii_whitespace())
358    {
359        parse_sparql_prefix(
360            &mut parser.inner.read,
361            &mut parser.inner.prefixes,
362            &parser.inner.base_iri,
363            &mut parser.inner.temp_buf,
364        )?;
365        Ok(())
366    } else if parser.inner.read.starts_with_ignore_ascii_case(b"GRAPH")
367        && parser
368            .inner
369            .read
370            .ahead(5)?
371            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
372    {
373        parser.inner.read.consume_many("GRAPH".len())?;
374        skip_whitespace(&mut parser.inner.read)?;
375
376        let graph_name = parse_label_or_subject(&mut parser.graph_name_buf, &mut parser.inner)?;
377        skip_whitespace(&mut parser.inner.read)?;
378
379        parse_wrapped_graph(
380            &mut parser.inner,
381            &mut on_triple_in_graph(on_quad, Some(graph_name)),
382        )?;
383        parser.graph_name_buf.clear();
384        Ok(())
385    } else if parser.inner.read.current() == Some(b'{') {
386        parse_wrapped_graph(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))
387    } else if parser.inner.read.current() == Some(b'[')
388        && !is_followed_by_space_and_closing_bracket(&mut parser.inner.read)?
389        || parser.inner.read.current() == Some(b'(')
390    {
391        parse_triples2(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))
392    } else {
393        parse_triples_or_graph(parser, on_quad)
394    }
395}
396
397fn parse_triples_or_graph<E: From<TurtleError>>(
398    parser: &mut TriGParser<impl BufRead>,
399    on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
400) -> Result<(), E> {
401    // [3g] 	triplesOrGraph 	::= 	labelOrSubject ( wrappedGraph | predicateObjectList '.' ) | embTriple predicateObjectList '.'
402
403    if parser.inner.read.starts_with(b"<<") {
404        parse_quoted_triple(&mut parser.inner)?;
405        parser.inner.triple_alloc.push_subject_triple();
406        skip_whitespace(&mut parser.inner.read)?;
407        parse_predicate_object_list(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))?;
408        parser.inner.read.check_is_current(b'.')?;
409        parser.inner.read.consume()?;
410        parser.inner.triple_alloc.pop_subject();
411        return Ok(());
412    }
413
414    let TriGParser {
415        inner,
416        graph_name_buf,
417    } = parser;
418    let graph_name = parse_label_or_subject(graph_name_buf, inner)?;
419    skip_whitespace(&mut inner.read)?;
420
421    if inner.read.current() == Some(b'{') {
422        parse_wrapped_graph(
423            &mut parser.inner,
424            &mut on_triple_in_graph(on_quad, Some(graph_name)),
425        )?;
426    } else {
427        let blank = matches!(graph_name, GraphName::BlankNode(_));
428        inner.triple_alloc.try_push_subject(|b| {
429            b.push_str(graph_name_buf);
430            if blank {
431                Ok(Subject::BlankNode(BlankNode { id: b }))
432            } else {
433                Ok(Subject::NamedNode(NamedNode { iri: b }))
434            }
435        })?;
436        parse_predicate_object_list(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))?;
437
438        parser.inner.read.check_is_current(b'.')?;
439        parser.inner.read.consume()?;
440        parser.inner.triple_alloc.pop_subject();
441        debug_assert_eq!(parser.inner.triple_alloc.complete_len(), 0);
442        debug_assert_eq!(parser.inner.triple_alloc.incomplete_len(), 1);
443    }
444    parser.graph_name_buf.clear();
445    Ok(())
446}
447
448fn parse_triples2<E: From<TurtleError>>(
449    parser: &mut TurtleParser<impl BufRead>,
450    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
451) -> Result<(), E> {
452    // [4g] 	triples2 	::= 	blankNodePropertyList predicateObjectList? '.' | collection predicateObjectList '.'
453    match parser.read.current() {
454        Some(b'[') if !is_followed_by_space_and_closing_bracket(&mut parser.read)? => {
455            let id = parse_blank_node_property_list(parser, on_triple)?;
456            parser.triple_alloc.try_push_subject(|b| {
457                b.push_str(id.as_ref());
458                Ok(Subject::from(BlankNode { id: b }))
459            })?;
460            skip_whitespace(&mut parser.read)?;
461            if parser.read.current() != Some(b'.') {
462                parse_predicate_object_list(parser, on_triple)?;
463            }
464        }
465        _ => {
466            let collec = parse_collection(parser, on_triple)?;
467            parser
468                .triple_alloc
469                .try_push_subject(|b| allocate_collection(collec, b))?;
470            skip_whitespace(&mut parser.read)?;
471            parse_predicate_object_list(parser, on_triple)?;
472        }
473    }
474
475    parser.triple_alloc.pop_subject();
476    debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
477    debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);
478
479    parser.read.check_is_current(b'.')?;
480    parser.read.consume()?;
481    Ok(())
482}
483
484fn parse_wrapped_graph<E: From<TurtleError>>(
485    parser: &mut TurtleParser<impl BufRead>,
486    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
487) -> Result<(), E> {
488    // [5g] 	wrappedGraph 	::= 	'{' triplesBlock? '}'
489    // [6g] 	triplesBlock 	::= 	triples ('.' triplesBlock?)?
490    parser.read.check_is_current(b'{')?;
491    parser.read.consume()?;
492    skip_whitespace(&mut parser.read)?;
493
494    loop {
495        if parser.read.current() == Some(b'}') {
496            parser.read.consume()?;
497            break;
498        }
499
500        parse_triples(parser, on_triple)?;
501        debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
502        debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);
503        match parser.read.current() {
504            Some(b'.') => {
505                parser.read.consume()?;
506                skip_whitespace(&mut parser.read)?;
507            }
508            Some(b'}') => {
509                parser.read.consume()?;
510                break;
511            }
512            _ => parser.read.unexpected_char_error()?,
513        }
514    }
515    Ok(())
516}
517
518fn parse_label_or_subject<'a>(
519    buffer: &'a mut String,
520    parser: &mut TurtleParser<impl BufRead>,
521) -> Result<GraphName<'a>, TurtleError> {
522    //[7g] 	labelOrSubject 	::= 	iri | BlankNode
523    // (split in two for the case of TriG*)
524
525    let TurtleParser {
526        read,
527        base_iri,
528        prefixes,
529        bnode_id_generator,
530        temp_buf,
531        ..
532    } = parser;
533    Ok(match read.current() {
534        Some(b'_') | Some(b'[') => parse_blank_node(read, buffer, bnode_id_generator)?.into(),
535        _ => parse_iri(read, buffer, temp_buf, base_iri, prefixes)?.into(),
536    })
537}
538
539fn parse_prefix_id(
540    read: &mut LookAheadByteReader<impl BufRead>,
541    prefixes: &mut HashMap<String, String>,
542    base_iri: &Option<Iri<String>>,
543    temp_buffer: &mut String,
544) -> Result<(), TurtleError> {
545    // [4] 	prefixID 	::= 	'@prefix' PNAME_NS IRIREF '.'
546    read.consume_many("@prefix".len())?;
547    skip_whitespace(read)?;
548
549    let mut prefix = String::default();
550    parse_pname_ns(read, &mut prefix)?;
551    skip_whitespace(read)?;
552
553    let mut value = String::default();
554    parse_iriref_relative(read, &mut value, temp_buffer, base_iri)?;
555    skip_whitespace(read)?;
556
557    read.check_is_current(b'.')?;
558    read.consume()?;
559
560    prefixes.insert(prefix, value);
561    Ok(())
562}
563
564pub(crate) fn parse_base(
565    read: &mut LookAheadByteReader<impl BufRead>,
566    buffer: &mut String,
567    base_iri: &Option<Iri<String>>,
568) -> Result<Iri<String>, TurtleError> {
569    // [5] 	base 	::= 	'@base' IRIREF '.'
570    read.consume_many("@base".len())?;
571    skip_whitespace(read)?;
572
573    let result = parse_base_iriref(read, buffer, base_iri)?;
574    skip_whitespace(read)?;
575
576    read.check_is_current(b'.')?;
577    read.consume()?;
578
579    Ok(result)
580}
581
582pub(crate) fn parse_sparql_base(
583    read: &mut LookAheadByteReader<impl BufRead>,
584    buffer: &mut String,
585    base_iri: &Option<Iri<String>>,
586) -> Result<Iri<String>, TurtleError> {
587    // [5s] 	sparqlBase 	::= 	"BASE" IRIREF
588    read.consume_many("BASE".len())?;
589    skip_whitespace(read)?;
590
591    parse_base_iriref(read, buffer, base_iri)
592}
593
594fn parse_base_iriref(
595    read: &mut LookAheadByteReader<impl BufRead>,
596    temp_buffer: &mut String,
597    base_iri: &Option<Iri<String>>,
598) -> Result<Iri<String>, TurtleError> {
599    //TODO: avoid double parsing
600    let mut buffer = String::default();
601    parse_iriref_relative(read, &mut buffer, temp_buffer, base_iri)?;
602    let result = Iri::parse(buffer.clone())
603        .map_err(|error| read.parse_error(TurtleErrorKind::InvalidIri { iri: buffer, error }))?;
604    temp_buffer.clear();
605    Ok(result)
606}
607
608fn parse_sparql_prefix(
609    read: &mut LookAheadByteReader<impl BufRead>,
610    prefixes: &mut HashMap<String, String>,
611    base_iri: &Option<Iri<String>>,
612    temp_buffer: &mut String,
613) -> Result<(), TurtleError> {
614    // [6s] 	sparqlPrefix 	::= 	"PREFIX" PNAME_NS IRIREF
615    read.consume_many("PREFIX".len())?;
616    skip_whitespace(read)?;
617
618    let mut prefix = String::default();
619    parse_pname_ns(read, &mut prefix)?;
620    skip_whitespace(read)?;
621
622    let mut value = String::default();
623    parse_iriref_relative(read, &mut value, temp_buffer, base_iri)?;
624    skip_whitespace(read)?;
625
626    prefixes.insert(prefix, value);
627    Ok(())
628}
629
630fn parse_triples<E: From<TurtleError>>(
631    parser: &mut TurtleParser<impl BufRead>,
632    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
633) -> Result<(), E> {
634    // [6] 	triples 	::= 	subject predicateObjectList | blankNodePropertyList predicateObjectList?
635    match parser.read.current() {
636        Some(b'[') if !is_followed_by_space_and_closing_bracket(&mut parser.read)? => {
637            let id = parse_blank_node_property_list(parser, on_triple)?;
638            parser.triple_alloc.try_push_subject(|b| {
639                b.push_str(id.as_ref());
640                Ok(Subject::from(BlankNode { id: b }))
641            })?;
642            skip_whitespace(&mut parser.read)?;
643            if parser.read.current() != Some(b'.') && parser.read.current() != Some(b'}') {
644                parse_predicate_object_list(parser, on_triple)?;
645            }
646        }
647        _ => {
648            parse_subject(parser, on_triple)?;
649            skip_whitespace(&mut parser.read)?;
650            parse_predicate_object_list(parser, on_triple)?;
651        }
652    }
653
654    parser.triple_alloc.pop_subject();
655    Ok(())
656}
657
658fn parse_predicate_object_list<E: From<TurtleError>>(
659    parser: &mut TurtleParser<impl BufRead>,
660    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
661) -> Result<(), E> {
662    // [7] 	predicateObjectList 	::= 	verb objectList (';' (verb objectList)?)*
663    loop {
664        parse_verb(parser)?;
665        skip_whitespace(&mut parser.read)?;
666
667        parse_object_list(parser, on_triple)?;
668        skip_whitespace(&mut parser.read)?;
669
670        parser.triple_alloc.pop_predicate();
671        if parser.read.current() != Some(b';') {
672            return Ok(());
673        }
674        while parser.read.current() == Some(b';') {
675            parser.read.consume()?;
676            skip_whitespace(&mut parser.read)?;
677        }
678        match parser.read.current() {
679            Some(b'.') | Some(b']') | Some(b'}') | None => return Ok(()),
680            Some(b'|') => return Ok(()),
681            _ => (), //continue
682        }
683    }
684}
685
686fn parse_object_list<E: From<TurtleError>>(
687    parser: &mut TurtleParser<impl BufRead>,
688    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
689) -> Result<(), E> {
690    // [8] 	objectList 	::= 	object (',' object)*
691    // or, for RDF-star
692    // [8] 	objectList 	::= 	object annotation? ( ',' object annotation? )*
693    // [30t] 	annotation 	::= 	'{|' predicateObjectList '|}'
694    loop {
695        parse_object(parser, on_triple)?;
696        skip_whitespace(&mut parser.read)?;
697
698        if parser.read.current() == Some(b'{') {
699            parser.read.check_is_next(b'|')?;
700            parser.read.consume_many(2)?;
701            skip_whitespace(&mut parser.read)?;
702
703            parser.triple_alloc.push_triple_start();
704            parser.triple_alloc.push_subject_triple();
705            parse_predicate_object_list(parser, on_triple)?;
706
707            parser.read.check_is_current(b'|')?;
708            parser.read.check_is_next(b'}')?;
709            parser.read.consume_many(2)?;
710            skip_whitespace(&mut parser.read)?;
711            parser.triple_alloc.pop_annotation_triple();
712        }
713
714        parser.triple_alloc.pop_object();
715        if parser.read.current() != Some(b',') {
716            return Ok(());
717        }
718        parser.read.consume()?;
719        skip_whitespace(&mut parser.read)?;
720    }
721}
722
723fn parse_verb(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
724    // [9] 	verb 	::= 	predicate | 'a'
725    if parser.read.current() == Some(b'a') {
726        match parser.read.next()? {
727            // We check that it is not a prefixed URI
728            Some(c) if is_possible_pn_chars_ascii(c) || c == b'.' || c == b':' || c > MAX_ASCII => {
729                parse_predicate(parser)
730            }
731            _ => {
732                parser.read.consume()?;
733                parser
734                    .triple_alloc
735                    .try_push_predicate(|_| Ok(NamedNode { iri: RDF_TYPE }))
736            }
737        }
738    } else {
739        parse_predicate(parser)
740    }
741}
742
743fn parse_subject<E: From<TurtleError>>(
744    parser: &mut TurtleParser<impl BufRead>,
745    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
746) -> Result<(), E> {
747    //[10] 	subject 	::= 	iri | BlankNode | collection
748    match parser.read.current() {
749        Some(b'_') | Some(b'[') => {
750            let TurtleParser {
751                read,
752                bnode_id_generator,
753                triple_alloc,
754                ..
755            } = parser;
756            triple_alloc.try_push_subject(|b| {
757                parse_blank_node(read, b, bnode_id_generator).map(Subject::from)
758            })?;
759        }
760        Some(b'(') => {
761            let collec = parse_collection(parser, on_triple)?;
762            parser
763                .triple_alloc
764                .try_push_subject(|b| allocate_collection(collec, b))?;
765        }
766        _ => {
767            if parser.read.required_current()? == b'<' && parser.read.required_next()? == b'<' {
768                parse_quoted_triple(parser)?;
769                parser.triple_alloc.push_subject_triple();
770            } else {
771                let TurtleParser {
772                    read,
773                    base_iri,
774                    prefixes,
775                    triple_alloc,
776                    temp_buf,
777                    ..
778                } = parser;
779                triple_alloc.try_push_subject(|b| {
780                    parse_iri(read, b, temp_buf, base_iri, prefixes).map(Subject::from)
781                })?;
782            }
783        }
784    };
785    Ok(())
786}
787
788fn parse_predicate(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
789    //[11] 	predicate 	::= 	iri
790    let TurtleParser {
791        read,
792        base_iri,
793        prefixes,
794        triple_alloc,
795        temp_buf,
796        ..
797    } = parser;
798    triple_alloc.try_push_predicate(|b| parse_iri(read, b, temp_buf, base_iri, prefixes))
799}
800
/// Parses one `object`, pushes it on the triple under construction and reports that triple.
///
/// The production is selected from the current byte, with extra look-ahead where a single
/// byte is ambiguous (`<<` versus `<`, `[ ]` versus `[ ... ]`, `true`/`false` versus a
/// prefixed name). Once the object has been pushed, `on_triple` is called with the triple on
/// top of the allocator and its result is returned.
///
/// The object is not popped here; callers such as `parse_collection` pop it themselves.
///
/// # Errors
///
/// Propagates errors from the reader and from the sub-parsers (converted into `E`), as well
/// as any error returned by `on_triple`.
fn parse_object<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    // [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal

    match parser.read.required_current()? {
        b'<' => {
            // "<<" opens a quoted triple (Turtle-star), a single '<' opens an IRIREF.
            if parser.read.required_next()? == b'<' {
                parse_quoted_triple(parser)?;
                parser.triple_alloc.push_object_triple();
            } else {
                let TurtleParser {
                    read,
                    base_iri,
                    triple_alloc,
                    temp_buf,
                    ..
                } = parser;
                triple_alloc.try_push_object(|b, _| {
                    parse_iriref_relative(read, b, temp_buf, base_iri).map(Term::from)
                })?;
            }
        }
        b'(' => {
            // The collection's own triples are emitted while it is parsed; only its head
            // (or rdf:nil when it is empty) becomes the object here.
            let collec = parse_collection(parser, on_triple)?;
            parser
                .triple_alloc
                .try_push_object(|b, _| allocate_collection(collec, b).map(Term::from))?;
        }
        // '[' that is not just "[ ]": a blank node property list. The guard must come before
        // the plain `b'['` arm below, which handles the anonymous blank node "[ ]".
        b'[' if !is_followed_by_space_and_closing_bracket(&mut parser.read)? => {
            let id = parse_blank_node_property_list(parser, on_triple)?;
            parser.triple_alloc.try_push_object(|b, _| {
                b.push_str(id.as_ref());
                Ok(Term::from(BlankNode { id: b }))
            })?;
        }
        b'_' | b'[' => {
            // Labelled blank node ("_:...") or anonymous blank node ("[ ]").
            let TurtleParser {
                read,
                bnode_id_generator,
                triple_alloc,
                ..
            } = parser;
            triple_alloc.try_push_object(|b, _| {
                parse_blank_node(read, b, bnode_id_generator).map(Term::from)
            })?;
        }
        b'"' | b'\'' => {
            // String literal; the second buffer receives the language tag or datatype IRI.
            let TurtleParser {
                read,
                base_iri,
                prefixes,
                triple_alloc,
                temp_buf,
                ..
            } = parser;
            triple_alloc.try_push_object(|b1, b2| {
                parse_rdf_literal(read, b1, b2, temp_buf, base_iri, prefixes).map(Term::from)
            })?;
        }
        b'+' | b'-' | b'.' | b'0'..=b'9' => {
            let TurtleParser {
                read, triple_alloc, ..
            } = parser;
            triple_alloc.try_push_object(|b, _| parse_numeric_literal(read, b).map(Term::from))?;
        }
        _ => {
            let TurtleParser {
                read, triple_alloc, ..
            } = parser;
            // "true"/"false" are boolean literals only when the byte after the keyword is
            // missing (end of input) or is below MAX_ASCII, is not a possible PN_CHARS byte
            // and is not ':'. Otherwise the token is read as a prefixed name.
            if read.starts_with(b"true")
                && read.ahead(4)?.map_or(true, |c| {
                    c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                })
                || read.starts_with(b"false")
                    && read.ahead(5)?.map_or(true, |c| {
                        c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                    })
            {
                triple_alloc
                    .try_push_object(|b, _| parse_boolean_literal(read, b).map(Term::from))?;
            } else {
                // Re-borrow the parser fields, this time including the prefix map.
                let TurtleParser {
                    read,
                    prefixes,
                    triple_alloc,
                    ..
                } = parser;
                triple_alloc.try_push_object(|b, _| {
                    parse_prefixed_name(read, b, prefixes).map(Term::from)
                })?;
            }
        }
    };
    // The triple is complete: hand it to the consumer.
    on_triple(*parser.triple_alloc.top())
}
898
899fn parse_blank_node_property_list<E: From<TurtleError>>(
900    parser: &mut TurtleParser<impl BufRead>,
901    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
902) -> Result<BlankNodeId, E> {
903    // [14] 	blankNodePropertyList 	::= 	'[' predicateObjectList ']'
904    parser.read.increment_stack_size()?;
905    parser.read.check_is_current(b'[')?;
906    parser.read.consume()?;
907    skip_whitespace(&mut parser.read)?;
908
909    let id = parser.bnode_id_generator.generate();
910    parser.triple_alloc.push_triple_start();
911    parser.triple_alloc.try_push_subject(|b| {
912        b.push_str(id.as_ref());
913        Ok(Subject::from(BlankNode { id: b }))
914    })?;
915
916    loop {
917        parse_predicate_object_list(parser, on_triple)?;
918        skip_whitespace(&mut parser.read)?;
919
920        if parser.read.current() == Some(b']') {
921            parser.read.consume()?;
922            break;
923        }
924    }
925
926    parser.triple_alloc.pop_subject();
927    parser.triple_alloc.pop_top_empty_triple();
928    parser.read.decrement_stack_size();
929    Ok(id)
930}
931
/// Parses a collection (`( object* )`) and reports the `rdf:first` / `rdf:rest` triples that
/// encode it through `on_triple`.
///
/// Returns `Ok(None)` for an empty collection and otherwise the identifier of the blank node
/// generated for the first element. `allocate_collection` turns that value into a term.
///
/// # Errors
///
/// Fails when the input ends before the closing `)`, and propagates reader errors,
/// `parse_object` errors and errors returned by `on_triple`.
fn parse_collection<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<Option<BlankNodeId>, E> {
    // [15] collection ::= '(' object* ')'
    parser.read.increment_stack_size()?;
    parser.read.check_is_current(b'(')?;
    parser.read.consume()?;
    // Blank node of the first element; stays `None` while no element has been seen.
    let mut root: Option<BlankNodeId> = None;
    loop {
        skip_whitespace(&mut parser.read)?;

        if parser.read.current().is_none() {
            // End of input inside the collection.
            return Ok(parser.read.unexpected_char_error()?);
        } else if parser.read.current() != Some(b')') {
            // One more element: it gets its own blank node.
            let new = parser.bnode_id_generator.generate();
            if root.is_none() {
                // First element: open the working triple used for the whole collection.
                root = Some(new);
                parser.triple_alloc.push_triple_start();
            } else {
                // Link the previous node to the new one: `<previous> rdf:rest <new>`.
                parser
                    .triple_alloc
                    .try_push_predicate(|_| Ok(NamedNode { iri: RDF_REST }))?;
                parser.triple_alloc.try_push_object(|b, _| {
                    b.push_str(new.as_ref());
                    Ok(Term::from(BlankNode { id: b }))
                })?;
                on_triple(*parser.triple_alloc.top())?;
                // Drop the whole link triple, including the previous node as subject.
                parser.triple_alloc.pop_object();
                parser.triple_alloc.pop_predicate();
                parser.triple_alloc.pop_subject();
            }

            // `<new> rdf:first <element>`; `parse_object` pushes the object and reports the
            // triple itself.
            parser.triple_alloc.try_push_subject(|b| {
                b.push_str(new.as_ref());
                Ok(Subject::from(BlankNode { id: b }))
            })?;
            parser
                .triple_alloc
                .try_push_predicate(|_| Ok(NamedNode { iri: RDF_FIRST }))?;
            parse_object(parser, on_triple)?;
            // The subject stays in place: it is needed for the next `rdf:rest` triple.
            parser.triple_alloc.pop_object();
            parser.triple_alloc.pop_predicate();
        } else {
            // trailing ')'
            parser.read.consume()?;
            if root.is_some() {
                // Close the list: `<last> rdf:rest rdf:nil`, then discard the working triple.
                parser
                    .triple_alloc
                    .try_push_predicate(|_| Ok(NamedNode { iri: RDF_REST }))?;
                parser
                    .triple_alloc
                    .try_push_object(|_, _| Ok(Term::from(NamedNode { iri: RDF_NIL })))?;
                on_triple(*parser.triple_alloc.top())?;
                parser.triple_alloc.pop_top_triple();
            }
            parser.read.decrement_stack_size();
            return Ok(root);
        }
    }
}
993
994#[allow(clippy::unnecessary_wraps)]
995fn allocate_collection(
996    collection: Option<BlankNodeId>,
997    buffer: &mut String,
998) -> Result<Subject<'_>, TurtleError> {
999    match collection {
1000        Some(id) => {
1001            buffer.push_str(id.as_ref());
1002            Ok(BlankNode { id: buffer }.into())
1003        }
1004        None => Ok(NamedNode { iri: RDF_NIL }.into()),
1005    }
1006}
1007
1008pub(crate) fn parse_numeric_literal<'a>(
1009    read: &mut LookAheadByteReader<impl BufRead>,
1010    buffer: &'a mut String,
1011) -> Result<Literal<'a>, TurtleError> {
1012    // [16] 	NumericLiteral 	::= 	INTEGER | DECIMAL | DOUBLE
1013    // [19] 	INTEGER 	::= 	[+-]? [0-9]+
1014    // [20] 	DECIMAL 	::= 	[+-]? [0-9]* '.' [0-9]+
1015    // [21] 	DOUBLE 	::= 	[+-]? ([0-9]+ '.' [0-9]* EXPONENT | '.' [0-9]+ EXPONENT | [0-9]+ EXPONENT)
1016    // merged [+-] [0-9]* ('.' [0-9]*)? EXPONENT?
1017    let c = read.required_current()?;
1018    match c {
1019        b'+' | b'-' => {
1020            buffer.push(char::from(c));
1021            read.consume()?
1022        }
1023        _ => (),
1024    }
1025
1026    // We read the digits before .
1027    let mut count_before: usize = 0;
1028    while let Some(c) = read.current() {
1029        match c {
1030            b'0'..=b'9' => {
1031                buffer.push(char::from(c));
1032                read.consume()?;
1033                count_before += 1;
1034            }
1035            _ => break,
1036        }
1037    }
1038
1039    // We read the digits after .
1040    let count_after = if read.current() == Some(b'.') {
1041        //We check if it is not the end of a statement
1042
1043        let stop = match read.next()? {
1044            Some(c) => !matches!(c, b'0'..=b'9' | b'e' | b'E'),
1045            None => true,
1046        };
1047        if stop {
1048            return if count_before > 0 {
1049                Ok(Literal::Typed {
1050                    value: buffer,
1051                    datatype: NamedNode { iri: XSD_INTEGER },
1052                })
1053            } else {
1054                read.unexpected_char_error()
1055            };
1056        }
1057
1058        buffer.push('.');
1059        let mut count_after = 0;
1060
1061        read.consume()?;
1062        while let Some(c) = read.current() {
1063            match c {
1064                b'0'..=b'9' => {
1065                    buffer.push(char::from(c));
1066                    read.consume()?;
1067                    count_after += 1;
1068                }
1069                _ => break,
1070            }
1071        }
1072        Some(count_after)
1073    } else {
1074        None
1075    };
1076
1077    // End
1078    let datatype = match read.current() {
1079        Some(b'e') | Some(b'E') => {
1080            if count_before > 0 || count_after.unwrap_or(0) > 0 {
1081                parse_exponent(read, buffer)?;
1082                XSD_DOUBLE
1083            } else {
1084                return read.unexpected_char_error();
1085            }
1086        }
1087        _ => {
1088            if count_after.is_none() && count_before > 0 {
1089                XSD_INTEGER
1090            } else if count_after.is_some() && count_after != Some(0) {
1091                XSD_DECIMAL
1092            } else {
1093                return read.unexpected_char_error();
1094            }
1095        }
1096    };
1097    Ok(Literal::Typed {
1098        value: buffer,
1099        datatype: NamedNode { iri: datatype },
1100    })
1101}
1102
1103#[allow(clippy::ptr_arg)]
1104pub(crate) fn parse_rdf_literal<'a>(
1105    read: &mut LookAheadByteReader<impl BufRead>,
1106    buffer: &'a mut String,
1107    annotation_buffer: &'a mut String,
1108    temp_buffer: &mut String,
1109    base_iri: &Option<Iri<String>>,
1110    prefixes: &HashMap<String, String>,
1111) -> Result<Literal<'a>, TurtleError> {
1112    // [128s] 	RDFLiteral 	::= 	String (LANGTAG | '^^' iri)?
1113    parse_string(read, buffer)?;
1114    skip_whitespace(read)?;
1115
1116    match read.current() {
1117        Some(b'@') => {
1118            parse_langtag(read, annotation_buffer)?;
1119            Ok(Literal::LanguageTaggedString {
1120                value: buffer,
1121                language: annotation_buffer,
1122            })
1123        }
1124        Some(b'^') => {
1125            read.consume()?;
1126            read.check_is_current(b'^')?;
1127            read.consume()?;
1128            skip_whitespace(read)?;
1129            parse_iri(read, annotation_buffer, temp_buffer, base_iri, prefixes)?;
1130            Ok(Literal::Typed {
1131                value: buffer,
1132                datatype: NamedNode {
1133                    iri: annotation_buffer,
1134                },
1135            })
1136        }
1137        _ => Ok(Literal::Simple { value: buffer }),
1138    }
1139}
1140
1141pub(crate) fn parse_boolean_literal<'a>(
1142    read: &mut LookAheadByteReader<impl BufRead>,
1143    buffer: &'a mut String,
1144) -> Result<Literal<'a>, TurtleError> {
1145    if read.starts_with(b"true") {
1146        read.consume_many("true".len())?;
1147        buffer.push_str("true");
1148    } else if read.starts_with(b"false") {
1149        read.consume_many("false".len())?;
1150        buffer.push_str("false");
1151    } else {
1152        return read.unexpected_char_error();
1153    }
1154    Ok(Literal::Typed {
1155        value: buffer,
1156        datatype: NamedNode { iri: XSD_BOOLEAN },
1157    })
1158}
1159
1160fn parse_string(
1161    read: &mut LookAheadByteReader<impl BufRead>,
1162    buffer: &mut String,
1163) -> Result<(), TurtleError> {
1164    match read.current() {
1165        Some(b'"') => {
1166            if read.starts_with(b"\"\"\"") {
1167                parse_string_literal_long_quote(read, buffer)
1168            } else {
1169                parse_string_literal_quote(read, buffer)
1170            }
1171        }
1172        Some(b'\'') => {
1173            if read.starts_with(b"'''") {
1174                parse_string_literal_long_single_quote(read, buffer)
1175            } else {
1176                parse_string_literal_single_quote(read, buffer)
1177            }
1178        }
1179        _ => read.unexpected_char_error(),
1180    }
1181}
1182
1183pub(crate) fn parse_iri<'a>(
1184    read: &mut LookAheadByteReader<impl BufRead>,
1185    buffer: &'a mut String,
1186    temp_buffer: &mut String,
1187    base_iri: &Option<Iri<String>>,
1188    prefixes: &HashMap<String, String>,
1189) -> Result<NamedNode<'a>, TurtleError> {
1190    // [135s] 	iri 	::= 	IRIREF | PrefixedName
1191    if read.current() == Some(b'<') {
1192        parse_iriref_relative(read, buffer, temp_buffer, base_iri)
1193    } else {
1194        parse_prefixed_name(read, buffer, prefixes)
1195    }
1196}
1197
/// Parses a prefixed name (`prefix:local`) and expands it into a full IRI.
///
/// The prefix is first read into `buffer` and looked up in `prefixes`; `buffer` is then
/// replaced by the namespace IRI bound to that prefix and the local part, if any, is
/// appended to it. Escapes (`\x`) and percent sequences (`%HH`) of the local part are
/// handled by `parse_pn_local_esc` and `parse_percent`.
///
/// # Errors
///
/// Returns `TurtleErrorKind::UnknownPrefix` when the prefix is not declared, and propagates
/// errors from the reader and from the helper parsers.
pub(crate) fn parse_prefixed_name<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
    prefixes: &HashMap<String, String>,
) -> Result<NamedNode<'a>, TurtleError> {
    // [136s] PrefixedName ::= PNAME_LN | PNAME_NS
    // It could be written: PNAME_NS PN_LOCAL?

    // PNAME_NS
    parse_pname_ns(read, buffer)?;
    // Swap the prefix held in `buffer` for the namespace IRI it is bound to.
    if let Some(value) = prefixes.get(buffer.as_str()) {
        buffer.clear();
        buffer.push_str(value);
    } else {
        return Err(read.parse_error(TurtleErrorKind::UnknownPrefix(buffer.clone())));
    }

    // [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
    // First character of the local part. No arm consumes the byte it has just accepted;
    // that is done by the `consume` at the top of the loop below.
    if let Some(c) = read.current() {
        match c {
            b'\\' => parse_pn_local_esc(read, buffer)?,
            b'%' => parse_percent(read, buffer)?,
            b':' | b'0'..=b'9' => buffer.push(char::from(c)),
            c if is_possible_pn_chars_u_ascii(c) => buffer.push(char::from(c)),
            _ => {
                let c = read_utf8_char(read)?;
                if is_possible_pn_chars_u_unicode(c) {
                    buffer.push(c)
                } else {
                    // No local part: the name is just the namespace IRI.
                    return Ok(NamedNode { iri: buffer });
                }
            }
        }
    } else {
        // End of input right after the ':'.
        return Ok(NamedNode { iri: buffer });
    }

    // Remaining characters of the local part.
    loop {
        // Step past the byte accepted by the previous iteration (or by the block above).
        read.consume()?;
        match read.current() {
            Some(b'.') => {
                // A dot is part of the name only if a valid local-name byte follows the run
                // of dots; otherwise it is left unconsumed for the caller.
                if has_future_char_valid_pname_local(read)? {
                    buffer.push('.')
                } else {
                    break;
                }
            }
            Some(b'\\') => parse_pn_local_esc(read, buffer)?,
            Some(b'%') => parse_percent(read, buffer)?,
            Some(b':') => buffer.push(':'),
            Some(c) if is_possible_pn_chars_ascii(c) => buffer.push(char::from(c)),
            // Everything else, including end of input, is handed to `read_utf8_char`.
            // NOTE(review): its behaviour at end of input is not visible here; presumably
            // it reports an error. Confirm against its definition.
            _ => {
                let c = read_utf8_char(read)?;
                if is_possible_pn_chars_unicode(c) {
                    buffer.push(c)
                } else {
                    break;
                }
            }
        }
    }
    Ok(NamedNode { iri: buffer })
}
1261
1262fn has_future_char_valid_pname_local(
1263    read: &mut LookAheadByteReader<impl BufRead>,
1264) -> Result<bool, TurtleError> {
1265    let mut i = 1;
1266    loop {
1267        match read.ahead(i)? {
1268            Some(b':') | Some(b'%') | Some(b'\\') => return Ok(true),
1269            Some(c) if c > MAX_ASCII || is_possible_pn_chars_ascii(c) => return Ok(true),
1270            Some(b'.') => (),
1271            _ => return Ok(false),
1272        }
1273        i += 1;
1274    }
1275}
1276
1277pub(crate) fn parse_blank_node<'a>(
1278    read: &mut LookAheadByteReader<impl BufRead>,
1279    buffer: &'a mut String,
1280    bnode_id_generator: &mut BlankNodeIdGenerator,
1281) -> Result<BlankNode<'a>, TurtleError> {
1282    // [137s] 	BlankNode 	::= 	BLANK_NODE_LABEL | ANON
1283    match read.current() {
1284        Some(b'_') => {
1285            parse_blank_node_label(read, buffer)?;
1286            bnode_id_generator.disambiguate(buffer);
1287        }
1288        Some(b'[') => {
1289            parse_anon(read, buffer, bnode_id_generator)?;
1290        }
1291        _ => read.unexpected_char_error()?,
1292    }
1293    Ok(BlankNode { id: buffer })
1294}
1295
1296pub(crate) fn parse_pname_ns(
1297    read: &mut LookAheadByteReader<impl BufRead>,
1298    buffer: &mut String,
1299) -> Result<(), TurtleError> {
1300    // [139s] 	PNAME_NS 	::= 	PN_PREFIX? ':'
1301    parse_pn_prefix(read, buffer)?;
1302    if read.current() == Some(b':') {
1303        read.consume()?;
1304        Ok(())
1305    } else {
1306        read.unexpected_char_error()
1307    }
1308}
1309
1310fn parse_exponent(
1311    read: &mut LookAheadByteReader<impl BufRead>,
1312    buffer: &mut String,
1313) -> Result<(), TurtleError> {
1314    // [154s] 	EXPONENT 	::= 	[eE] [+-]? [0-9]+
1315    let c = read.required_current()?;
1316    match c {
1317        b'e' | b'E' => buffer.push(char::from(c)),
1318        _ => read.unexpected_char_error()?,
1319    };
1320    read.consume()?;
1321
1322    if let Some(c) = read.current() {
1323        match c {
1324            b'+' | b'-' => {
1325                buffer.push(char::from(c));
1326                read.consume()?
1327            }
1328            _ => (),
1329        }
1330    }
1331
1332    match read.required_current()? {
1333        c @ b'0'..=b'9' => buffer.push(char::from(c)),
1334        _ => read.unexpected_char_error()?,
1335    }
1336
1337    loop {
1338        read.consume()?;
1339        if let Some(c) = read.current() {
1340            match c {
1341                b'0'..=b'9' => buffer.push(char::from(c)),
1342                _ => return Ok(()),
1343            }
1344        } else {
1345            return Ok(());
1346        }
1347    }
1348}
1349
/// Parses a short single-quoted string (`'...'`).
///
/// Thin wrapper that delegates to `parse_string_literal_quote_inner` with `'` as the
/// delimiter, passing `buffer` along as the destination.
fn parse_string_literal_single_quote(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // [23] STRING_LITERAL_SINGLE_QUOTE ::= "'" ([^#x27#x5C#xA#xD] | ECHAR | UCHAR)* "'" /* #x27=' #x5C=\ #xA=new line #xD=carriage return */
    parse_string_literal_quote_inner(read, buffer, b'\'')
}
1357
/// Parses a long single-quoted string (`'''...'''`) and appends its content to `buffer`.
///
/// Thin wrapper that delegates to `parse_string_literal_long_quote_inner` with `'` as the
/// delimiter.
fn parse_string_literal_long_single_quote(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // [24] STRING_LITERAL_LONG_SINGLE_QUOTE ::= "'''" (("'" | "''")? ([^'\] | ECHAR | UCHAR))* "'''"
    parse_string_literal_long_quote_inner(read, buffer, b'\'')
}
1365
/// Parses a long double-quoted string (`"""..."""`) and appends its content to `buffer`.
///
/// Thin wrapper that delegates to `parse_string_literal_long_quote_inner` with `"` as the
/// delimiter.
fn parse_string_literal_long_quote(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // [25] STRING_LITERAL_LONG_QUOTE ::= '"""' (('"' | '""')? ([^"\] | ECHAR | UCHAR))* '"""'
    parse_string_literal_long_quote_inner(read, buffer, b'"')
}
1373
1374fn parse_string_literal_long_quote_inner(
1375    read: &mut LookAheadByteReader<impl BufRead>,
1376    buffer: &mut String,
1377    quote: u8,
1378) -> Result<(), TurtleError> {
1379    let prefix = [quote; 3];
1380    read.consume_many(2)?;
1381    loop {
1382        read.consume()?;
1383        match read.required_current()? {
1384            c if c == quote && read.starts_with(&prefix) => {
1385                read.consume_many(3)?;
1386                return Ok(());
1387            }
1388            b'\\' => parse_echar_or_uchar(read, buffer)?,
1389            c => buffer.push(if c <= 0x7F {
1390                char::from(c) //optimization to avoid UTF-8 decoding
1391            } else {
1392                read_utf8_char(read)?
1393            }),
1394        }
1395    }
1396}
1397
1398fn parse_anon(
1399    read: &mut LookAheadByteReader<impl BufRead>,
1400    buffer: &mut String,
1401    bnode_id_generator: &mut BlankNodeIdGenerator,
1402) -> Result<(), TurtleError> {
1403    read.check_is_current(b'[')?;
1404    read.consume()?;
1405    skip_whitespace(read)?;
1406
1407    read.check_is_current(b']')?;
1408    read.consume()?;
1409
1410    buffer.push_str(bnode_id_generator.generate().as_ref());
1411    Ok(())
1412}
1413
/// Reads an optional namespace prefix (PN_PREFIX) into `buffer`.
///
/// Returns `Ok(())` as soon as a character that cannot be part of the prefix is met; that
/// character is left for the caller to inspect (`parse_pname_ns` expects a `:` there).
/// An empty prefix is therefore not an error.
///
/// # Errors
///
/// Propagates reader errors and errors from `read_utf8_char`.
fn parse_pn_prefix(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
    // First character: must be a PN_CHARS_BASE. It is pushed but not consumed; the
    // `consume` at the top of the loop below steps past it.
    match read.current() {
        Some(c) if c <= MAX_ASCII && is_possible_pn_chars_base_ascii(c) => {
            buffer.push(char::from(c))
        }
        // Any other byte, and also the end of input, goes through `read_utf8_char`.
        // NOTE(review): its behaviour at end of input is not visible here; presumably it
        // reports an error. Confirm against its definition.
        _ => {
            let c = read_utf8_char(read)?;
            if is_possible_pn_chars_base_unicode(c) {
                buffer.push(c)
            } else {
                return Ok(()); //PN_PREFIX is always optional
            }
        }
    }

    loop {
        // Step past the character accepted by the previous iteration (or by the match above).
        read.consume()?;
        match read.current() {
            // A dot is kept only if the byte right after it may continue the prefix
            // (possible PN_CHARS ASCII byte, or any non-ASCII byte).
            Some(b'.') => match read.next()? {
                Some(c) if is_possible_pn_chars_ascii(c) || c > MAX_ASCII => buffer.push('.'),
                _ => {
                    return Ok(());
                }
            },
            Some(c) if c <= MAX_ASCII && is_possible_pn_chars_ascii(c) => {
                buffer.push(char::from(c))
            }
            _ => {
                let c = read_utf8_char(read)?;
                if is_possible_pn_chars_unicode(c) {
                    buffer.push(c)
                } else {
                    // End of the prefix: the current character belongs to the caller.
                    return Ok(());
                }
            }
        }
    }
}
1456
1457fn parse_percent(
1458    read: &mut LookAheadByteReader<impl BufRead>,
1459    buffer: &mut String,
1460) -> Result<(), TurtleError> {
1461    // [170s] 	PERCENT 	::= 	'%' HEX HEX
1462    read.check_is_current(b'%')?;
1463    buffer.push('%');
1464    read.consume()?;
1465    parse_hex(read, buffer)?;
1466    read.consume()?;
1467    parse_hex(read, buffer)?;
1468    Ok(())
1469}
1470
1471fn parse_hex(
1472    read: &mut LookAheadByteReader<impl BufRead>,
1473    buffer: &mut String,
1474) -> Result<(), TurtleError> {
1475    // [171s] 	HEX 	::= 	[0-9] | [A-F] | [a-f]
1476    let c = read.required_current()?;
1477    match c {
1478        b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' => {
1479            buffer.push(char::from(c));
1480            Ok(())
1481        }
1482        _ => read.unexpected_char_error(),
1483    }
1484}
1485
1486fn parse_pn_local_esc(
1487    read: &mut LookAheadByteReader<impl BufRead>,
1488    buffer: &mut String,
1489) -> Result<(), TurtleError> {
1490    // [172s] 	PN_LOCAL_ESC 	::= 	'\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')
1491    read.check_is_current(b'\\')?;
1492    read.consume()?;
1493    let c = read.required_current()?;
1494    match c {
1495        b'_' | b'~' | b'.' | b'-' | b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+'
1496        | b',' | b';' | b'=' | b'/' | b'?' | b'#' | b'@' | b'%' => {
1497            buffer.push(char::from(c));
1498            Ok(())
1499        }
1500        _ => read.unexpected_char_error(),
1501    }
1502}
1503
1504pub(crate) fn skip_whitespace(
1505    read: &mut LookAheadByteReader<impl BufRead>,
1506) -> Result<(), TurtleError> {
1507    loop {
1508        match read.current() {
1509            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => read.consume()?,
1510            Some(b'#') => {
1511                while read.current() != Some(b'\r')
1512                    && read.current() != Some(b'\n')
1513                    && read.current().is_some()
1514                {
1515                    read.consume()?;
1516                }
1517            }
1518            _ => return Ok(()),
1519        }
1520    }
1521}
1522
1523pub(crate) fn is_followed_by_space_and_closing_bracket(
1524    read: &mut LookAheadByteReader<impl BufRead>,
1525) -> Result<bool, TurtleError> {
1526    for i in 1.. {
1527        match read.ahead(i)? {
1528            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => (),
1529            Some(b']') => return Ok(true),
1530            _ => return Ok(false),
1531        }
1532    }
1533    Ok(false)
1534}
1535
1536fn on_triple_in_graph<'a, E>(
1537    on_quad: &'a mut impl FnMut(Quad<'_>) -> Result<(), E>,
1538    graph_name: Option<GraphName<'a>>,
1539) -> impl FnMut(Triple<'_>) -> Result<(), E> + 'a {
1540    move |t: Triple<'_>| {
1541        on_quad(Quad {
1542            subject: t.subject,
1543            predicate: t.predicate,
1544            object: t.object,
1545            graph_name,
1546        })
1547    }
1548}
1549
/// Parses a quoted triple (`<< subject verb object >>`, Turtle-star).
///
/// A new triple is started on the allocator and filled with the parsed subject, verb and
/// object. It is left on the allocator: the callers in this file follow up with
/// `push_subject_triple` or `push_object_triple` to attach it to the enclosing triple.
///
/// The reader must be positioned on the first `<` of the opening `<<`.
///
/// # Errors
///
/// Fails when the closing `>>` is missing, and propagates reader and sub-parser errors.
pub(crate) fn parse_quoted_triple(
    parser: &mut TurtleParser<impl BufRead>,
) -> Result<(), TurtleError> {
    // [27t] embTriple ::= '<<' embSubject verb embObject '>>'
    parser.read.increment_stack_size()?;
    // Skip the opening "<<" (already checked by the caller).
    parser.read.consume_many(2)?;
    skip_whitespace(&mut parser.read)?;

    parser.triple_alloc.push_triple_start();

    parse_emb_subject(parser)?;
    skip_whitespace(&mut parser.read)?;

    parse_verb(parser)?;
    skip_whitespace(&mut parser.read)?;

    parse_emb_object(parser)?;
    skip_whitespace(&mut parser.read)?;

    // Closing ">>".
    parser.read.check_is_current(b'>')?;
    parser.read.check_is_next(b'>')?;
    parser.read.consume_many(2)?;
    parser.read.decrement_stack_size();
    Ok(())
}
1575
1576pub(crate) fn parse_emb_subject(
1577    parser: &mut TurtleParser<impl BufRead>,
1578) -> Result<(), TurtleError> {
1579    // [28t] 	embSubject 	::= 	iri | BlankNode | embTriple
1580    match parser.read.current() {
1581        Some(b'<') => {
1582            if parser.read.required_next()? == b'<' {
1583                parse_quoted_triple(parser)?;
1584                parser.triple_alloc.push_subject_triple();
1585                Ok(())
1586            } else {
1587                let TurtleParser {
1588                    read,
1589                    base_iri,
1590                    triple_alloc,
1591                    temp_buf,
1592                    ..
1593                } = parser;
1594                triple_alloc.try_push_subject(|b| {
1595                    parse_iriref_relative(read, b, temp_buf, base_iri).map(Subject::from)
1596                })
1597            }
1598        }
1599        Some(b'_') | Some(b'[') => {
1600            let TurtleParser {
1601                read,
1602                bnode_id_generator,
1603                triple_alloc,
1604                ..
1605            } = parser;
1606            triple_alloc.try_push_subject(|b| {
1607                parse_blank_node(read, b, bnode_id_generator).map(Subject::from)
1608            })
1609        }
1610        _ => {
1611            let TurtleParser {
1612                read,
1613                prefixes,
1614                triple_alloc,
1615                ..
1616            } = parser;
1617            triple_alloc
1618                .try_push_subject(|b| parse_prefixed_name(read, b, prefixes).map(Subject::from))
1619        }
1620    }
1621}
1622
/// Parses the object of a quoted triple and pushes it on the triple under construction.
///
/// Same dispatch as `parse_object`, minus collections and blank node property lists, and
/// without reporting anything: a quoted triple is a term, not an asserted triple.
///
/// # Errors
///
/// Fails at the end of the input, and propagates reader and sub-parser errors.
pub(crate) fn parse_emb_object(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
    // [29t] embObject ::= iri | BlankNode | literal | embTriple
    match parser.read.required_current()? {
        b'<' => {
            // "<<" opens a nested quoted triple, a single '<' opens an IRIREF.
            if parser.read.required_next()? == b'<' {
                parse_quoted_triple(parser)?;
                parser.triple_alloc.push_object_triple();
                Ok(())
            } else {
                let TurtleParser {
                    read,
                    base_iri,
                    triple_alloc,
                    temp_buf,
                    ..
                } = parser;
                triple_alloc.try_push_object(|b, _| {
                    parse_iriref_relative(read, b, temp_buf, base_iri).map(Term::from)
                })
            }
        }
        b'_' | b'[' => {
            // Labelled ("_:...") or anonymous ("[ ]") blank node.
            let TurtleParser {
                read,
                bnode_id_generator,
                triple_alloc,
                ..
            } = parser;
            triple_alloc.try_push_object(|b, _| {
                parse_blank_node(read, b, bnode_id_generator).map(Term::from)
            })
        }
        b'"' | b'\'' => {
            // String literal; the second buffer receives the language tag or datatype IRI.
            let TurtleParser {
                read,
                base_iri,
                prefixes,
                triple_alloc,
                temp_buf,
                ..
            } = parser;
            triple_alloc.try_push_object(|b1, b2| {
                parse_rdf_literal(read, b1, b2, temp_buf, base_iri, prefixes).map(Term::from)
            })
        }
        b'+' | b'-' | b'.' | b'0'..=b'9' => {
            let TurtleParser {
                read, triple_alloc, ..
            } = parser;
            triple_alloc.try_push_object(|b, _| parse_numeric_literal(read, b).map(Term::from))
        }
        _ => {
            let TurtleParser {
                read, triple_alloc, ..
            } = parser;
            // "true"/"false" are boolean literals only when the byte after the keyword is
            // missing (end of input) or is below MAX_ASCII, is not a possible PN_CHARS byte
            // and is not ':'. Otherwise the token is read as a prefixed name.
            if read.starts_with(b"true")
                && read.ahead(4)?.map_or(true, |c| {
                    c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                })
                || read.starts_with(b"false")
                    && read.ahead(5)?.map_or(true, |c| {
                        c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
                    })
            {
                triple_alloc.try_push_object(|b, _| parse_boolean_literal(read, b).map(Term::from))
            } else {
                // Re-borrow the parser fields, this time including the prefix map.
                let TurtleParser {
                    read,
                    prefixes,
                    triple_alloc,
                    ..
                } = parser;
                triple_alloc
                    .try_push_object(|b, _| parse_prefixed_name(read, b, prefixes).map(Term::from))
            }
        }
    }
}
1701
#[cfg(test)]
mod test {
    use super::*;

    /// Regression test for issue 46.
    ///
    /// The document labels a blank node with the first identifier that a default
    /// `BlankNodeIdGenerator` produces, and also contains a `[ ... ]` node for which the
    /// parser has to generate an identifier. The two blank subjects must not be merged.
    #[test]
    fn issue_46() -> Result<(), TurtleError> {
        let generated = crate::utils::BlankNodeIdGenerator::default().generate();

        let ttl = format!(
            r#"PREFIX : <tag:>
            :alice :knows [ :name "bob" ].
            _:{} :name "charlie".
            "#,
            generated.as_ref()
        );

        // Collect the identifier of every blank node seen in subject position.
        let mut blank_subjects = Vec::new();
        let mut parser = TurtleParser::new(std::io::Cursor::new(&ttl), None);
        parser.parse_all(&mut |triple| -> Result<(), TurtleError> {
            if let Subject::BlankNode(node) = triple.subject {
                blank_subjects.push(node.id.to_string());
            }
            Ok(())
        })?;

        assert_eq!(blank_subjects.len(), 2);
        assert_ne!(&blank_subjects[0], &blank_subjects[1]);
        Ok(())
    }
}