oxrdfio/
parser.rs

1//! Utilities to read RDF graphs and datasets.
2
3pub use crate::error::RdfParseError;
4use crate::format::RdfFormat;
5use crate::{LoadedDocument, RdfSyntaxError};
6#[cfg(feature = "async-tokio")]
7use oxjsonld::TokioAsyncReaderJsonLdParser;
8use oxjsonld::{
9    JsonLdParser, JsonLdPrefixesIter, JsonLdProfileSet, JsonLdRemoteDocument, ReaderJsonLdParser,
10    SliceJsonLdParser,
11};
12use oxrdf::{BlankNode, GraphName, IriParseError, Quad, Subject, Term, Triple};
13#[cfg(feature = "async-tokio")]
14use oxrdfxml::TokioAsyncReaderRdfXmlParser;
15use oxrdfxml::{RdfXmlParser, RdfXmlPrefixesIter, ReaderRdfXmlParser, SliceRdfXmlParser};
16#[cfg(feature = "async-tokio")]
17use oxttl::n3::TokioAsyncReaderN3Parser;
18use oxttl::n3::{N3Parser, N3PrefixesIter, N3Quad, N3Term, ReaderN3Parser, SliceN3Parser};
19#[cfg(feature = "async-tokio")]
20use oxttl::nquads::TokioAsyncReaderNQuadsParser;
21use oxttl::nquads::{NQuadsParser, ReaderNQuadsParser, SliceNQuadsParser};
22#[cfg(feature = "async-tokio")]
23use oxttl::ntriples::TokioAsyncReaderNTriplesParser;
24use oxttl::ntriples::{NTriplesParser, ReaderNTriplesParser, SliceNTriplesParser};
25#[cfg(feature = "async-tokio")]
26use oxttl::trig::TokioAsyncReaderTriGParser;
27use oxttl::trig::{ReaderTriGParser, SliceTriGParser, TriGParser, TriGPrefixesIter};
28#[cfg(feature = "async-tokio")]
29use oxttl::turtle::TokioAsyncReaderTurtleParser;
30use oxttl::turtle::{ReaderTurtleParser, SliceTurtleParser, TurtleParser, TurtlePrefixesIter};
31use std::collections::HashMap;
32use std::error::Error;
33use std::io::Read;
34use std::panic::{RefUnwindSafe, UnwindSafe};
35#[cfg(feature = "async-tokio")]
36use tokio::io::AsyncRead;
37
38/// Parsers for RDF serialization formats.
39///
40/// It currently supports the following formats:
41/// * [JSON-LD 1.0](https://www.w3.org/TR/json-ld/) ([`RdfFormat::JsonLd`])
42/// * [N3](https://w3c.github.io/N3/spec/) ([`RdfFormat::N3`])
43/// * [N-Quads](https://www.w3.org/TR/n-quads/) ([`RdfFormat::NQuads`])
44/// * [N-Triples](https://www.w3.org/TR/n-triples/) ([`RdfFormat::NTriples`])
45/// * [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) ([`RdfFormat::RdfXml`])
46/// * [TriG](https://www.w3.org/TR/trig/) ([`RdfFormat::TriG`])
47/// * [Turtle](https://www.w3.org/TR/turtle/) ([`RdfFormat::Turtle`])
48///
49/// Note the useful options:
50/// - [`with_base_iri`](Self::with_base_iri) to resolve the relative IRIs.
51/// - [`rename_blank_nodes`](Self::rename_blank_nodes) to rename the blank nodes to auto-generated numbers to avoid conflicts when merging RDF graphs together.
52/// - [`without_named_graphs`](Self::without_named_graphs) to parse a single graph.
53/// - [`unchecked`](Self::unchecked) to skip some validations if the file is already known to be valid.
54///
55/// ```
56/// use oxrdfio::{RdfFormat, RdfParser};
57///
58/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
59///
60/// let quads = RdfParser::from_format(RdfFormat::NTriples)
61///     .for_reader(file.as_bytes())
62///     .collect::<Result<Vec<_>, _>>()?;
63///
64/// assert_eq!(quads.len(), 1);
65/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
66/// # std::io::Result::Ok(())
67/// ```
68#[must_use]
69#[derive(Clone)]
70pub struct RdfParser {
71    inner: RdfParserKind,
72    default_graph: GraphName,
73    without_named_graphs: bool,
74    rename_blank_nodes: bool,
75}
76
77#[derive(Clone)]
78enum RdfParserKind {
79    JsonLd(JsonLdParser, JsonLdProfileSet),
80    N3(N3Parser),
81    NQuads(NQuadsParser),
82    NTriples(NTriplesParser),
83    RdfXml(RdfXmlParser),
84    TriG(TriGParser),
85    Turtle(TurtleParser),
86}
87
88impl RdfParser {
89    /// Builds a parser for the given format.
90    #[inline]
91    pub fn from_format(format: RdfFormat) -> Self {
92        Self {
93            inner: match format {
94                RdfFormat::JsonLd { profile } => {
95                    RdfParserKind::JsonLd(JsonLdParser::new().with_profile(profile), profile)
96                }
97                RdfFormat::N3 => RdfParserKind::N3(N3Parser::new()),
98                RdfFormat::NQuads => RdfParserKind::NQuads({
99                    #[cfg(feature = "rdf-star")]
100                    {
101                        NQuadsParser::new().with_quoted_triples()
102                    }
103                    #[cfg(not(feature = "rdf-star"))]
104                    {
105                        NQuadsParser::new()
106                    }
107                }),
108                RdfFormat::NTriples => RdfParserKind::NTriples({
109                    #[cfg(feature = "rdf-star")]
110                    {
111                        NTriplesParser::new().with_quoted_triples()
112                    }
113                    #[cfg(not(feature = "rdf-star"))]
114                    {
115                        NTriplesParser::new()
116                    }
117                }),
118                RdfFormat::RdfXml => RdfParserKind::RdfXml(RdfXmlParser::new()),
119                RdfFormat::TriG => RdfParserKind::TriG({
120                    #[cfg(feature = "rdf-star")]
121                    {
122                        TriGParser::new().with_quoted_triples()
123                    }
124                    #[cfg(not(feature = "rdf-star"))]
125                    {
126                        TriGParser::new()
127                    }
128                }),
129                RdfFormat::Turtle => RdfParserKind::Turtle({
130                    #[cfg(feature = "rdf-star")]
131                    {
132                        TurtleParser::new().with_quoted_triples()
133                    }
134                    #[cfg(not(feature = "rdf-star"))]
135                    {
136                        TurtleParser::new()
137                    }
138                }),
139            },
140            default_graph: GraphName::DefaultGraph,
141            without_named_graphs: false,
142            rename_blank_nodes: false,
143        }
144    }
145
146    /// The format the parser uses.
147    ///
148    /// ```
149    /// use oxrdfio::{RdfFormat, RdfParser};
150    ///
151    /// assert_eq!(
152    ///     RdfParser::from_format(RdfFormat::Turtle).format(),
153    ///     RdfFormat::Turtle
154    /// );
155    /// ```
156    pub fn format(&self) -> RdfFormat {
157        match &self.inner {
158            RdfParserKind::JsonLd(_, profile) => RdfFormat::JsonLd { profile: *profile },
159            RdfParserKind::N3(_) => RdfFormat::N3,
160            RdfParserKind::NQuads(_) => RdfFormat::NQuads,
161            RdfParserKind::NTriples(_) => RdfFormat::NTriples,
162            RdfParserKind::RdfXml(_) => RdfFormat::RdfXml,
163            RdfParserKind::TriG(_) => RdfFormat::TriG,
164            RdfParserKind::Turtle(_) => RdfFormat::Turtle,
165        }
166    }
167
168    /// Provides an IRI that could be used to resolve the file relative IRIs.
169    ///
170    /// ```
171    /// use oxrdfio::{RdfFormat, RdfParser};
172    ///
173    /// let file = "</s> </p> </o> .";
174    ///
175    /// let quads = RdfParser::from_format(RdfFormat::Turtle)
176    ///     .with_base_iri("http://example.com")?
177    ///     .for_reader(file.as_bytes())
178    ///     .collect::<Result<Vec<_>, _>>()?;
179    ///
180    /// assert_eq!(quads.len(), 1);
181    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
182    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
183    /// ```
184    #[inline]
185    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
186        self.inner = match self.inner {
187            RdfParserKind::JsonLd(p, f) => RdfParserKind::JsonLd(p.with_base_iri(base_iri)?, f),
188            RdfParserKind::N3(p) => RdfParserKind::N3(p.with_base_iri(base_iri)?),
189            RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p),
190            RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p),
191            RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.with_base_iri(base_iri)?),
192            RdfParserKind::TriG(p) => RdfParserKind::TriG(p.with_base_iri(base_iri)?),
193            RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.with_base_iri(base_iri)?),
194        };
195        Ok(self)
196    }
197
    /// Provides the graph name that should replace the default graph in the returned quads.
199    ///
200    /// ```
201    /// use oxrdf::NamedNode;
202    /// use oxrdfio::{RdfFormat, RdfParser};
203    ///
204    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
205    ///
206    /// let quads = RdfParser::from_format(RdfFormat::Turtle)
207    ///     .with_default_graph(NamedNode::new("http://example.com/g")?)
208    ///     .for_reader(file.as_bytes())
209    ///     .collect::<Result<Vec<_>, _>>()?;
210    ///
211    /// assert_eq!(quads.len(), 1);
212    /// assert_eq!(quads[0].graph_name.to_string(), "<http://example.com/g>");
213    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
214    /// ```
215    #[inline]
216    pub fn with_default_graph(mut self, default_graph: impl Into<GraphName>) -> Self {
217        self.default_graph = default_graph.into();
218        self
219    }
220
221    /// Sets that the parser must fail if parsing a named graph.
222    ///
223    /// This function restricts the parser to only parse a single [RDF graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) and not an [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset).
224    ///
225    /// ```
226    /// use oxrdfio::{RdfFormat, RdfParser};
227    ///
228    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> <http://example.com/g> .";
229    ///
230    /// let parser = RdfParser::from_format(RdfFormat::NQuads).without_named_graphs();
231    /// assert!(parser.for_reader(file.as_bytes()).next().unwrap().is_err());
232    /// ```
233    #[inline]
234    pub fn without_named_graphs(mut self) -> Self {
235        self.without_named_graphs = true;
236        self
237    }
238
239    /// Renames the blank nodes ids from the ones set in the serialization to random ids.
240    ///
    /// This avoids id conflicts when merging graphs together.
242    ///
243    /// ```
244    /// use oxrdfio::{RdfFormat, RdfParser};
245    ///
246    /// let file = "_:a <http://example.com/p> <http://example.com/o> .";
247    ///
248    /// let result1 = RdfParser::from_format(RdfFormat::NQuads)
249    ///     .rename_blank_nodes()
250    ///     .for_reader(file.as_bytes())
251    ///     .collect::<Result<Vec<_>, _>>()?;
252    /// let result2 = RdfParser::from_format(RdfFormat::NQuads)
253    ///     .rename_blank_nodes()
254    ///     .for_reader(file.as_bytes())
255    ///     .collect::<Result<Vec<_>, _>>()?;
256    /// assert_ne!(result1, result2);
257    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
258    /// ```
259    #[inline]
260    pub fn rename_blank_nodes(mut self) -> Self {
261        self.rename_blank_nodes = true;
262        self
263    }
264
265    /// Assumes the file is valid to make parsing faster.
266    ///
267    /// It will skip some validations.
268    ///
269    /// Note that if the file is actually not valid, broken RDF might be emitted by the parser.
270    #[inline]
271    pub fn unchecked(mut self) -> Self {
272        self.inner = match self.inner {
273            RdfParserKind::JsonLd(p, f) => RdfParserKind::JsonLd(p.lenient(), f),
274            RdfParserKind::N3(p) => RdfParserKind::N3(p.unchecked()),
275            RdfParserKind::NTriples(p) => RdfParserKind::NTriples(p.unchecked()),
276            RdfParserKind::NQuads(p) => RdfParserKind::NQuads(p.unchecked()),
277            RdfParserKind::RdfXml(p) => RdfParserKind::RdfXml(p.unchecked()),
278            RdfParserKind::TriG(p) => RdfParserKind::TriG(p.unchecked()),
279            RdfParserKind::Turtle(p) => RdfParserKind::Turtle(p.unchecked()),
280        };
281        self
282    }
283
284    /// Parses from a [`Read`] implementation and returns an iterator of quads.
285    ///
286    /// Reads are buffered.
287    ///
288    /// ```
289    /// use oxrdfio::{RdfFormat, RdfParser};
290    ///
291    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
292    ///
293    /// let quads = RdfParser::from_format(RdfFormat::NTriples)
294    ///     .for_reader(file.as_bytes())
295    ///     .collect::<Result<Vec<_>, _>>()?;
296    ///
297    /// assert_eq!(quads.len(), 1);
298    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
299    /// # std::io::Result::Ok(())
300    /// ```
301    pub fn for_reader<R: Read>(self, reader: R) -> ReaderQuadParser<R> {
302        ReaderQuadParser {
303            inner: match self.inner {
304                RdfParserKind::JsonLd(p, _) => ReaderQuadParserKind::JsonLd(p.for_reader(reader)),
305                RdfParserKind::N3(p) => ReaderQuadParserKind::N3(p.for_reader(reader)),
306                RdfParserKind::NQuads(p) => ReaderQuadParserKind::NQuads(p.for_reader(reader)),
307                RdfParserKind::NTriples(p) => ReaderQuadParserKind::NTriples(p.for_reader(reader)),
308                RdfParserKind::RdfXml(p) => ReaderQuadParserKind::RdfXml(p.for_reader(reader)),
309                RdfParserKind::TriG(p) => ReaderQuadParserKind::TriG(p.for_reader(reader)),
310                RdfParserKind::Turtle(p) => ReaderQuadParserKind::Turtle(p.for_reader(reader)),
311            },
312            mapper: QuadMapper {
313                default_graph: self.default_graph.clone(),
314                without_named_graphs: self.without_named_graphs,
315                blank_node_map: self.rename_blank_nodes.then(HashMap::new),
316            },
317        }
318    }
319
320    /// Parses from a Tokio [`AsyncRead`] implementation and returns an async iterator of quads.
321    ///
322    /// Reads are buffered.
323    ///
324    /// ```
325    /// # #[tokio::main(flavor = "current_thread")]
326    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
327    /// use oxrdfio::{RdfFormat, RdfParser};
328    ///
329    /// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
330    ///
331    /// let mut parser =
332    ///     RdfParser::from_format(RdfFormat::NTriples).for_tokio_async_reader(file.as_bytes());
333    /// if let Some(quad) = parser.next().await {
334    ///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
335    /// }
336    /// # Ok(())
337    /// # }
338    /// ```
339    #[cfg(feature = "async-tokio")]
340    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
341        self,
342        reader: R,
343    ) -> TokioAsyncReaderQuadParser<R> {
344        TokioAsyncReaderQuadParser {
345            inner: match self.inner {
346                RdfParserKind::JsonLd(p, _) => {
347                    TokioAsyncReaderQuadParserKind::JsonLd(p.for_tokio_async_reader(reader))
348                }
349                RdfParserKind::N3(p) => {
350                    TokioAsyncReaderQuadParserKind::N3(p.for_tokio_async_reader(reader))
351                }
352                RdfParserKind::NQuads(p) => {
353                    TokioAsyncReaderQuadParserKind::NQuads(p.for_tokio_async_reader(reader))
354                }
355                RdfParserKind::NTriples(p) => {
356                    TokioAsyncReaderQuadParserKind::NTriples(p.for_tokio_async_reader(reader))
357                }
358                RdfParserKind::RdfXml(p) => {
359                    TokioAsyncReaderQuadParserKind::RdfXml(p.for_tokio_async_reader(reader))
360                }
361                RdfParserKind::TriG(p) => {
362                    TokioAsyncReaderQuadParserKind::TriG(p.for_tokio_async_reader(reader))
363                }
364                RdfParserKind::Turtle(p) => {
365                    TokioAsyncReaderQuadParserKind::Turtle(p.for_tokio_async_reader(reader))
366                }
367            },
368            mapper: QuadMapper {
369                default_graph: self.default_graph.clone(),
370                without_named_graphs: self.without_named_graphs,
371                blank_node_map: self.rename_blank_nodes.then(HashMap::new),
372            },
373        }
374    }
375
376    /// Parses from a byte slice and returns an iterator of quads.
377    ///
378    /// ```
379    /// use oxrdfio::{RdfFormat, RdfParser};
380    ///
381    /// let file = b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
382    ///
383    /// let quads = RdfParser::from_format(RdfFormat::NTriples)
384    ///     .for_slice(file)
385    ///     .collect::<Result<Vec<_>, _>>()?;
386    ///
387    /// assert_eq!(quads.len(), 1);
388    /// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
389    /// # std::io::Result::Ok(())
390    /// ```
391    pub fn for_slice(self, slice: &[u8]) -> SliceQuadParser<'_> {
392        SliceQuadParser {
393            inner: match self.inner {
394                RdfParserKind::JsonLd(p, _) => SliceQuadParserKind::JsonLd(p.for_slice(slice)),
395                RdfParserKind::N3(p) => SliceQuadParserKind::N3(p.for_slice(slice)),
396                RdfParserKind::NQuads(p) => SliceQuadParserKind::NQuads(p.for_slice(slice)),
397                RdfParserKind::NTriples(p) => SliceQuadParserKind::NTriples(p.for_slice(slice)),
398                RdfParserKind::RdfXml(p) => SliceQuadParserKind::RdfXml(p.for_slice(slice)),
399                RdfParserKind::TriG(p) => SliceQuadParserKind::TriG(p.for_slice(slice)),
400                RdfParserKind::Turtle(p) => SliceQuadParserKind::Turtle(p.for_slice(slice)),
401            },
402            mapper: QuadMapper {
403                default_graph: self.default_graph.clone(),
404                without_named_graphs: self.without_named_graphs,
405                blank_node_map: self.rename_blank_nodes.then(HashMap::new),
406            },
407        }
408    }
409}
410
411impl From<RdfFormat> for RdfParser {
412    fn from(format: RdfFormat) -> Self {
413        Self::from_format(format)
414    }
415}
416
417/// Parses a RDF file from a [`Read`] implementation.
418///
419/// Can be built using [`RdfParser::for_reader`].
420///
421/// Reads are buffered.
422///
423/// ```
424/// use oxrdfio::{RdfFormat, RdfParser};
425///
426/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
427///
428/// let quads = RdfParser::from_format(RdfFormat::NTriples)
429///     .for_reader(file.as_bytes())
430///     .collect::<Result<Vec<_>, _>>()?;
431///
432/// assert_eq!(quads.len(), 1);
433/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
434/// # std::io::Result::Ok(())
435/// ```
436#[must_use]
437pub struct ReaderQuadParser<R: Read> {
438    inner: ReaderQuadParserKind<R>,
439    mapper: QuadMapper,
440}
441
442enum ReaderQuadParserKind<R: Read> {
443    JsonLd(ReaderJsonLdParser<R>),
444    N3(ReaderN3Parser<R>),
445    NQuads(ReaderNQuadsParser<R>),
446    NTriples(ReaderNTriplesParser<R>),
447    RdfXml(ReaderRdfXmlParser<R>),
448    TriG(ReaderTriGParser<R>),
449    Turtle(ReaderTurtleParser<R>),
450}
451
452impl<R: Read> Iterator for ReaderQuadParser<R> {
453    type Item = Result<Quad, RdfParseError>;
454
455    fn next(&mut self) -> Option<Self::Item> {
456        Some(match &mut self.inner {
457            ReaderQuadParserKind::JsonLd(parser) => match parser.next()? {
458                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
459                Err(e) => Err(e.into()),
460            },
461            ReaderQuadParserKind::N3(parser) => match parser.next()? {
462                Ok(quad) => self.mapper.map_n3_quad(quad).map_err(Into::into),
463                Err(e) => Err(e.into()),
464            },
465            ReaderQuadParserKind::NQuads(parser) => match parser.next()? {
466                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
467                Err(e) => Err(e.into()),
468            },
469            ReaderQuadParserKind::NTriples(parser) => match parser.next()? {
470                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
471                Err(e) => Err(e.into()),
472            },
473            ReaderQuadParserKind::RdfXml(parser) => match parser.next()? {
474                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
475                Err(e) => Err(e.into()),
476            },
477            ReaderQuadParserKind::TriG(parser) => match parser.next()? {
478                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
479                Err(e) => Err(e.into()),
480            },
481            ReaderQuadParserKind::Turtle(parser) => match parser.next()? {
482                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
483                Err(e) => Err(e.into()),
484            },
485        })
486    }
487}
488
489impl<R: Read> ReaderQuadParser<R> {
490    /// The list of IRI prefixes considered at the current step of the parsing.
491    ///
492    /// This method returns (prefix name, prefix value) tuples.
493    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
494    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
495    ///
    /// An empty iterator is returned if the format does not support prefixes.
497    ///
498    /// ```
499    /// use oxrdfio::{RdfFormat, RdfParser};
500    ///
501    /// let file = br#"@base <http://example.com/> .
502    /// @prefix schema: <http://schema.org/> .
503    /// <foo> a schema:Person ;
504    ///     schema:name "Foo" ."#;
505    ///
506    /// let mut parser = RdfParser::from_format(RdfFormat::Turtle).for_reader(file.as_slice());
507    /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
508    ///
509    /// parser.next().unwrap()?; // We read the first triple
510    /// assert_eq!(
511    ///     parser.prefixes().collect::<Vec<_>>(),
512    ///     [("schema", "http://schema.org/")]
513    /// ); // There are now prefixes
514    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
515    /// ```
516    pub fn prefixes(&self) -> PrefixesIter<'_> {
517        PrefixesIter {
518            inner: match &self.inner {
519                ReaderQuadParserKind::JsonLd(p) => PrefixesIterKind::JsonLd(p.prefixes()),
520                ReaderQuadParserKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
521                ReaderQuadParserKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
522                ReaderQuadParserKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
523                ReaderQuadParserKind::RdfXml(p) => PrefixesIterKind::RdfXml(p.prefixes()),
524                ReaderQuadParserKind::NQuads(_) | ReaderQuadParserKind::NTriples(_) => {
525                    PrefixesIterKind::None
526                }
527            },
528        }
529    }
530
531    /// The base IRI considered at the current step of the parsing.
532    ///
533    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
534    ///
535    /// ```
536    /// use oxrdfio::{RdfFormat, RdfParser};
537    ///
538    /// let file = br#"@base <http://example.com/> .
539    /// @prefix schema: <http://schema.org/> .
540    /// <foo> a schema:Person ;
541    ///     schema:name "Foo" ."#;
542    ///
543    /// let mut parser = RdfParser::from_format(RdfFormat::Turtle).for_reader(file.as_slice());
544    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
545    ///
546    /// parser.next().unwrap()?; // We read the first triple
547    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
548    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
549    /// ```
550    pub fn base_iri(&self) -> Option<&str> {
551        match &self.inner {
552            ReaderQuadParserKind::JsonLd(p) => p.base_iri(),
553            ReaderQuadParserKind::N3(p) => p.base_iri(),
554            ReaderQuadParserKind::TriG(p) => p.base_iri(),
555            ReaderQuadParserKind::Turtle(p) => p.base_iri(),
556            ReaderQuadParserKind::RdfXml(p) => p.base_iri(),
557            ReaderQuadParserKind::NQuads(_) | ReaderQuadParserKind::NTriples(_) => None,
558        }
559    }
560
561    pub fn with_document_loader(
562        mut self,
563        loader: impl Fn(&str) -> Result<LoadedDocument, Box<dyn Error + Send + Sync>>
564            + Send
565            + Sync
566            + UnwindSafe
567            + RefUnwindSafe
568            + 'static,
569    ) -> Self {
570        self.inner = match self.inner {
571            ReaderQuadParserKind::JsonLd(p) => {
572                ReaderQuadParserKind::JsonLd(p.with_load_document_callback(move |iri, _| {
573                    let response = loader(iri)?;
574                    if !matches!(response.format, RdfFormat::JsonLd { .. }) {
575                        return Err(format!(
576                            "The JSON-LD context format must be JSON-LD, {} found",
577                            response.format
578                        )
579                        .into());
580                    }
581                    Ok(JsonLdRemoteDocument {
582                        document: response.content,
583                        document_url: response.url,
584                    })
585                }))
586            }
587            i => i,
588        };
589        self
590    }
591}
592
593/// Parses a RDF file from a Tokio [`AsyncRead`] implementation.
594///
595/// Can be built using [`RdfParser::for_tokio_async_reader`].
596///
597/// Reads are buffered.
598///
599/// ```
600/// # #[tokio::main(flavor = "current_thread")]
601/// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
602/// use oxrdfio::{RdfFormat, RdfParser};
603///
604/// let file = "<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
605///
606/// let mut parser =
607///     RdfParser::from_format(RdfFormat::NTriples).for_tokio_async_reader(file.as_bytes());
608/// if let Some(quad) = parser.next().await {
609///     assert_eq!(quad?.subject.to_string(), "<http://example.com/s>");
610/// }
611/// # Ok(())
612/// # }
613/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderQuadParser<R>
where
    R: AsyncRead + Unpin,
{
    /// Format-specific async parser producing the raw quads or triples.
    inner: TokioAsyncReaderQuadParserKind<R>,
    /// Post-processing applied to each parsed item before it is returned.
    mapper: QuadMapper,
}
620
/// Format-specific parser wrapped by [`TokioAsyncReaderQuadParser`]: one variant per supported format.
#[cfg(feature = "async-tokio")]
enum TokioAsyncReaderQuadParserKind<R>
where
    R: AsyncRead + Unpin,
{
    /// JSON-LD async parser.
    JsonLd(TokioAsyncReaderJsonLdParser<R>),
    /// N3 async parser.
    N3(TokioAsyncReaderN3Parser<R>),
    /// N-Quads async parser.
    NQuads(TokioAsyncReaderNQuadsParser<R>),
    /// N-Triples async parser.
    NTriples(TokioAsyncReaderNTriplesParser<R>),
    /// RDF/XML async parser.
    RdfXml(TokioAsyncReaderRdfXmlParser<R>),
    /// TriG async parser.
    TriG(TokioAsyncReaderTriGParser<R>),
    /// Turtle async parser.
    Turtle(TokioAsyncReaderTurtleParser<R>),
}
631
632#[cfg(feature = "async-tokio")]
633impl<R: AsyncRead + Unpin> TokioAsyncReaderQuadParser<R> {
634    pub async fn next(&mut self) -> Option<Result<Quad, RdfParseError>> {
635        Some(match &mut self.inner {
636            TokioAsyncReaderQuadParserKind::JsonLd(parser) => match parser.next().await? {
637                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
638                Err(e) => Err(e.into()),
639            },
640            TokioAsyncReaderQuadParserKind::N3(parser) => match parser.next().await? {
641                Ok(quad) => self.mapper.map_n3_quad(quad).map_err(Into::into),
642                Err(e) => Err(e.into()),
643            },
644            TokioAsyncReaderQuadParserKind::NQuads(parser) => match parser.next().await? {
645                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
646                Err(e) => Err(e.into()),
647            },
648            TokioAsyncReaderQuadParserKind::NTriples(parser) => match parser.next().await? {
649                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
650                Err(e) => Err(e.into()),
651            },
652            TokioAsyncReaderQuadParserKind::RdfXml(parser) => match parser.next().await? {
653                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
654                Err(e) => Err(e.into()),
655            },
656            TokioAsyncReaderQuadParserKind::TriG(parser) => match parser.next().await? {
657                Ok(quad) => self.mapper.map_quad(quad).map_err(Into::into),
658                Err(e) => Err(e.into()),
659            },
660            TokioAsyncReaderQuadParserKind::Turtle(parser) => match parser.next().await? {
661                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
662                Err(e) => Err(e.into()),
663            },
664        })
665    }
666
667    /// The list of IRI prefixes considered at the current step of the parsing.
668    ///
669    /// This method returns (prefix name, prefix value) tuples.
670    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
671    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
672    ///
    /// An empty iterator is returned if the format does not support prefixes.
674    ///
675    /// ```
676    /// # #[tokio::main(flavor = "current_thread")]
677    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
678    /// use oxrdfio::{RdfFormat, RdfParser};
679    ///
680    /// let file = br#"@base <http://example.com/> .
681    /// @prefix schema: <http://schema.org/> .
682    /// <foo> a schema:Person ;
683    ///     schema:name "Foo" ."#;
684    ///
685    /// let mut parser =
686    ///     RdfParser::from_format(RdfFormat::Turtle).for_tokio_async_reader(file.as_slice());
687    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
688    ///
689    /// parser.next().await.unwrap()?; // We read the first triple
690    /// assert_eq!(
691    ///     parser.prefixes().collect::<Vec<_>>(),
692    ///     [("schema", "http://schema.org/")]
693    /// ); // There are now prefixes
694    /// # Ok(())
695    /// # }
696    /// ```
697    pub fn prefixes(&self) -> PrefixesIter<'_> {
698        PrefixesIter {
699            inner: match &self.inner {
700                TokioAsyncReaderQuadParserKind::JsonLd(p) => PrefixesIterKind::JsonLd(p.prefixes()),
701                TokioAsyncReaderQuadParserKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
702                TokioAsyncReaderQuadParserKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
703                TokioAsyncReaderQuadParserKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
704                TokioAsyncReaderQuadParserKind::RdfXml(p) => PrefixesIterKind::RdfXml(p.prefixes()),
705                TokioAsyncReaderQuadParserKind::NQuads(_)
706                | TokioAsyncReaderQuadParserKind::NTriples(_) => PrefixesIterKind::None,
707            },
708        }
709    }
710
711    /// The base IRI considered at the current step of the parsing.
712    ///
713    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
714    ///
715    /// ```
716    /// # #[tokio::main(flavor = "current_thread")]
717    /// # async fn main() -> Result<(), oxrdfio::RdfParseError> {
718    /// use oxrdfio::{RdfFormat, RdfParser};
719    ///
720    /// let file = br#"@base <http://example.com/> .
721    /// @prefix schema: <http://schema.org/> .
722    /// <foo> a schema:Person ;
723    ///     schema:name "Foo" ."#;
724    ///
725    /// let mut parser =
726    ///     RdfParser::from_format(RdfFormat::Turtle).for_tokio_async_reader(file.as_slice());
727    /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
728    ///
729    /// parser.next().await.unwrap()?; // We read the first triple
730    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
731    /// # Ok(())
732    /// # }
733    /// ```
734    pub fn base_iri(&self) -> Option<&str> {
735        match &self.inner {
736            TokioAsyncReaderQuadParserKind::JsonLd(p) => p.base_iri(),
737            TokioAsyncReaderQuadParserKind::N3(p) => p.base_iri(),
738            TokioAsyncReaderQuadParserKind::TriG(p) => p.base_iri(),
739            TokioAsyncReaderQuadParserKind::Turtle(p) => p.base_iri(),
740            TokioAsyncReaderQuadParserKind::RdfXml(p) => p.base_iri(),
741            TokioAsyncReaderQuadParserKind::NQuads(_)
742            | TokioAsyncReaderQuadParserKind::NTriples(_) => None,
743        }
744    }
745}
746
747/// Parses a RDF file from a byte slice.
748///
749/// Can be built using [`RdfParser::for_slice`].
750///
751/// ```
752/// use oxrdfio::{RdfFormat, RdfParser};
753///
754/// let file = b"<http://example.com/s> <http://example.com/p> <http://example.com/o> .";
755///
756/// let quads = RdfParser::from_format(RdfFormat::NTriples)
757///     .for_slice(file)
758///     .collect::<Result<Vec<_>, _>>()?;
759///
760/// assert_eq!(quads.len(), 1);
761/// assert_eq!(quads[0].subject.to_string(), "<http://example.com/s>");
762/// # std::io::Result::Ok(())
763/// ```
764#[must_use]
765pub struct SliceQuadParser<'a> {
766    inner: SliceQuadParserKind<'a>,
767    mapper: QuadMapper,
768}
769
770enum SliceQuadParserKind<'a> {
771    JsonLd(SliceJsonLdParser<'a>),
772    N3(SliceN3Parser<'a>),
773    NQuads(SliceNQuadsParser<'a>),
774    NTriples(SliceNTriplesParser<'a>),
775    RdfXml(SliceRdfXmlParser<'a>),
776    TriG(SliceTriGParser<'a>),
777    Turtle(SliceTurtleParser<'a>),
778}
779
780impl Iterator for SliceQuadParser<'_> {
781    type Item = Result<Quad, RdfSyntaxError>;
782
783    fn next(&mut self) -> Option<Self::Item> {
784        Some(match &mut self.inner {
785            SliceQuadParserKind::JsonLd(parser) => match parser.next()? {
786                Ok(quad) => self.mapper.map_quad(quad),
787                Err(e) => Err(e.into()),
788            },
789            SliceQuadParserKind::N3(parser) => match parser.next()? {
790                Ok(quad) => self.mapper.map_n3_quad(quad),
791                Err(e) => Err(e.into()),
792            },
793            SliceQuadParserKind::NQuads(parser) => match parser.next()? {
794                Ok(quad) => self.mapper.map_quad(quad),
795                Err(e) => Err(e.into()),
796            },
797            SliceQuadParserKind::NTriples(parser) => match parser.next()? {
798                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
799                Err(e) => Err(e.into()),
800            },
801            SliceQuadParserKind::RdfXml(parser) => match parser.next()? {
802                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
803                Err(e) => Err(e.into()),
804            },
805            SliceQuadParserKind::TriG(parser) => match parser.next()? {
806                Ok(quad) => self.mapper.map_quad(quad),
807                Err(e) => Err(e.into()),
808            },
809            SliceQuadParserKind::Turtle(parser) => match parser.next()? {
810                Ok(triple) => Ok(self.mapper.map_triple_to_quad(triple)),
811                Err(e) => Err(e.into()),
812            },
813        })
814    }
815}
816
817impl SliceQuadParser<'_> {
818    /// The list of IRI prefixes considered at the current step of the parsing.
819    ///
820    /// This method returns (prefix name, prefix value) tuples.
821    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
822    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
823    ///
824    /// An empty iterator is return if the format does not support prefixes.
825    ///
826    /// ```
827    /// use oxrdfio::{RdfFormat, RdfParser};
828    ///
829    /// let file = br#"@base <http://example.com/> .
830    /// @prefix schema: <http://schema.org/> .
831    /// <foo> a schema:Person ;
832    ///     schema:name "Foo" ."#;
833    ///
834    /// let mut parser = RdfParser::from_format(RdfFormat::Turtle).for_slice(file);
835    /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
836    ///
837    /// parser.next().unwrap()?; // We read the first triple
838    /// assert_eq!(
839    ///     parser.prefixes().collect::<Vec<_>>(),
840    ///     [("schema", "http://schema.org/")]
841    /// ); // There are now prefixes
842    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
843    /// ```
844    pub fn prefixes(&self) -> PrefixesIter<'_> {
845        PrefixesIter {
846            inner: match &self.inner {
847                SliceQuadParserKind::JsonLd(p) => PrefixesIterKind::JsonLd(p.prefixes()),
848                SliceQuadParserKind::N3(p) => PrefixesIterKind::N3(p.prefixes()),
849                SliceQuadParserKind::TriG(p) => PrefixesIterKind::TriG(p.prefixes()),
850                SliceQuadParserKind::Turtle(p) => PrefixesIterKind::Turtle(p.prefixes()),
851                SliceQuadParserKind::RdfXml(p) => PrefixesIterKind::RdfXml(p.prefixes()),
852                SliceQuadParserKind::NQuads(_) | SliceQuadParserKind::NTriples(_) => {
853                    PrefixesIterKind::None
854                }
855            },
856        }
857    }
858
859    /// The base IRI considered at the current step of the parsing.
860    ///
861    /// `None` is returned if no base IRI is set or the format does not support base IRIs.
862    ///
863    /// ```
864    /// use oxrdfio::{RdfFormat, RdfParser};
865    ///
866    /// let file = br#"@base <http://example.com/> .
867    /// @prefix schema: <http://schema.org/> .
868    /// <foo> a schema:Person ;
869    ///     schema:name "Foo" ."#;
870    ///
871    /// let mut parser = RdfParser::from_format(RdfFormat::Turtle).for_slice(file);
872    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
873    ///
874    /// parser.next().unwrap()?; // We read the first triple
875    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
876    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
877    /// ```
878    pub fn base_iri(&self) -> Option<&str> {
879        match &self.inner {
880            SliceQuadParserKind::JsonLd(p) => p.base_iri(),
881            SliceQuadParserKind::N3(p) => p.base_iri(),
882            SliceQuadParserKind::TriG(p) => p.base_iri(),
883            SliceQuadParserKind::Turtle(p) => p.base_iri(),
884            SliceQuadParserKind::RdfXml(p) => p.base_iri(),
885            SliceQuadParserKind::NQuads(_) | SliceQuadParserKind::NTriples(_) => None,
886        }
887    }
888}
889
890/// Iterator on the file prefixes.
891///
892/// See [`ReaderQuadParser::prefixes`].
893pub struct PrefixesIter<'a> {
894    inner: PrefixesIterKind<'a>,
895}
896
897enum PrefixesIterKind<'a> {
898    JsonLd(JsonLdPrefixesIter<'a>),
899    Turtle(TurtlePrefixesIter<'a>),
900    TriG(TriGPrefixesIter<'a>),
901    N3(N3PrefixesIter<'a>),
902    RdfXml(RdfXmlPrefixesIter<'a>),
903    None,
904}
905
906impl<'a> Iterator for PrefixesIter<'a> {
907    type Item = (&'a str, &'a str);
908
909    #[inline]
910    fn next(&mut self) -> Option<Self::Item> {
911        match &mut self.inner {
912            PrefixesIterKind::JsonLd(iter) => iter.next(),
913            PrefixesIterKind::Turtle(iter) => iter.next(),
914            PrefixesIterKind::TriG(iter) => iter.next(),
915            PrefixesIterKind::N3(iter) => iter.next(),
916            PrefixesIterKind::RdfXml(iter) => iter.next(),
917            PrefixesIterKind::None => None,
918        }
919    }
920
921    #[inline]
922    fn size_hint(&self) -> (usize, Option<usize>) {
923        match &self.inner {
924            PrefixesIterKind::JsonLd(iter) => iter.size_hint(),
925            PrefixesIterKind::Turtle(iter) => iter.size_hint(),
926            PrefixesIterKind::TriG(iter) => iter.size_hint(),
927            PrefixesIterKind::N3(iter) => iter.size_hint(),
928            PrefixesIterKind::RdfXml(iter) => iter.size_hint(),
929            PrefixesIterKind::None => (0, Some(0)),
930        }
931    }
932}
933
934struct QuadMapper {
935    default_graph: GraphName,
936    without_named_graphs: bool,
937    blank_node_map: Option<HashMap<BlankNode, BlankNode>>,
938}
939
940impl QuadMapper {
941    fn map_blank_node(&mut self, node: BlankNode) -> BlankNode {
942        if let Some(blank_node_map) = &mut self.blank_node_map {
943            blank_node_map
944                .entry(node)
945                .or_insert_with(BlankNode::default)
946                .clone()
947        } else {
948            node
949        }
950    }
951
952    fn map_subject(&mut self, node: Subject) -> Subject {
953        match node {
954            Subject::NamedNode(node) => node.into(),
955            Subject::BlankNode(node) => self.map_blank_node(node).into(),
956            #[cfg(feature = "rdf-star")]
957            Subject::Triple(triple) => self.map_triple(*triple).into(),
958        }
959    }
960
961    fn map_term(&mut self, node: Term) -> Term {
962        match node {
963            Term::NamedNode(node) => node.into(),
964            Term::BlankNode(node) => self.map_blank_node(node).into(),
965            Term::Literal(literal) => literal.into(),
966            #[cfg(feature = "rdf-star")]
967            Term::Triple(triple) => self.map_triple(*triple).into(),
968        }
969    }
970
971    fn map_triple(&mut self, triple: Triple) -> Triple {
972        Triple {
973            subject: self.map_subject(triple.subject),
974            predicate: triple.predicate,
975            object: self.map_term(triple.object),
976        }
977    }
978
979    fn map_graph_name(&mut self, graph_name: GraphName) -> Result<GraphName, RdfSyntaxError> {
980        match graph_name {
981            GraphName::NamedNode(node) => {
982                if self.without_named_graphs {
983                    Err(RdfSyntaxError::msg("Named graphs are not allowed"))
984                } else {
985                    Ok(node.into())
986                }
987            }
988            GraphName::BlankNode(node) => {
989                if self.without_named_graphs {
990                    Err(RdfSyntaxError::msg("Named graphs are not allowed"))
991                } else {
992                    Ok(self.map_blank_node(node).into())
993                }
994            }
995            GraphName::DefaultGraph => Ok(self.default_graph.clone()),
996        }
997    }
998
999    fn map_quad(&mut self, quad: Quad) -> Result<Quad, RdfSyntaxError> {
1000        Ok(Quad {
1001            subject: self.map_subject(quad.subject),
1002            predicate: quad.predicate,
1003            object: self.map_term(quad.object),
1004            graph_name: self.map_graph_name(quad.graph_name)?,
1005        })
1006    }
1007
1008    fn map_triple_to_quad(&mut self, triple: Triple) -> Quad {
1009        self.map_triple(triple).in_graph(self.default_graph.clone())
1010    }
1011
1012    fn map_n3_quad(&mut self, quad: N3Quad) -> Result<Quad, RdfSyntaxError> {
1013        Ok(Quad {
1014            subject: match quad.subject {
1015                N3Term::NamedNode(s) => Ok(s.into()),
1016                N3Term::BlankNode(s) => Ok(self.map_blank_node(s).into()),
1017                N3Term::Literal(_) => Err(RdfSyntaxError::msg(
1018                    "literals are not allowed in regular RDF subjects",
1019                )),
1020                #[cfg(feature = "rdf-star")]
1021                N3Term::Triple(s) => Ok(self.map_triple(*s).into()),
1022                N3Term::Variable(_) => Err(RdfSyntaxError::msg(
1023                    "variables are not allowed in regular RDF subjects",
1024                )),
1025            }?,
1026            predicate: match quad.predicate {
1027                N3Term::NamedNode(p) => Ok(p),
1028                N3Term::BlankNode(_) => Err(RdfSyntaxError::msg(
1029                    "blank nodes are not allowed in regular RDF predicates",
1030                )),
1031                N3Term::Literal(_) => Err(RdfSyntaxError::msg(
1032                    "literals are not allowed in regular RDF predicates",
1033                )),
1034                #[cfg(feature = "rdf-star")]
1035                N3Term::Triple(_) => Err(RdfSyntaxError::msg(
1036                    "quoted triples are not allowed in regular RDF predicates",
1037                )),
1038                N3Term::Variable(_) => Err(RdfSyntaxError::msg(
1039                    "variables are not allowed in regular RDF predicates",
1040                )),
1041            }?,
1042            object: match quad.object {
1043                N3Term::NamedNode(o) => Ok(o.into()),
1044                N3Term::BlankNode(o) => Ok(self.map_blank_node(o).into()),
1045                N3Term::Literal(o) => Ok(o.into()),
1046                #[cfg(feature = "rdf-star")]
1047                N3Term::Triple(o) => Ok(self.map_triple(*o).into()),
1048                N3Term::Variable(_) => Err(RdfSyntaxError::msg(
1049                    "variables are not allowed in regular RDF objects",
1050                )),
1051            }?,
1052            graph_name: self.map_graph_name(quad.graph_name)?,
1053        })
1054    }
1055}