oxttl/
turtle.rs

1//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`]
2//! and a serializer implemented by [`TurtleSerializer`].
3
4use crate::chunker::get_turtle_file_chunks;
5use crate::terse::TriGRecognizer;
6#[cfg(feature = "async-tokio")]
7use crate::toolkit::TokioAsyncReaderIterator;
8use crate::toolkit::{Parser, ReaderIterator, SliceIterator, TurtleParseError, TurtleSyntaxError};
9#[cfg(feature = "async-tokio")]
10use crate::trig::TokioAsyncWriterTriGSerializer;
11use crate::trig::{LowLevelTriGSerializer, TriGSerializer, WriterTriGSerializer};
12use crate::MIN_PARALLEL_CHUNK_SIZE;
13use oxiri::{Iri, IriParseError};
14use oxrdf::{GraphNameRef, Triple, TripleRef};
15use std::collections::hash_map::Iter;
16use std::collections::HashMap;
17use std::io::{self, Read, Write};
18#[cfg(feature = "async-tokio")]
19use tokio::io::{AsyncRead, AsyncWrite};
20
21/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
22///
23/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
24///
25/// Count the number of people:
26/// ```
27/// use oxrdf::vocab::rdf;
28/// use oxrdf::NamedNodeRef;
29/// use oxttl::TurtleParser;
30///
31/// let file = br#"@base <http://example.com/> .
32/// @prefix schema: <http://schema.org/> .
33/// <foo> a schema:Person ;
34///     schema:name "Foo" .
35/// <bar> a schema:Person ;
36///     schema:name "Bar" ."#;
37///
38/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
39/// let mut count = 0;
40/// for triple in TurtleParser::new().for_reader(file.as_ref()) {
41///     let triple = triple?;
42///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
43///         count += 1;
44///     }
45/// }
46/// assert_eq!(2, count);
47/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
48/// ```
#[derive(Default, Clone)]
#[must_use]
pub struct TurtleParser {
    /// Raised by [`TurtleParser::unchecked`]; forwarded to the recognizer when a parser is built.
    unchecked: bool,
    /// Base IRI given with [`TurtleParser::with_base_iri`], if any.
    base: Option<Iri<String>>,
    /// Prefixes registered with [`TurtleParser::with_prefix`], keyed by prefix name.
    prefixes: HashMap<String, Iri<String>>,
    /// Raised by [`TurtleParser::with_quoted_triples`]; forwarded to the recognizer when a parser is built.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
}
58
59impl TurtleParser {
60    /// Builds a new [`TurtleParser`].
61    #[inline]
62    pub fn new() -> Self {
63        Self::default()
64    }
65
66    /// Assumes the file is valid to make parsing faster.
67    ///
68    /// It will skip some validations.
69    ///
70    /// Note that if the file is actually not valid, broken RDF might be emitted by the parser.
71    #[inline]
72    pub fn unchecked(mut self) -> Self {
73        self.unchecked = true;
74        self
75    }
76
77    #[inline]
78    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
79        self.base = Some(Iri::parse(base_iri.into())?);
80        Ok(self)
81    }
82
83    #[inline]
84    pub fn with_prefix(
85        mut self,
86        prefix_name: impl Into<String>,
87        prefix_iri: impl Into<String>,
88    ) -> Result<Self, IriParseError> {
89        self.prefixes
90            .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
91        Ok(self)
92    }
93
94    /// Enables [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star).
95    #[cfg(feature = "rdf-star")]
96    #[inline]
97    pub fn with_quoted_triples(mut self) -> Self {
98        self.with_quoted_triples = true;
99        self
100    }
101
102    /// Parses a Turtle file from a [`Read`] implementation.
103    ///
104    /// Count the number of people:
105    /// ```
106    /// use oxrdf::vocab::rdf;
107    /// use oxrdf::NamedNodeRef;
108    /// use oxttl::TurtleParser;
109    ///
110    /// let file = br#"@base <http://example.com/> .
111    /// @prefix schema: <http://schema.org/> .
112    /// <foo> a schema:Person ;
113    ///     schema:name "Foo" .
114    /// <bar> a schema:Person ;
115    ///     schema:name "Bar" ."#;
116    ///
117    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
118    /// let mut count = 0;
119    /// for triple in TurtleParser::new().for_reader(file.as_ref()) {
120    ///     let triple = triple?;
121    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
122    ///         count += 1;
123    ///     }
124    /// }
125    /// assert_eq!(2, count);
126    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
127    /// ```
128    pub fn for_reader<R: Read>(self, reader: R) -> ReaderTurtleParser<R> {
129        ReaderTurtleParser {
130            inner: self.low_level().parser.for_reader(reader),
131        }
132    }
133
    /// Parses a Turtle file from an [`AsyncRead`] implementation.
135    ///
136    /// Count the number of people:
137    /// ```
138    /// # #[tokio::main(flavor = "current_thread")]
139    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
140    /// use oxrdf::vocab::rdf;
141    /// use oxrdf::NamedNodeRef;
142    /// use oxttl::TurtleParser;
143    ///
144    /// let file = br#"@base <http://example.com/> .
145    /// @prefix schema: <http://schema.org/> .
146    /// <foo> a schema:Person ;
147    ///     schema:name "Foo" .
148    /// <bar> a schema:Person ;
149    ///     schema:name "Bar" ."#;
150    ///
151    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
152    /// let mut count = 0;
153    /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
154    /// while let Some(triple) = parser.next().await {
155    ///     let triple = triple?;
156    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
157    ///         count += 1;
158    ///     }
159    /// }
160    /// assert_eq!(2, count);
161    /// # Ok(())
162    /// # }
163    /// ```
164    #[cfg(feature = "async-tokio")]
165    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
166        self,
167        reader: R,
168    ) -> TokioAsyncReaderTurtleParser<R> {
169        TokioAsyncReaderTurtleParser {
170            inner: self.low_level().parser.for_tokio_async_reader(reader),
171        }
172    }
173
    /// Parses a Turtle file from a byte slice.
175    ///
176    /// Count the number of people:
177    /// ```
178    /// use oxrdf::vocab::rdf;
179    /// use oxrdf::NamedNodeRef;
180    /// use oxttl::TurtleParser;
181    ///
182    /// let file = br#"@base <http://example.com/> .
183    /// @prefix schema: <http://schema.org/> .
184    /// <foo> a schema:Person ;
185    ///     schema:name "Foo" .
186    /// <bar> a schema:Person ;
187    ///     schema:name "Bar" ."#;
188    ///
189    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
190    /// let mut count = 0;
191    /// for triple in TurtleParser::new().for_slice(file) {
192    ///     let triple = triple?;
193    ///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
194    ///         count += 1;
195    ///     }
196    /// }
197    /// assert_eq!(2, count);
198    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
199    /// ```
200    pub fn for_slice(self, slice: &[u8]) -> SliceTurtleParser<'_> {
201        SliceTurtleParser {
202            inner: TriGRecognizer::new_parser(
203                slice,
204                true,
205                false,
206                #[cfg(feature = "rdf-star")]
207                self.with_quoted_triples,
208                self.unchecked,
209                self.base,
210                self.prefixes,
211            )
212            .into_iter(),
213        }
214    }
215
216    /// Creates a vector of iterators that may be used to parse a Turtle document slice in parallel.
217    /// To dynamically specify target_parallelism, use e.g. [`std::thread::available_parallelism`].
218    /// Intended to work on large documents.
219    /// Can fail or return wrong results if there are prefixes or base iris that are not defined
220    /// at the top of the document, or valid turtle syntax inside literal values.
221    ///
222    /// Count the number of people:
223    /// ```
224    /// use oxrdf::vocab::rdf;
225    /// use oxrdf::NamedNodeRef;
226    /// use oxttl::TurtleParser;
227    /// use rayon::iter::{IntoParallelIterator, ParallelIterator};
228    ///
229    /// let file = br#"@base <http://example.com/> .
230    /// @prefix schema: <http://schema.org/> .
231    /// <foo> a schema:Person ;
232    ///     schema:name "Foo" .
233    /// <bar> a schema:Person ;
234    ///     schema:name "Bar" ."#;
235    ///
236    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
237    /// let readers = TurtleParser::new().split_slice_for_parallel_parsing(file.as_ref(), 2);
238    /// let count = readers
239    ///     .into_par_iter()
240    ///     .map(|reader| {
241    ///         let mut count = 0;
242    ///         for triple in reader {
243    ///             let triple = triple.unwrap();
244    ///             if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
245    ///                 count += 1;
246    ///             }
247    ///         }
248    ///         count
249    ///     })
250    ///     .sum();
251    /// assert_eq!(2, count);
252    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
253    /// ```
254    pub fn split_slice_for_parallel_parsing(
255        mut self,
256        slice: &[u8],
257        target_parallelism: usize,
258    ) -> Vec<SliceTurtleParser<'_>> {
259        let n_chunks = (slice.len() / MIN_PARALLEL_CHUNK_SIZE).clamp(1, target_parallelism);
260
261        if n_chunks > 1 {
262            // Prefixes must be determined before chunks, since determining chunks relies on parser with prefixes determined.
263            let mut from_slice_parser = self.clone().for_slice(slice);
264            // We don't care about errors: they will be raised when parsing the first chunk anyway
265            from_slice_parser.next();
266            for (p, iri) in from_slice_parser.prefixes() {
267                // Already know this is a valid IRI
268                self = self.with_prefix(p, iri).unwrap();
269            }
270        }
271
272        get_turtle_file_chunks(slice, n_chunks, &self)
273            .into_iter()
274            .map(|(start, end)| self.clone().for_slice(&slice[start..end]))
275            .collect()
276    }
277
    /// Allows parsing a Turtle file using a low-level API.
279    ///
280    /// Count the number of people:
281    /// ```
282    /// use oxrdf::vocab::rdf;
283    /// use oxrdf::NamedNodeRef;
284    /// use oxttl::TurtleParser;
285    ///
286    /// let file: [&[u8]; 5] = [
287    ///     b"@base <http://example.com/>",
288    ///     b". @prefix schema: <http://schema.org/> .",
289    ///     b"<foo> a schema:Person",
290    ///     b" ; schema:name \"Foo\" . <bar>",
291    ///     b" a schema:Person ; schema:name \"Bar\" .",
292    /// ];
293    ///
294    /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
295    /// let mut count = 0;
296    /// let mut parser = TurtleParser::new().low_level();
297    /// let mut file_chunks = file.iter();
298    /// while !parser.is_end() {
299    ///     // We feed more data to the parser
300    ///     if let Some(chunk) = file_chunks.next() {
301    ///         parser.extend_from_slice(chunk);
302    ///     } else {
303    ///         parser.end(); // It's finished
304    ///     }
305    ///     // We read as many triples from the parser as possible
306    ///     while let Some(triple) = parser.parse_next() {
307    ///         let triple = triple?;
308    ///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
309    ///             count += 1;
310    ///         }
311    ///     }
312    /// }
313    /// assert_eq!(2, count);
314    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
315    /// ```
316    pub fn low_level(self) -> LowLevelTurtleParser {
317        LowLevelTurtleParser {
318            parser: TriGRecognizer::new_parser(
319                Vec::new(),
320                false,
321                false,
322                #[cfg(feature = "rdf-star")]
323                self.with_quoted_triples,
324                self.unchecked,
325                self.base,
326                self.prefixes,
327            ),
328        }
329    }
330}
331
332/// Parses a Turtle file from a [`Read`] implementation.
333///
334/// Can be built using [`TurtleParser::for_reader`].
335///
336/// Count the number of people:
337/// ```
338/// use oxrdf::vocab::rdf;
339/// use oxrdf::NamedNodeRef;
340/// use oxttl::TurtleParser;
341///
342/// let file = br#"@base <http://example.com/> .
343/// @prefix schema: <http://schema.org/> .
344/// <foo> a schema:Person ;
345///     schema:name "Foo" .
346/// <bar> a schema:Person ;
347///     schema:name "Bar" ."#;
348///
349/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
350/// let mut count = 0;
351/// for triple in TurtleParser::new().for_reader(file.as_ref()) {
352///     let triple = triple?;
353///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
354///         count += 1;
355///     }
356/// }
357/// assert_eq!(2, count);
358/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
359/// ```
#[must_use]
pub struct ReaderTurtleParser<R: Read> {
    /// Iterator obtained by binding the low-level parser state to the reader;
    /// it also gives access to the parser context (prefixes, base IRI).
    inner: ReaderIterator<R, TriGRecognizer>,
}
364
365impl<R: Read> ReaderTurtleParser<R> {
366    /// The list of IRI prefixes considered at the current step of the parsing.
367    ///
368    /// This method returns (prefix name, prefix value) tuples.
369    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
370    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
371    ///
372    /// ```
373    /// use oxttl::TurtleParser;
374    ///
375    /// let file = br#"@base <http://example.com/> .
376    /// @prefix schema: <http://schema.org/> .
377    /// <foo> a schema:Person ;
378    ///     schema:name "Foo" ."#;
379    ///
380    /// let mut parser = TurtleParser::new().for_reader(file.as_ref());
381    /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
382    ///
383    /// parser.next().unwrap()?; // We read the first triple
384    /// assert_eq!(
385    ///     parser.prefixes().collect::<Vec<_>>(),
386    ///     [("schema", "http://schema.org/")]
387    /// ); // There are now prefixes
388    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
389    /// ```
390    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
391        TurtlePrefixesIter {
392            inner: self.inner.parser.context.prefixes(),
393        }
394    }
395
396    /// The base IRI considered at the current step of the parsing.
397    ///
398    /// ```
399    /// use oxttl::TurtleParser;
400    ///
401    /// let file = br#"@base <http://example.com/> .
402    /// @prefix schema: <http://schema.org/> .
403    /// <foo> a schema:Person ;
404    ///     schema:name "Foo" ."#;
405    ///
406    /// let mut parser = TurtleParser::new().for_reader(file.as_ref());
407    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
408    ///
409    /// parser.next().unwrap()?; // We read the first triple
410    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
411    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
412    /// ```
413    pub fn base_iri(&self) -> Option<&str> {
414        self.inner
415            .parser
416            .context
417            .lexer_options
418            .base_iri
419            .as_ref()
420            .map(Iri::as_str)
421    }
422}
423
424impl<R: Read> Iterator for ReaderTurtleParser<R> {
425    type Item = Result<Triple, TurtleParseError>;
426
427    fn next(&mut self) -> Option<Self::Item> {
428        Some(self.inner.next()?.map(Into::into))
429    }
430}
431
/// Parses a Turtle file from an [`AsyncRead`] implementation.
433///
434/// Can be built using [`TurtleParser::for_tokio_async_reader`].
435///
436/// Count the number of people:
437/// ```
438/// # #[tokio::main(flavor = "current_thread")]
439/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
440/// use oxrdf::vocab::rdf;
441/// use oxrdf::NamedNodeRef;
442/// use oxttl::TurtleParser;
443///
444/// let file = br#"@base <http://example.com/> .
445/// @prefix schema: <http://schema.org/> .
446/// <foo> a schema:Person ;
447///     schema:name "Foo" .
448/// <bar> a schema:Person ;
449///     schema:name "Bar" ."#;
450///
451/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
452/// let mut count = 0;
453/// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
454/// while let Some(triple) = parser.next().await {
455///     let triple = triple?;
456///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
457///         count += 1;
458///     }
459/// }
460/// assert_eq!(2, count);
461/// # Ok(())
462/// # }
463/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderTurtleParser<R: AsyncRead + Unpin> {
    /// Async iterator obtained by binding the low-level parser state to the reader;
    /// it also gives access to the parser context (prefixes, base IRI).
    inner: TokioAsyncReaderIterator<R, TriGRecognizer>,
}
469
470#[cfg(feature = "async-tokio")]
471impl<R: AsyncRead + Unpin> TokioAsyncReaderTurtleParser<R> {
472    /// Reads the next triple or returns `None` if the file is finished.
473    pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
474        Some(self.inner.next().await?.map(Into::into))
475    }
476
477    /// The list of IRI prefixes considered at the current step of the parsing.
478    ///
479    /// This method returns (prefix name, prefix value) tuples.
480    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
481    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
482    ///
483    /// ```
484    /// # #[tokio::main(flavor = "current_thread")]
485    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
486    /// use oxttl::TurtleParser;
487    ///
488    /// let file = br#"@base <http://example.com/> .
489    /// @prefix schema: <http://schema.org/> .
490    /// <foo> a schema:Person ;
491    ///     schema:name "Foo" ."#;
492    ///
493    /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
494    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
495    ///
496    /// parser.next().await.unwrap()?; // We read the first triple
497    /// assert_eq!(
498    ///     parser.prefixes().collect::<Vec<_>>(),
499    ///     [("schema", "http://schema.org/")]
500    /// ); // There are now prefixes
501    /// # Ok(())
502    /// # }
503    /// ```
504    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
505        TurtlePrefixesIter {
506            inner: self.inner.parser.context.prefixes(),
507        }
508    }
509
510    /// The base IRI considered at the current step of the parsing.
511    ///
512    /// ```
513    /// # #[tokio::main(flavor = "current_thread")]
514    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
515    /// use oxttl::TurtleParser;
516    ///
517    /// let file = br#"@base <http://example.com/> .
518    /// @prefix schema: <http://schema.org/> .
519    /// <foo> a schema:Person ;
520    ///     schema:name "Foo" ."#;
521    ///
522    /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
523    /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
524    ///
525    /// parser.next().await.unwrap()?; // We read the first triple
526    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
527    /// # Ok(())
528    /// # }
529    /// ```
530    pub fn base_iri(&self) -> Option<&str> {
531        self.inner
532            .parser
533            .context
534            .lexer_options
535            .base_iri
536            .as_ref()
537            .map(Iri::as_str)
538    }
539}
540
541/// Parses a Turtle file from a byte slice.
542///
543/// Can be built using [`TurtleParser::for_slice`].
544///
545/// Count the number of people:
546/// ```
547/// use oxrdf::vocab::rdf;
548/// use oxrdf::NamedNodeRef;
549/// use oxttl::TurtleParser;
550///
551/// let file = br#"@base <http://example.com/> .
552/// @prefix schema: <http://schema.org/> .
553/// <foo> a schema:Person ;
554///     schema:name "Foo" .
555/// <bar> a schema:Person ;
556///     schema:name "Bar" ."#;
557///
558/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
559/// let mut count = 0;
560/// for triple in TurtleParser::new().for_slice(file) {
561///     let triple = triple?;
562///     if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
563///         count += 1;
564///     }
565/// }
566/// assert_eq!(2, count);
567/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
568/// ```
#[must_use]
pub struct SliceTurtleParser<'a> {
    /// Iterator over the borrowed slice, built from the recognizer's parser;
    /// it also gives access to the parser context (prefixes, base IRI).
    inner: SliceIterator<'a, TriGRecognizer>,
}
573
574impl SliceTurtleParser<'_> {
575    /// The list of IRI prefixes considered at the current step of the parsing.
576    ///
577    /// This method returns (prefix name, prefix value) tuples.
578    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
579    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
580    ///
581    /// ```
582    /// use oxttl::TurtleParser;
583    ///
584    /// let file = br#"@base <http://example.com/> .
585    /// @prefix schema: <http://schema.org/> .
586    /// <foo> a schema:Person ;
587    ///     schema:name "Foo" ."#;
588    ///
589    /// let mut parser = TurtleParser::new().for_slice(file);
590    /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
591    ///
592    /// parser.next().unwrap()?; // We read the first triple
593    /// assert_eq!(
594    ///     parser.prefixes().collect::<Vec<_>>(),
595    ///     [("schema", "http://schema.org/")]
596    /// ); // There are now prefixes
597    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
598    /// ```
599    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
600        TurtlePrefixesIter {
601            inner: self.inner.parser.context.prefixes(),
602        }
603    }
604
605    /// The base IRI considered at the current step of the parsing.
606    ///
607    /// ```
608    /// use oxttl::TurtleParser;
609    ///
610    /// let file = br#"@base <http://example.com/> .
611    /// @prefix schema: <http://schema.org/> .
612    /// <foo> a schema:Person ;
613    ///     schema:name "Foo" ."#;
614    ///
615    /// let mut parser = TurtleParser::new().for_slice(file);
616    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
617    ///
618    /// parser.next().unwrap()?; // We read the first triple
619    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
620    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
621    /// ```
622    pub fn base_iri(&self) -> Option<&str> {
623        self.inner
624            .parser
625            .context
626            .lexer_options
627            .base_iri
628            .as_ref()
629            .map(Iri::as_str)
630    }
631}
632
633impl Iterator for SliceTurtleParser<'_> {
634    type Item = Result<Triple, TurtleSyntaxError>;
635
636    fn next(&mut self) -> Option<Self::Item> {
637        Some(self.inner.next()?.map(Into::into))
638    }
639}
640
641/// Parses a Turtle file by using a low-level API.
642///
643/// Can be built using [`TurtleParser::low_level`].
644///
645/// Count the number of people:
646/// ```
647/// use oxrdf::vocab::rdf;
648/// use oxrdf::NamedNodeRef;
649/// use oxttl::TurtleParser;
650///
651/// let file: [&[u8]; 5] = [
652///     b"@base <http://example.com/>",
653///     b". @prefix schema: <http://schema.org/> .",
654///     b"<foo> a schema:Person",
655///     b" ; schema:name \"Foo\" . <bar>",
656///     b" a schema:Person ; schema:name \"Bar\" .",
657/// ];
658///
659/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
660/// let mut count = 0;
661/// let mut parser = TurtleParser::new().low_level();
662/// let mut file_chunks = file.iter();
663/// while !parser.is_end() {
664///     // We feed more data to the parser
665///     if let Some(chunk) = file_chunks.next() {
666///         parser.extend_from_slice(chunk);
667///     } else {
668///         parser.end(); // It's finished
669///     }
670///     // We read as many triples from the parser as possible
671///     while let Some(triple) = parser.parse_next() {
672///         let triple = triple?;
673///         if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
674///             count += 1;
675///         }
676///     }
677/// }
678/// assert_eq!(2, count);
679/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
680/// ```
pub struct LowLevelTurtleParser {
    /// Parser state over an owned byte buffer; every method of this type delegates to it.
    parser: Parser<Vec<u8>, TriGRecognizer>,
}
684
685impl LowLevelTurtleParser {
686    /// Adds some extra bytes to the parser. Should be called when [`parse_next`](Self::parse_next) returns [`None`] and there is still unread data.
687    pub fn extend_from_slice(&mut self, other: &[u8]) {
688        self.parser.extend_from_slice(other)
689    }
690
691    /// Tell the parser that the file is finished.
692    ///
693    /// This triggers the parsing of the final bytes and might lead [`parse_next`](Self::parse_next) to return some extra values.
694    pub fn end(&mut self) {
695        self.parser.end()
696    }
697
    /// Returns `true` if the parsing is finished, i.e. [`end`](Self::end) has been called and [`parse_next`](Self::parse_next) is always going to return `None`.
    pub fn is_end(&self) -> bool {
        // The answer comes straight from the wrapped parser.
        self.parser.is_end()
    }
702
703    /// Attempt to parse a new triple from the already provided data.
704    ///
705    /// Returns [`None`] if the parsing is finished or more data is required.
706    /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
707    pub fn parse_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
708        Some(self.parser.parse_next()?.map(Into::into))
709    }
710
711    /// The list of IRI prefixes considered at the current step of the parsing.
712    ///
713    /// This method returns (prefix name, prefix value) tuples.
714    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
715    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
716    ///
717    /// ```
718    /// use oxttl::TurtleParser;
719    ///
720    /// let file = br#"@base <http://example.com/> .
721    /// @prefix schema: <http://schema.org/> .
722    /// <foo> a schema:Person ;
723    ///     schema:name "Foo" ."#;
724    ///
725    /// let mut parser = TurtleParser::new().low_level();
726    /// parser.extend_from_slice(file);
727    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
728    ///
729    /// parser.parse_next().unwrap()?; // We read the first triple
730    /// assert_eq!(
731    ///     parser.prefixes().collect::<Vec<_>>(),
732    ///     [("schema", "http://schema.org/")]
733    /// ); // There are now prefixes
734    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
735    /// ```
736    pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
737        TurtlePrefixesIter {
738            inner: self.parser.context.prefixes(),
739        }
740    }
741
742    /// The base IRI considered at the current step of the parsing.
743    ///
744    /// ```
745    /// use oxttl::TurtleParser;
746    ///
747    /// let file = br#"@base <http://example.com/> .
748    /// @prefix schema: <http://schema.org/> .
749    /// <foo> a schema:Person ;
750    ///     schema:name "Foo" ."#;
751    ///
752    /// let mut parser = TurtleParser::new().low_level();
753    /// parser.extend_from_slice(file);
754    /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
755    ///
756    /// parser.parse_next().unwrap()?; // We read the first triple
757    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
758    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
759    /// ```
760    pub fn base_iri(&self) -> Option<&str> {
761        self.parser
762            .context
763            .lexer_options
764            .base_iri
765            .as_ref()
766            .map(Iri::as_str)
767    }
768}
769
770/// Iterator on the file prefixes.
771///
772/// See [`LowLevelTurtleParser::prefixes`].
pub struct TurtlePrefixesIter<'a> {
    /// Borrowed iterator over the (prefix name, prefix IRI) entries of the parser's prefix map.
    inner: Iter<'a, String, Iri<String>>,
}
776
777impl<'a> Iterator for TurtlePrefixesIter<'a> {
778    type Item = (&'a str, &'a str);
779
780    #[inline]
781    fn next(&mut self) -> Option<Self::Item> {
782        let (key, value) = self.inner.next()?;
783        Some((key.as_str(), value.as_str()))
784    }
785
786    #[inline]
787    fn size_hint(&self) -> (usize, Option<usize>) {
788        self.inner.size_hint()
789    }
790}
791
792/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
793///
794/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
795///
796/// ```
797/// use oxrdf::vocab::rdf;
798/// use oxrdf::{NamedNodeRef, TripleRef};
799/// use oxttl::TurtleSerializer;
800///
801/// let mut serializer = TurtleSerializer::new()
802///     .with_prefix("schema", "http://schema.org/")?
803///     .for_writer(Vec::new());
804/// serializer.serialize_triple(TripleRef::new(
805///     NamedNodeRef::new("http://example.com#me")?,
806///     rdf::TYPE,
807///     NamedNodeRef::new("http://schema.org/Person")?,
808/// ))?;
809/// assert_eq!(
810///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
811///     serializer.finish()?.as_slice()
812/// );
813/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
814/// ```
#[derive(Default, Clone)]
#[must_use]
pub struct TurtleSerializer {
    /// Wrapped TriG serializer configuration; the builder methods of this type delegate to it.
    inner: TriGSerializer,
}
820
821impl TurtleSerializer {
822    /// Builds a new [`TurtleSerializer`].
823    #[inline]
824    pub fn new() -> Self {
825        Self::default()
826    }
827
828    #[inline]
829    pub fn with_prefix(
830        mut self,
831        prefix_name: impl Into<String>,
832        prefix_iri: impl Into<String>,
833    ) -> Result<Self, IriParseError> {
834        self.inner = self.inner.with_prefix(prefix_name, prefix_iri)?;
835        Ok(self)
836    }
837
838    /// Adds a base IRI to the serialization.
839    ///
840    /// ```
841    /// use oxrdf::vocab::rdf;
842    /// use oxrdf::{NamedNodeRef, TripleRef};
843    /// use oxttl::TurtleSerializer;
844    ///
845    /// let mut serializer = TurtleSerializer::new()
846    ///     .with_base_iri("http://example.com")?
847    ///     .with_prefix("ex", "http://example.com/ns#")?
848    ///     .for_writer(Vec::new());
849    /// serializer.serialize_triple(TripleRef::new(
850    ///     NamedNodeRef::new("http://example.com/me")?,
851    ///     rdf::TYPE,
852    ///     NamedNodeRef::new("http://example.com/ns#Person")?,
853    /// ))?;
854    /// assert_eq!(
855    ///     b"@base <http://example.com> .\n@prefix ex: </ns#> .\n</me> a ex:Person .\n",
856    ///     serializer.finish()?.as_slice()
857    /// );
858    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
859    /// ```
860    #[inline]
861    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
862        self.inner = self.inner.with_base_iri(base_iri)?;
863        Ok(self)
864    }
865
866    /// Writes a Turtle file to a [`Write`] implementation.
867    ///
868    /// ```
869    /// use oxrdf::vocab::rdf;
870    /// use oxrdf::{NamedNodeRef, TripleRef};
871    /// use oxttl::TurtleSerializer;
872    ///
873    /// let mut serializer = TurtleSerializer::new()
874    ///     .with_prefix("schema", "http://schema.org/")?
875    ///     .for_writer(Vec::new());
876    /// serializer.serialize_triple(TripleRef::new(
877    ///     NamedNodeRef::new("http://example.com#me")?,
878    ///     rdf::TYPE,
879    ///     NamedNodeRef::new("http://schema.org/Person")?,
880    /// ))?;
881    /// assert_eq!(
882    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
883    ///     serializer.finish()?.as_slice()
884    /// );
885    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
886    /// ```
887    pub fn for_writer<W: Write>(self, writer: W) -> WriterTurtleSerializer<W> {
888        WriterTurtleSerializer {
889            inner: self.inner.for_writer(writer),
890        }
891    }
892
893    /// Writes a Turtle file to a [`AsyncWrite`] implementation.
894    ///
895    /// ```
896    /// # #[tokio::main(flavor = "current_thread")]
897    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
898    /// use oxrdf::vocab::rdf;
899    /// use oxrdf::{NamedNodeRef, TripleRef};
900    /// use oxttl::TurtleSerializer;
901    ///
902    /// let mut serializer = TurtleSerializer::new()
903    ///     .with_prefix("schema", "http://schema.org/")?
904    ///     .for_tokio_async_writer(Vec::new());
905    /// serializer
906    ///     .serialize_triple(TripleRef::new(
907    ///         NamedNodeRef::new("http://example.com#me")?,
908    ///         rdf::TYPE,
909    ///         NamedNodeRef::new("http://schema.org/Person")?,
910    ///     ))
911    ///     .await?;
912    /// assert_eq!(
913    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
914    ///     serializer.finish().await?.as_slice()
915    /// );
916    /// # Ok(())
917    /// # }
918    /// ```
919    #[cfg(feature = "async-tokio")]
920    pub fn for_tokio_async_writer<W: AsyncWrite + Unpin>(
921        self,
922        writer: W,
923    ) -> TokioAsyncWriterTurtleSerializer<W> {
924        TokioAsyncWriterTurtleSerializer {
925            inner: self.inner.for_tokio_async_writer(writer),
926        }
927    }
928
929    /// Builds a low-level Turtle writer.
930    ///
931    /// ```
932    /// use oxrdf::vocab::rdf;
933    /// use oxrdf::{NamedNodeRef, TripleRef};
934    /// use oxttl::TurtleSerializer;
935    ///
936    /// let mut buf = Vec::new();
937    /// let mut serializer = TurtleSerializer::new()
938    ///     .with_prefix("schema", "http://schema.org/")?
939    ///     .low_level();
940    /// serializer.serialize_triple(
941    ///     TripleRef::new(
942    ///         NamedNodeRef::new("http://example.com#me")?,
943    ///         rdf::TYPE,
944    ///         NamedNodeRef::new("http://schema.org/Person")?,
945    ///     ),
946    ///     &mut buf,
947    /// )?;
948    /// serializer.finish(&mut buf)?;
949    /// assert_eq!(
950    ///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
951    ///     buf.as_slice()
952    /// );
953    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
954    /// ```
955    pub fn low_level(self) -> LowLevelTurtleSerializer {
956        LowLevelTurtleSerializer {
957            inner: self.inner.low_level(),
958        }
959    }
960}
961
/// Writes a Turtle file to a [`Write`] implementation.
///
/// Can be built using [`TurtleSerializer::for_writer`].
///
/// Triples are added with [`serialize_triple`](Self::serialize_triple) and the output is
/// completed by calling [`finish`](Self::finish), which hands the writer back.
///
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut serializer = TurtleSerializer::new()
///     .with_prefix("schema", "http://schema.org/")?
///     .for_writer(Vec::new());
/// serializer.serialize_triple(TripleRef::new(
///     NamedNodeRef::new("http://example.com#me")?,
///     rdf::TYPE,
///     NamedNodeRef::new("http://schema.org/Person")?,
/// ))?;
/// assert_eq!(
///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
///     serializer.finish()?.as_slice()
/// );
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
#[must_use]
pub struct WriterTurtleSerializer<W: Write> {
    /// TriG writer doing the actual output; this wrapper only sends it default-graph quads.
    inner: WriterTriGSerializer<W>,
}
989
990impl<W: Write> WriterTurtleSerializer<W> {
991    /// Writes an extra triple.
992    pub fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
993        self.inner
994            .serialize_quad(t.into().in_graph(GraphNameRef::DefaultGraph))
995    }
996
997    /// Ends the write process and returns the underlying [`Write`].
998    pub fn finish(self) -> io::Result<W> {
999        self.inner.finish()
1000    }
1001}
1002
/// Writes a Turtle file to an [`AsyncWrite`] implementation.
///
/// Can be built using [`TurtleSerializer::for_tokio_async_writer`].
///
/// Triples are added with [`serialize_triple`](Self::serialize_triple) and the output is
/// completed by calling [`finish`](Self::finish), which hands the writer back.
///
/// ```
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut serializer = TurtleSerializer::new()
///     .with_prefix("schema", "http://schema.org/")?
///     .for_tokio_async_writer(Vec::new());
/// serializer
///     .serialize_triple(TripleRef::new(
///         NamedNodeRef::new("http://example.com#me")?,
///         rdf::TYPE,
///         NamedNodeRef::new("http://schema.org/Person")?,
///     ))
///     .await?;
/// assert_eq!(
///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
///     serializer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncWriterTurtleSerializer<W: AsyncWrite + Unpin> {
    /// Async TriG writer doing the actual output; this wrapper only sends it default-graph quads.
    inner: TokioAsyncWriterTriGSerializer<W>,
}
1036
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterTurtleSerializer<W> {
    /// Serializes one more triple.
    ///
    /// The triple is placed in the default graph and passed to the inner TriG writer.
    pub async fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let quad = t.into().in_graph(GraphNameRef::DefaultGraph);
        self.inner.serialize_quad(quad).await
    }

    /// Completes the serialization and gives back the underlying [`AsyncWrite`].
    pub async fn finish(self) -> io::Result<W> {
        let Self { inner } = self;
        inner.finish().await
    }
}
1051
/// Writes a Turtle file by using a low-level API.
///
/// Can be built using [`TurtleSerializer::low_level`].
///
/// This type does not hold the output: a [`Write`] implementation is passed to each call of
/// [`serialize_triple`](Self::serialize_triple) and [`finish`](Self::finish).
///
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut serializer = TurtleSerializer::new()
///     .with_prefix("schema", "http://schema.org/")?
///     .low_level();
/// serializer.serialize_triple(
///     TripleRef::new(
///         NamedNodeRef::new("http://example.com#me")?,
///         rdf::TYPE,
///         NamedNodeRef::new("http://schema.org/Person")?,
///     ),
///     &mut buf,
/// )?;
/// serializer.finish(&mut buf)?;
/// assert_eq!(
///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
///     buf.as_slice()
/// );
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleSerializer {
    /// Low-level TriG serializer doing the actual work; it only receives default-graph quads.
    inner: LowLevelTriGSerializer,
}
1083
1084impl LowLevelTurtleSerializer {
1085    /// Writes an extra triple.
1086    pub fn serialize_triple<'a>(
1087        &mut self,
1088        t: impl Into<TripleRef<'a>>,
1089        writer: impl Write,
1090    ) -> io::Result<()> {
1091        self.inner
1092            .serialize_quad(t.into().in_graph(GraphNameRef::DefaultGraph), writer)
1093    }
1094
1095    /// Finishes to write the file.
1096    pub fn finish(&mut self, writer: impl Write) -> io::Result<()> {
1097        self.inner.finish(writer)
1098    }
1099}
1100
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};

    /// Serializes four triples and checks the exact Turtle text that is produced.
    #[test]
    fn test_write() -> io::Result<()> {
        let subject = NamedNodeRef::new_unchecked("http://example.com/s");
        let p = NamedNodeRef::new_unchecked("http://example.com/p");
        let p2 = NamedNodeRef::new_unchecked("http://example.com/p2");

        // Same triples, in the same order, as the expected output below.
        let triples = [
            TripleRef::new(
                subject,
                p,
                NamedNodeRef::new_unchecked("http://example.com/o"),
            ),
            TripleRef::new(subject, p, LiteralRef::new_simple_literal("foo")),
            TripleRef::new(
                subject,
                p2,
                LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
            ),
            TripleRef::new(
                BlankNodeRef::new_unchecked("b"),
                p2,
                BlankNodeRef::new_unchecked("b2"),
            ),
        ];

        let mut serializer = TurtleSerializer::new().for_writer(Vec::new());
        for triple in triples {
            serializer.serialize_triple(triple)?;
        }

        let output = String::from_utf8(serializer.finish()?).unwrap();
        assert_eq!(output, "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
        Ok(())
    }
}