oxttl/turtle.rs
1//! A [Turtle](https://www.w3.org/TR/turtle/) streaming parser implemented by [`TurtleParser`]
2//! and a serializer implemented by [`TurtleSerializer`].
3
4use crate::chunker::get_turtle_file_chunks;
5use crate::terse::TriGRecognizer;
6#[cfg(feature = "async-tokio")]
7use crate::toolkit::TokioAsyncReaderIterator;
8use crate::toolkit::{Parser, ReaderIterator, SliceIterator, TurtleParseError, TurtleSyntaxError};
9#[cfg(feature = "async-tokio")]
10use crate::trig::TokioAsyncWriterTriGSerializer;
11use crate::trig::{LowLevelTriGSerializer, TriGSerializer, WriterTriGSerializer};
12use crate::MIN_PARALLEL_CHUNK_SIZE;
13use oxiri::{Iri, IriParseError};
14use oxrdf::{GraphNameRef, Triple, TripleRef};
15use std::collections::hash_map::Iter;
16use std::collections::HashMap;
17use std::io::{self, Read, Write};
18#[cfg(feature = "async-tokio")]
19use tokio::io::{AsyncRead, AsyncWrite};
20
21/// A [Turtle](https://www.w3.org/TR/turtle/) streaming parser.
22///
23/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature and the [`TurtleParser::with_quoted_triples`] option.
24///
25/// Count the number of people:
26/// ```
27/// use oxrdf::vocab::rdf;
28/// use oxrdf::NamedNodeRef;
29/// use oxttl::TurtleParser;
30///
31/// let file = br#"@base <http://example.com/> .
32/// @prefix schema: <http://schema.org/> .
33/// <foo> a schema:Person ;
34/// schema:name "Foo" .
35/// <bar> a schema:Person ;
36/// schema:name "Bar" ."#;
37///
38/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
39/// let mut count = 0;
40/// for triple in TurtleParser::new().for_reader(file.as_ref()) {
41/// let triple = triple?;
42/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
43/// count += 1;
44/// }
45/// }
46/// assert_eq!(2, count);
47/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
48/// ```
49#[derive(Default, Clone)]
50#[must_use]
51pub struct TurtleParser {
52 unchecked: bool,
53 base: Option<Iri<String>>,
54 prefixes: HashMap<String, Iri<String>>,
55 #[cfg(feature = "rdf-star")]
56 with_quoted_triples: bool,
57}
58
59impl TurtleParser {
60 /// Builds a new [`TurtleParser`].
61 #[inline]
62 pub fn new() -> Self {
63 Self::default()
64 }
65
66 /// Assumes the file is valid to make parsing faster.
67 ///
68 /// It will skip some validations.
69 ///
70 /// Note that if the file is actually not valid, broken RDF might be emitted by the parser.
71 #[inline]
72 pub fn unchecked(mut self) -> Self {
73 self.unchecked = true;
74 self
75 }
76
77 #[inline]
78 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
79 self.base = Some(Iri::parse(base_iri.into())?);
80 Ok(self)
81 }
82
83 #[inline]
84 pub fn with_prefix(
85 mut self,
86 prefix_name: impl Into<String>,
87 prefix_iri: impl Into<String>,
88 ) -> Result<Self, IriParseError> {
89 self.prefixes
90 .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
91 Ok(self)
92 }
93
/// Turns on [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) support.
#[cfg(feature = "rdf-star")]
#[inline]
pub fn with_quoted_triples(self) -> Self {
    Self {
        with_quoted_triples: true,
        ..self
    }
}
101
102 /// Parses a Turtle file from a [`Read`] implementation.
103 ///
104 /// Count the number of people:
105 /// ```
106 /// use oxrdf::vocab::rdf;
107 /// use oxrdf::NamedNodeRef;
108 /// use oxttl::TurtleParser;
109 ///
110 /// let file = br#"@base <http://example.com/> .
111 /// @prefix schema: <http://schema.org/> .
112 /// <foo> a schema:Person ;
113 /// schema:name "Foo" .
114 /// <bar> a schema:Person ;
115 /// schema:name "Bar" ."#;
116 ///
117 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
118 /// let mut count = 0;
119 /// for triple in TurtleParser::new().for_reader(file.as_ref()) {
120 /// let triple = triple?;
121 /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
122 /// count += 1;
123 /// }
124 /// }
125 /// assert_eq!(2, count);
126 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
127 /// ```
128 pub fn for_reader<R: Read>(self, reader: R) -> ReaderTurtleParser<R> {
129 ReaderTurtleParser {
130 inner: self.low_level().parser.for_reader(reader),
131 }
132 }
133
/// Parses a Turtle file from an [`AsyncRead`] implementation.
135 ///
136 /// Count the number of people:
137 /// ```
138 /// # #[tokio::main(flavor = "current_thread")]
139 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
140 /// use oxrdf::vocab::rdf;
141 /// use oxrdf::NamedNodeRef;
142 /// use oxttl::TurtleParser;
143 ///
144 /// let file = br#"@base <http://example.com/> .
145 /// @prefix schema: <http://schema.org/> .
146 /// <foo> a schema:Person ;
147 /// schema:name "Foo" .
148 /// <bar> a schema:Person ;
149 /// schema:name "Bar" ."#;
150 ///
151 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
152 /// let mut count = 0;
153 /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
154 /// while let Some(triple) = parser.next().await {
155 /// let triple = triple?;
156 /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
157 /// count += 1;
158 /// }
159 /// }
160 /// assert_eq!(2, count);
161 /// # Ok(())
162 /// # }
163 /// ```
#[cfg(feature = "async-tokio")]
pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
    self,
    reader: R,
) -> TokioAsyncReaderTurtleParser<R> {
    // The async iterator is derived from the low-level parser state.
    let inner = self.low_level().parser.for_tokio_async_reader(reader);
    TokioAsyncReaderTurtleParser { inner }
}
173
/// Parses a Turtle file from a byte slice.
175 ///
176 /// Count the number of people:
177 /// ```
178 /// use oxrdf::vocab::rdf;
179 /// use oxrdf::NamedNodeRef;
180 /// use oxttl::TurtleParser;
181 ///
182 /// let file = br#"@base <http://example.com/> .
183 /// @prefix schema: <http://schema.org/> .
184 /// <foo> a schema:Person ;
185 /// schema:name "Foo" .
186 /// <bar> a schema:Person ;
187 /// schema:name "Bar" ."#;
188 ///
189 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
190 /// let mut count = 0;
191 /// for triple in TurtleParser::new().for_slice(file) {
192 /// let triple = triple?;
193 /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
194 /// count += 1;
195 /// }
196 /// }
197 /// assert_eq!(2, count);
198 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
199 /// ```
200 pub fn for_slice(self, slice: &[u8]) -> SliceTurtleParser<'_> {
201 SliceTurtleParser {
202 inner: TriGRecognizer::new_parser(
203 slice,
204 true,
205 false,
206 #[cfg(feature = "rdf-star")]
207 self.with_quoted_triples,
208 self.unchecked,
209 self.base,
210 self.prefixes,
211 )
212 .into_iter(),
213 }
214 }
215
216 /// Creates a vector of iterators that may be used to parse a Turtle document slice in parallel.
217 /// To dynamically specify target_parallelism, use e.g. [`std::thread::available_parallelism`].
218 /// Intended to work on large documents.
219 /// Can fail or return wrong results if there are prefixes or base iris that are not defined
220 /// at the top of the document, or valid turtle syntax inside literal values.
221 ///
222 /// Count the number of people:
223 /// ```
224 /// use oxrdf::vocab::rdf;
225 /// use oxrdf::NamedNodeRef;
226 /// use oxttl::TurtleParser;
227 /// use rayon::iter::{IntoParallelIterator, ParallelIterator};
228 ///
229 /// let file = br#"@base <http://example.com/> .
230 /// @prefix schema: <http://schema.org/> .
231 /// <foo> a schema:Person ;
232 /// schema:name "Foo" .
233 /// <bar> a schema:Person ;
234 /// schema:name "Bar" ."#;
235 ///
236 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
237 /// let readers = TurtleParser::new().split_slice_for_parallel_parsing(file.as_ref(), 2);
238 /// let count = readers
239 /// .into_par_iter()
240 /// .map(|reader| {
241 /// let mut count = 0;
242 /// for triple in reader {
243 /// let triple = triple.unwrap();
244 /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
245 /// count += 1;
246 /// }
247 /// }
248 /// count
249 /// })
250 /// .sum();
251 /// assert_eq!(2, count);
252 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
253 /// ```
254 pub fn split_slice_for_parallel_parsing(
255 mut self,
256 slice: &[u8],
257 target_parallelism: usize,
258 ) -> Vec<SliceTurtleParser<'_>> {
259 let n_chunks = (slice.len() / MIN_PARALLEL_CHUNK_SIZE).clamp(1, target_parallelism);
260
261 if n_chunks > 1 {
262 // Prefixes must be determined before chunks, since determining chunks relies on parser with prefixes determined.
263 let mut from_slice_parser = self.clone().for_slice(slice);
264 // We don't care about errors: they will be raised when parsing the first chunk anyway
265 from_slice_parser.next();
266 for (p, iri) in from_slice_parser.prefixes() {
267 // Already know this is a valid IRI
268 self = self.with_prefix(p, iri).unwrap();
269 }
270 }
271
272 get_turtle_file_chunks(slice, n_chunks, &self)
273 .into_iter()
274 .map(|(start, end)| self.clone().for_slice(&slice[start..end]))
275 .collect()
276 }
277
/// Allows parsing a Turtle file by using a low-level API.
279 ///
280 /// Count the number of people:
281 /// ```
282 /// use oxrdf::vocab::rdf;
283 /// use oxrdf::NamedNodeRef;
284 /// use oxttl::TurtleParser;
285 ///
286 /// let file: [&[u8]; 5] = [
287 /// b"@base <http://example.com/>",
288 /// b". @prefix schema: <http://schema.org/> .",
289 /// b"<foo> a schema:Person",
290 /// b" ; schema:name \"Foo\" . <bar>",
291 /// b" a schema:Person ; schema:name \"Bar\" .",
292 /// ];
293 ///
294 /// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
295 /// let mut count = 0;
296 /// let mut parser = TurtleParser::new().low_level();
297 /// let mut file_chunks = file.iter();
298 /// while !parser.is_end() {
299 /// // We feed more data to the parser
300 /// if let Some(chunk) = file_chunks.next() {
301 /// parser.extend_from_slice(chunk);
302 /// } else {
303 /// parser.end(); // It's finished
304 /// }
305 /// // We read as many triples from the parser as possible
306 /// while let Some(triple) = parser.parse_next() {
307 /// let triple = triple?;
308 /// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
309 /// count += 1;
310 /// }
311 /// }
312 /// }
313 /// assert_eq!(2, count);
314 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
315 /// ```
316 pub fn low_level(self) -> LowLevelTurtleParser {
317 LowLevelTurtleParser {
318 parser: TriGRecognizer::new_parser(
319 Vec::new(),
320 false,
321 false,
322 #[cfg(feature = "rdf-star")]
323 self.with_quoted_triples,
324 self.unchecked,
325 self.base,
326 self.prefixes,
327 ),
328 }
329 }
330}
331
332/// Parses a Turtle file from a [`Read`] implementation.
333///
334/// Can be built using [`TurtleParser::for_reader`].
335///
336/// Count the number of people:
337/// ```
338/// use oxrdf::vocab::rdf;
339/// use oxrdf::NamedNodeRef;
340/// use oxttl::TurtleParser;
341///
342/// let file = br#"@base <http://example.com/> .
343/// @prefix schema: <http://schema.org/> .
344/// <foo> a schema:Person ;
345/// schema:name "Foo" .
346/// <bar> a schema:Person ;
347/// schema:name "Bar" ."#;
348///
349/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
350/// let mut count = 0;
351/// for triple in TurtleParser::new().for_reader(file.as_ref()) {
352/// let triple = triple?;
353/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
354/// count += 1;
355/// }
356/// }
357/// assert_eq!(2, count);
358/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
359/// ```
360#[must_use]
361pub struct ReaderTurtleParser<R: Read> {
362 inner: ReaderIterator<R, TriGRecognizer>,
363}
364
365impl<R: Read> ReaderTurtleParser<R> {
366 /// The list of IRI prefixes considered at the current step of the parsing.
367 ///
368 /// This method returns (prefix name, prefix value) tuples.
369 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
370 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
371 ///
372 /// ```
373 /// use oxttl::TurtleParser;
374 ///
375 /// let file = br#"@base <http://example.com/> .
376 /// @prefix schema: <http://schema.org/> .
377 /// <foo> a schema:Person ;
378 /// schema:name "Foo" ."#;
379 ///
380 /// let mut parser = TurtleParser::new().for_reader(file.as_ref());
381 /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
382 ///
383 /// parser.next().unwrap()?; // We read the first triple
384 /// assert_eq!(
385 /// parser.prefixes().collect::<Vec<_>>(),
386 /// [("schema", "http://schema.org/")]
387 /// ); // There are now prefixes
388 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
389 /// ```
390 pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
391 TurtlePrefixesIter {
392 inner: self.inner.parser.context.prefixes(),
393 }
394 }
395
396 /// The base IRI considered at the current step of the parsing.
397 ///
398 /// ```
399 /// use oxttl::TurtleParser;
400 ///
401 /// let file = br#"@base <http://example.com/> .
402 /// @prefix schema: <http://schema.org/> .
403 /// <foo> a schema:Person ;
404 /// schema:name "Foo" ."#;
405 ///
406 /// let mut parser = TurtleParser::new().for_reader(file.as_ref());
407 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
408 ///
409 /// parser.next().unwrap()?; // We read the first triple
410 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
411 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
412 /// ```
413 pub fn base_iri(&self) -> Option<&str> {
414 self.inner
415 .parser
416 .context
417 .lexer_options
418 .base_iri
419 .as_ref()
420 .map(Iri::as_str)
421 }
422}
423
424impl<R: Read> Iterator for ReaderTurtleParser<R> {
425 type Item = Result<Triple, TurtleParseError>;
426
427 fn next(&mut self) -> Option<Self::Item> {
428 Some(self.inner.next()?.map(Into::into))
429 }
430}
431
/// Parses a Turtle file from an [`AsyncRead`] implementation.
433///
434/// Can be built using [`TurtleParser::for_tokio_async_reader`].
435///
436/// Count the number of people:
437/// ```
438/// # #[tokio::main(flavor = "current_thread")]
439/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
440/// use oxrdf::vocab::rdf;
441/// use oxrdf::NamedNodeRef;
442/// use oxttl::TurtleParser;
443///
444/// let file = br#"@base <http://example.com/> .
445/// @prefix schema: <http://schema.org/> .
446/// <foo> a schema:Person ;
447/// schema:name "Foo" .
448/// <bar> a schema:Person ;
449/// schema:name "Bar" ."#;
450///
451/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
452/// let mut count = 0;
453/// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
454/// while let Some(triple) = parser.next().await {
455/// let triple = triple?;
456/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
457/// count += 1;
458/// }
459/// }
460/// assert_eq!(2, count);
461/// # Ok(())
462/// # }
463/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderTurtleParser<R>
where
    R: AsyncRead + Unpin,
{
    /// Underlying async reader-based iterator, parameterized with the TriG recognizer.
    inner: TokioAsyncReaderIterator<R, TriGRecognizer>,
}
468}
469
470#[cfg(feature = "async-tokio")]
471impl<R: AsyncRead + Unpin> TokioAsyncReaderTurtleParser<R> {
472 /// Reads the next triple or returns `None` if the file is finished.
473 pub async fn next(&mut self) -> Option<Result<Triple, TurtleParseError>> {
474 Some(self.inner.next().await?.map(Into::into))
475 }
476
477 /// The list of IRI prefixes considered at the current step of the parsing.
478 ///
479 /// This method returns (prefix name, prefix value) tuples.
480 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
481 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
482 ///
483 /// ```
484 /// # #[tokio::main(flavor = "current_thread")]
485 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
486 /// use oxttl::TurtleParser;
487 ///
488 /// let file = br#"@base <http://example.com/> .
489 /// @prefix schema: <http://schema.org/> .
490 /// <foo> a schema:Person ;
491 /// schema:name "Foo" ."#;
492 ///
493 /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
494 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
495 ///
496 /// parser.next().await.unwrap()?; // We read the first triple
497 /// assert_eq!(
498 /// parser.prefixes().collect::<Vec<_>>(),
499 /// [("schema", "http://schema.org/")]
500 /// ); // There are now prefixes
501 /// # Ok(())
502 /// # }
503 /// ```
504 pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
505 TurtlePrefixesIter {
506 inner: self.inner.parser.context.prefixes(),
507 }
508 }
509
510 /// The base IRI considered at the current step of the parsing.
511 ///
512 /// ```
513 /// # #[tokio::main(flavor = "current_thread")]
514 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
515 /// use oxttl::TurtleParser;
516 ///
517 /// let file = br#"@base <http://example.com/> .
518 /// @prefix schema: <http://schema.org/> .
519 /// <foo> a schema:Person ;
520 /// schema:name "Foo" ."#;
521 ///
522 /// let mut parser = TurtleParser::new().for_tokio_async_reader(file.as_ref());
523 /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
524 ///
525 /// parser.next().await.unwrap()?; // We read the first triple
526 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
527 /// # Ok(())
528 /// # }
529 /// ```
530 pub fn base_iri(&self) -> Option<&str> {
531 self.inner
532 .parser
533 .context
534 .lexer_options
535 .base_iri
536 .as_ref()
537 .map(Iri::as_str)
538 }
539}
540
541/// Parses a Turtle file from a byte slice.
542///
543/// Can be built using [`TurtleParser::for_slice`].
544///
545/// Count the number of people:
546/// ```
547/// use oxrdf::vocab::rdf;
548/// use oxrdf::NamedNodeRef;
549/// use oxttl::TurtleParser;
550///
551/// let file = br#"@base <http://example.com/> .
552/// @prefix schema: <http://schema.org/> .
553/// <foo> a schema:Person ;
554/// schema:name "Foo" .
555/// <bar> a schema:Person ;
556/// schema:name "Bar" ."#;
557///
558/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
559/// let mut count = 0;
560/// for triple in TurtleParser::new().for_slice(file) {
561/// let triple = triple?;
562/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
563/// count += 1;
564/// }
565/// }
566/// assert_eq!(2, count);
567/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
568/// ```
569#[must_use]
570pub struct SliceTurtleParser<'a> {
571 inner: SliceIterator<'a, TriGRecognizer>,
572}
573
574impl SliceTurtleParser<'_> {
575 /// The list of IRI prefixes considered at the current step of the parsing.
576 ///
577 /// This method returns (prefix name, prefix value) tuples.
578 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
579 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
580 ///
581 /// ```
582 /// use oxttl::TurtleParser;
583 ///
584 /// let file = br#"@base <http://example.com/> .
585 /// @prefix schema: <http://schema.org/> .
586 /// <foo> a schema:Person ;
587 /// schema:name "Foo" ."#;
588 ///
589 /// let mut parser = TurtleParser::new().for_slice(file);
590 /// assert!(parser.prefixes().collect::<Vec<_>>().is_empty()); // No prefix at the beginning
591 ///
592 /// parser.next().unwrap()?; // We read the first triple
593 /// assert_eq!(
594 /// parser.prefixes().collect::<Vec<_>>(),
595 /// [("schema", "http://schema.org/")]
596 /// ); // There are now prefixes
597 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
598 /// ```
599 pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
600 TurtlePrefixesIter {
601 inner: self.inner.parser.context.prefixes(),
602 }
603 }
604
605 /// The base IRI considered at the current step of the parsing.
606 ///
607 /// ```
608 /// use oxttl::TurtleParser;
609 ///
610 /// let file = br#"@base <http://example.com/> .
611 /// @prefix schema: <http://schema.org/> .
612 /// <foo> a schema:Person ;
613 /// schema:name "Foo" ."#;
614 ///
615 /// let mut parser = TurtleParser::new().for_slice(file);
616 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
617 ///
618 /// parser.next().unwrap()?; // We read the first triple
619 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
620 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
621 /// ```
622 pub fn base_iri(&self) -> Option<&str> {
623 self.inner
624 .parser
625 .context
626 .lexer_options
627 .base_iri
628 .as_ref()
629 .map(Iri::as_str)
630 }
631}
632
633impl Iterator for SliceTurtleParser<'_> {
634 type Item = Result<Triple, TurtleSyntaxError>;
635
636 fn next(&mut self) -> Option<Self::Item> {
637 Some(self.inner.next()?.map(Into::into))
638 }
639}
640
641/// Parses a Turtle file by using a low-level API.
642///
643/// Can be built using [`TurtleParser::low_level`].
644///
645/// Count the number of people:
646/// ```
647/// use oxrdf::vocab::rdf;
648/// use oxrdf::NamedNodeRef;
649/// use oxttl::TurtleParser;
650///
651/// let file: [&[u8]; 5] = [
652/// b"@base <http://example.com/>",
653/// b". @prefix schema: <http://schema.org/> .",
654/// b"<foo> a schema:Person",
655/// b" ; schema:name \"Foo\" . <bar>",
656/// b" a schema:Person ; schema:name \"Bar\" .",
657/// ];
658///
659/// let schema_person = NamedNodeRef::new("http://schema.org/Person")?;
660/// let mut count = 0;
661/// let mut parser = TurtleParser::new().low_level();
662/// let mut file_chunks = file.iter();
663/// while !parser.is_end() {
664/// // We feed more data to the parser
665/// if let Some(chunk) = file_chunks.next() {
666/// parser.extend_from_slice(chunk);
667/// } else {
668/// parser.end(); // It's finished
669/// }
670/// // We read as many triples from the parser as possible
671/// while let Some(triple) = parser.parse_next() {
672/// let triple = triple?;
673/// if triple.predicate == rdf::TYPE && triple.object == schema_person.into() {
674/// count += 1;
675/// }
676/// }
677/// }
678/// assert_eq!(2, count);
679/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
680/// ```
681pub struct LowLevelTurtleParser {
682 parser: Parser<Vec<u8>, TriGRecognizer>,
683}
684
685impl LowLevelTurtleParser {
686 /// Adds some extra bytes to the parser. Should be called when [`parse_next`](Self::parse_next) returns [`None`] and there is still unread data.
687 pub fn extend_from_slice(&mut self, other: &[u8]) {
688 self.parser.extend_from_slice(other)
689 }
690
691 /// Tell the parser that the file is finished.
692 ///
693 /// This triggers the parsing of the final bytes and might lead [`parse_next`](Self::parse_next) to return some extra values.
694 pub fn end(&mut self) {
695 self.parser.end()
696 }
697
698 /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`parse_next`](Self::parse_next) is always going to return `None`.
699 pub fn is_end(&self) -> bool {
700 self.parser.is_end()
701 }
702
703 /// Attempt to parse a new triple from the already provided data.
704 ///
705 /// Returns [`None`] if the parsing is finished or more data is required.
706 /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
707 pub fn parse_next(&mut self) -> Option<Result<Triple, TurtleSyntaxError>> {
708 Some(self.parser.parse_next()?.map(Into::into))
709 }
710
711 /// The list of IRI prefixes considered at the current step of the parsing.
712 ///
713 /// This method returns (prefix name, prefix value) tuples.
714 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
715 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
716 ///
717 /// ```
718 /// use oxttl::TurtleParser;
719 ///
720 /// let file = br#"@base <http://example.com/> .
721 /// @prefix schema: <http://schema.org/> .
722 /// <foo> a schema:Person ;
723 /// schema:name "Foo" ."#;
724 ///
725 /// let mut parser = TurtleParser::new().low_level();
726 /// parser.extend_from_slice(file);
727 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
728 ///
729 /// parser.parse_next().unwrap()?; // We read the first triple
730 /// assert_eq!(
731 /// parser.prefixes().collect::<Vec<_>>(),
732 /// [("schema", "http://schema.org/")]
733 /// ); // There are now prefixes
734 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
735 /// ```
736 pub fn prefixes(&self) -> TurtlePrefixesIter<'_> {
737 TurtlePrefixesIter {
738 inner: self.parser.context.prefixes(),
739 }
740 }
741
742 /// The base IRI considered at the current step of the parsing.
743 ///
744 /// ```
745 /// use oxttl::TurtleParser;
746 ///
747 /// let file = br#"@base <http://example.com/> .
748 /// @prefix schema: <http://schema.org/> .
749 /// <foo> a schema:Person ;
750 /// schema:name "Foo" ."#;
751 ///
752 /// let mut parser = TurtleParser::new().low_level();
753 /// parser.extend_from_slice(file);
754 /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
755 ///
756 /// parser.parse_next().unwrap()?; // We read the first triple
757 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
758 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
759 /// ```
760 pub fn base_iri(&self) -> Option<&str> {
761 self.parser
762 .context
763 .lexer_options
764 .base_iri
765 .as_ref()
766 .map(Iri::as_str)
767 }
768}
769
770/// Iterator on the file prefixes.
771///
772/// See [`LowLevelTurtleParser::prefixes`].
773pub struct TurtlePrefixesIter<'a> {
774 inner: Iter<'a, String, Iri<String>>,
775}
776
777impl<'a> Iterator for TurtlePrefixesIter<'a> {
778 type Item = (&'a str, &'a str);
779
780 #[inline]
781 fn next(&mut self) -> Option<Self::Item> {
782 let (key, value) = self.inner.next()?;
783 Some((key.as_str(), value.as_str()))
784 }
785
786 #[inline]
787 fn size_hint(&self) -> (usize, Option<usize>) {
788 self.inner.size_hint()
789 }
790}
791
792/// A [Turtle](https://www.w3.org/TR/turtle/) serializer.
793///
794/// Support for [Turtle-star](https://w3c.github.io/rdf-star/cg-spec/2021-12-17.html#turtle-star) is available behind the `rdf-star` feature.
795///
796/// ```
797/// use oxrdf::vocab::rdf;
798/// use oxrdf::{NamedNodeRef, TripleRef};
799/// use oxttl::TurtleSerializer;
800///
801/// let mut serializer = TurtleSerializer::new()
802/// .with_prefix("schema", "http://schema.org/")?
803/// .for_writer(Vec::new());
804/// serializer.serialize_triple(TripleRef::new(
805/// NamedNodeRef::new("http://example.com#me")?,
806/// rdf::TYPE,
807/// NamedNodeRef::new("http://schema.org/Person")?,
808/// ))?;
809/// assert_eq!(
810/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
811/// serializer.finish()?.as_slice()
812/// );
813/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
814/// ```
815#[derive(Default, Clone)]
816#[must_use]
817pub struct TurtleSerializer {
818 inner: TriGSerializer,
819}
820
821impl TurtleSerializer {
822 /// Builds a new [`TurtleSerializer`].
823 #[inline]
824 pub fn new() -> Self {
825 Self::default()
826 }
827
828 #[inline]
829 pub fn with_prefix(
830 mut self,
831 prefix_name: impl Into<String>,
832 prefix_iri: impl Into<String>,
833 ) -> Result<Self, IriParseError> {
834 self.inner = self.inner.with_prefix(prefix_name, prefix_iri)?;
835 Ok(self)
836 }
837
838 /// Adds a base IRI to the serialization.
839 ///
840 /// ```
841 /// use oxrdf::vocab::rdf;
842 /// use oxrdf::{NamedNodeRef, TripleRef};
843 /// use oxttl::TurtleSerializer;
844 ///
845 /// let mut serializer = TurtleSerializer::new()
846 /// .with_base_iri("http://example.com")?
847 /// .with_prefix("ex", "http://example.com/ns#")?
848 /// .for_writer(Vec::new());
849 /// serializer.serialize_triple(TripleRef::new(
850 /// NamedNodeRef::new("http://example.com/me")?,
851 /// rdf::TYPE,
852 /// NamedNodeRef::new("http://example.com/ns#Person")?,
853 /// ))?;
854 /// assert_eq!(
855 /// b"@base <http://example.com> .\n@prefix ex: </ns#> .\n</me> a ex:Person .\n",
856 /// serializer.finish()?.as_slice()
857 /// );
858 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
859 /// ```
860 #[inline]
861 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
862 self.inner = self.inner.with_base_iri(base_iri)?;
863 Ok(self)
864 }
865
866 /// Writes a Turtle file to a [`Write`] implementation.
867 ///
868 /// ```
869 /// use oxrdf::vocab::rdf;
870 /// use oxrdf::{NamedNodeRef, TripleRef};
871 /// use oxttl::TurtleSerializer;
872 ///
873 /// let mut serializer = TurtleSerializer::new()
874 /// .with_prefix("schema", "http://schema.org/")?
875 /// .for_writer(Vec::new());
876 /// serializer.serialize_triple(TripleRef::new(
877 /// NamedNodeRef::new("http://example.com#me")?,
878 /// rdf::TYPE,
879 /// NamedNodeRef::new("http://schema.org/Person")?,
880 /// ))?;
881 /// assert_eq!(
882 /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
883 /// serializer.finish()?.as_slice()
884 /// );
885 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
886 /// ```
887 pub fn for_writer<W: Write>(self, writer: W) -> WriterTurtleSerializer<W> {
888 WriterTurtleSerializer {
889 inner: self.inner.for_writer(writer),
890 }
891 }
892
/// Writes a Turtle file to an [`AsyncWrite`] implementation.
894 ///
895 /// ```
896 /// # #[tokio::main(flavor = "current_thread")]
897 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
898 /// use oxrdf::vocab::rdf;
899 /// use oxrdf::{NamedNodeRef, TripleRef};
900 /// use oxttl::TurtleSerializer;
901 ///
902 /// let mut serializer = TurtleSerializer::new()
903 /// .with_prefix("schema", "http://schema.org/")?
904 /// .for_tokio_async_writer(Vec::new());
905 /// serializer
906 /// .serialize_triple(TripleRef::new(
907 /// NamedNodeRef::new("http://example.com#me")?,
908 /// rdf::TYPE,
909 /// NamedNodeRef::new("http://schema.org/Person")?,
910 /// ))
911 /// .await?;
912 /// assert_eq!(
913 /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
914 /// serializer.finish().await?.as_slice()
915 /// );
916 /// # Ok(())
917 /// # }
918 /// ```
#[cfg(feature = "async-tokio")]
pub fn for_tokio_async_writer<W: AsyncWrite + Unpin>(
    self,
    writer: W,
) -> TokioAsyncWriterTurtleSerializer<W> {
    // Wrap the async TriG writer built from the same configuration.
    let inner = self.inner.for_tokio_async_writer(writer);
    TokioAsyncWriterTurtleSerializer { inner }
}
928
929 /// Builds a low-level Turtle writer.
930 ///
931 /// ```
932 /// use oxrdf::vocab::rdf;
933 /// use oxrdf::{NamedNodeRef, TripleRef};
934 /// use oxttl::TurtleSerializer;
935 ///
936 /// let mut buf = Vec::new();
937 /// let mut serializer = TurtleSerializer::new()
938 /// .with_prefix("schema", "http://schema.org/")?
939 /// .low_level();
940 /// serializer.serialize_triple(
941 /// TripleRef::new(
942 /// NamedNodeRef::new("http://example.com#me")?,
943 /// rdf::TYPE,
944 /// NamedNodeRef::new("http://schema.org/Person")?,
945 /// ),
946 /// &mut buf,
947 /// )?;
948 /// serializer.finish(&mut buf)?;
949 /// assert_eq!(
950 /// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
951 /// buf.as_slice()
952 /// );
953 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
954 /// ```
955 pub fn low_level(self) -> LowLevelTurtleSerializer {
956 LowLevelTurtleSerializer {
957 inner: self.inner.low_level(),
958 }
959 }
960}
961
962/// Writes a Turtle file to a [`Write`] implementation.
963///
964/// Can be built using [`TurtleSerializer::for_writer`].
965///
966/// ```
967/// use oxrdf::vocab::rdf;
968/// use oxrdf::{NamedNodeRef, TripleRef};
969/// use oxttl::TurtleSerializer;
970///
971/// let mut serializer = TurtleSerializer::new()
972/// .with_prefix("schema", "http://schema.org/")?
973/// .for_writer(Vec::new());
974/// serializer.serialize_triple(TripleRef::new(
975/// NamedNodeRef::new("http://example.com#me")?,
976/// rdf::TYPE,
977/// NamedNodeRef::new("http://schema.org/Person")?,
978/// ))?;
979/// assert_eq!(
980/// b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
981/// serializer.finish()?.as_slice()
982/// );
983/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
984/// ```
985#[must_use]
986pub struct WriterTurtleSerializer<W: Write> {
987 inner: WriterTriGSerializer<W>,
988}
989
990impl<W: Write> WriterTurtleSerializer<W> {
991 /// Writes an extra triple.
992 pub fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
993 self.inner
994 .serialize_quad(t.into().in_graph(GraphNameRef::DefaultGraph))
995 }
996
997 /// Ends the write process and returns the underlying [`Write`].
998 pub fn finish(self) -> io::Result<W> {
999 self.inner.finish()
1000 }
1001}
1002
/// Serializes a Turtle file into a Tokio [`AsyncWrite`] implementation.
///
/// Instances are created with [`TurtleSerializer::for_tokio_async_writer`].
///
/// ```
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut serializer = TurtleSerializer::new()
///     .with_prefix("schema", "http://schema.org/")?
///     .for_tokio_async_writer(Vec::new());
/// serializer
///     .serialize_triple(TripleRef::new(
///         NamedNodeRef::new("http://example.com#me")?,
///         rdf::TYPE,
///         NamedNodeRef::new("http://schema.org/Person")?,
///     ))
///     .await?;
/// assert_eq!(
///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
///     serializer.finish().await?.as_slice()
/// );
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncWriterTurtleSerializer<W>
where
    W: AsyncWrite + Unpin,
{
    /// Asynchronous TriG serializer that performs the actual writing.
    inner: TokioAsyncWriterTriGSerializer<W>,
}
1036
#[cfg(feature = "async-tokio")]
impl<W: AsyncWrite + Unpin> TokioAsyncWriterTurtleSerializer<W> {
    /// Serializes one more triple.
    ///
    /// The triple is handed to the wrapped TriG serializer as a quad of the default graph.
    /// Any I/O error returned by that serializer is propagated unchanged.
    pub async fn serialize_triple<'a>(&mut self, t: impl Into<TripleRef<'a>>) -> io::Result<()> {
        let quad = t.into().in_graph(GraphNameRef::DefaultGraph);
        self.inner.serialize_quad(quad).await
    }

    /// Terminates the serialization and gives back the underlying [`AsyncWrite`].
    pub async fn finish(self) -> io::Result<W> {
        let Self { inner } = self;
        inner.finish().await
    }
}
1051
/// Writes a Turtle file by using a low-level API.
///
/// Can be built using [`TurtleSerializer::low_level`].
///
/// Unlike [`WriterTurtleSerializer`], this type does not hold a writer:
/// the destination is given as an argument to each method call.
///
/// ```
/// use oxrdf::vocab::rdf;
/// use oxrdf::{NamedNodeRef, TripleRef};
/// use oxttl::TurtleSerializer;
///
/// let mut buf = Vec::new();
/// let mut serializer = TurtleSerializer::new()
///     .with_prefix("schema", "http://schema.org/")?
///     .low_level();
/// serializer.serialize_triple(
///     TripleRef::new(
///         NamedNodeRef::new("http://example.com#me")?,
///         rdf::TYPE,
///         NamedNodeRef::new("http://schema.org/Person")?,
///     ),
///     &mut buf,
/// )?;
/// serializer.finish(&mut buf)?;
/// assert_eq!(
///     b"@prefix schema: <http://schema.org/> .\n<http://example.com#me> a schema:Person .\n",
///     buf.as_slice()
/// );
/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
/// ```
pub struct LowLevelTurtleSerializer {
    // Low-level TriG serializer every call is delegated to.
    inner: LowLevelTriGSerializer,
}
1083
1084impl LowLevelTurtleSerializer {
1085 /// Writes an extra triple.
1086 pub fn serialize_triple<'a>(
1087 &mut self,
1088 t: impl Into<TripleRef<'a>>,
1089 writer: impl Write,
1090 ) -> io::Result<()> {
1091 self.inner
1092 .serialize_quad(t.into().in_graph(GraphNameRef::DefaultGraph), writer)
1093 }
1094
1095 /// Finishes to write the file.
1096 pub fn finish(&mut self, writer: impl Write) -> io::Result<()> {
1097 self.inner.finish(writer)
1098 }
1099}
1100
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;
    use oxrdf::{BlankNodeRef, LiteralRef, NamedNodeRef};

    /// Serializes four triples and compares the produced Turtle text with the expected
    /// output, which groups the triples sharing a subject and a predicate.
    #[test]
    fn test_write() -> io::Result<()> {
        let subject = NamedNodeRef::new_unchecked("http://example.com/s");
        let p = NamedNodeRef::new_unchecked("http://example.com/p");
        let p2 = NamedNodeRef::new_unchecked("http://example.com/p2");
        let triples = [
            TripleRef::new(
                subject,
                p,
                NamedNodeRef::new_unchecked("http://example.com/o"),
            ),
            TripleRef::new(subject, p, LiteralRef::new_simple_literal("foo")),
            TripleRef::new(
                subject,
                p2,
                LiteralRef::new_language_tagged_literal_unchecked("foo", "en"),
            ),
            TripleRef::new(
                BlankNodeRef::new_unchecked("b"),
                p2,
                BlankNodeRef::new_unchecked("b2"),
            ),
        ];

        let mut serializer = TurtleSerializer::new().for_writer(Vec::new());
        for triple in triples {
            serializer.serialize_triple(triple)?;
        }

        let written = String::from_utf8(serializer.finish()?).unwrap();
        assert_eq!(written, "<http://example.com/s> <http://example.com/p> <http://example.com/o> , \"foo\" ;\n\t<http://example.com/p2> \"foo\"@en .\n_:b <http://example.com/p2> _:b2 .\n");
        Ok(())
    }
}
1133}