oxttl/n3.rs
1//! A [N3](https://w3c.github.io/N3/spec/) streaming parser implemented by [`N3Parser`].
2
3use crate::lexer::{resolve_local_name, N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
4#[cfg(feature = "async-tokio")]
5use crate::toolkit::TokioAsyncReaderIterator;
6use crate::toolkit::{
7 Lexer, Parser, ReaderIterator, RuleRecognizer, RuleRecognizerError, SliceIterator,
8 TokenOrLineJump, TurtleSyntaxError,
9};
10use crate::{TurtleParseError, MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
11use oxiri::{Iri, IriParseError};
12use oxrdf::vocab::{rdf, xsd};
13#[cfg(feature = "rdf-star")]
14use oxrdf::Triple;
15use oxrdf::{
16 BlankNode, GraphName, Literal, NamedNode, NamedNodeRef, NamedOrBlankNode, Quad, Subject, Term,
17 Variable,
18};
19use std::collections::hash_map::Iter;
20use std::collections::HashMap;
21use std::fmt;
22use std::io::Read;
23#[cfg(feature = "async-tokio")]
24use tokio::io::AsyncRead;
25
/// A N3 term i.e. a RDF `Term` or a `Variable`.
///
/// Compared to a plain RDF term it has one extra variant, [`N3Term::Variable`].
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub enum N3Term {
    /// A named node.
    NamedNode(NamedNode),
    /// A blank node.
    BlankNode(BlankNode),
    /// A literal.
    Literal(Literal),
    /// A boxed triple used as a term; only compiled with the `rdf-star` feature.
    #[cfg(feature = "rdf-star")]
    Triple(Box<Triple>),
    /// A variable.
    Variable(Variable),
}
36
37impl fmt::Display for N3Term {
38 #[inline]
39 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40 match self {
41 Self::NamedNode(term) => term.fmt(f),
42 Self::BlankNode(term) => term.fmt(f),
43 Self::Literal(term) => term.fmt(f),
44 #[cfg(feature = "rdf-star")]
45 Self::Triple(term) => term.fmt(f),
46 Self::Variable(term) => term.fmt(f),
47 }
48 }
49}
50
51impl From<NamedNode> for N3Term {
52 #[inline]
53 fn from(node: NamedNode) -> Self {
54 Self::NamedNode(node)
55 }
56}
57
58impl From<NamedNodeRef<'_>> for N3Term {
59 #[inline]
60 fn from(node: NamedNodeRef<'_>) -> Self {
61 Self::NamedNode(node.into_owned())
62 }
63}
64
65impl From<BlankNode> for N3Term {
66 #[inline]
67 fn from(node: BlankNode) -> Self {
68 Self::BlankNode(node)
69 }
70}
71
72impl From<Literal> for N3Term {
73 #[inline]
74 fn from(literal: Literal) -> Self {
75 Self::Literal(literal)
76 }
77}
78
#[cfg(feature = "rdf-star")]
impl From<Triple> for N3Term {
    /// Boxes the triple and wraps it into [`N3Term::Triple`].
    #[inline]
    fn from(triple: Triple) -> Self {
        let boxed = Box::new(triple);
        N3Term::Triple(boxed)
    }
}
86
#[cfg(feature = "rdf-star")]
impl From<Box<Triple>> for N3Term {
    /// Wraps an already boxed triple into [`N3Term::Triple`] without re-allocating.
    #[inline]
    fn from(node: Box<Triple>) -> Self {
        N3Term::Triple(node)
    }
}
94
95impl From<NamedOrBlankNode> for N3Term {
96 #[inline]
97 fn from(node: NamedOrBlankNode) -> Self {
98 match node {
99 NamedOrBlankNode::NamedNode(node) => node.into(),
100 NamedOrBlankNode::BlankNode(node) => node.into(),
101 }
102 }
103}
104
105impl From<Subject> for N3Term {
106 #[inline]
107 fn from(node: Subject) -> Self {
108 match node {
109 Subject::NamedNode(node) => node.into(),
110 Subject::BlankNode(node) => node.into(),
111 #[cfg(feature = "rdf-star")]
112 Subject::Triple(triple) => Self::Triple(triple),
113 }
114 }
115}
116
117impl From<Term> for N3Term {
118 #[inline]
119 fn from(node: Term) -> Self {
120 match node {
121 Term::NamedNode(node) => node.into(),
122 Term::BlankNode(node) => node.into(),
123 Term::Literal(node) => node.into(),
124 #[cfg(feature = "rdf-star")]
125 Term::Triple(triple) => Self::Triple(triple),
126 }
127 }
128}
129
130impl From<Variable> for N3Term {
131 #[inline]
132 fn from(variable: Variable) -> Self {
133 Self::Variable(variable)
134 }
135}
136
/// A N3 quad i.e. a quad composed of [`N3Term`].
///
/// The `graph_name` is used to encode the formula in which the triple appears.
/// In this case the formula is encoded by a blank node.
#[derive(Eq, PartialEq, Debug, Clone, Hash)]
pub struct N3Quad {
    /// The [subject](https://www.w3.org/TR/rdf11-concepts/#dfn-subject) of this triple.
    pub subject: N3Term,

    /// The [predicate](https://www.w3.org/TR/rdf11-concepts/#dfn-predicate) of this triple.
    pub predicate: N3Term,

    /// The [object](https://www.w3.org/TR/rdf11-concepts/#dfn-object) of this triple.
    pub object: N3Term,

    /// The name of the RDF [graph](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-graph) in which the triple is.
    pub graph_name: GraphName,
}
155
156impl fmt::Display for N3Quad {
157 #[inline]
158 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159 if self.graph_name == GraphName::DefaultGraph {
160 write!(f, "{} {} {}", self.subject, self.predicate, self.object)
161 } else {
162 write!(
163 f,
164 "{} {} {} {}",
165 self.subject, self.predicate, self.object, self.graph_name
166 )
167 }
168 }
169}
170
171impl From<Quad> for N3Quad {
172 fn from(quad: Quad) -> Self {
173 Self {
174 subject: quad.subject.into(),
175 predicate: quad.predicate.into(),
176 object: quad.object.into(),
177 graph_name: quad.graph_name,
178 }
179 }
180}
181
182/// A [N3](https://w3c.github.io/N3/spec/) streaming parser.
183///
184/// Count the number of people:
185/// ```
186/// use oxrdf::vocab::rdf;
187/// use oxrdf::NamedNode;
188/// use oxttl::n3::{N3Parser, N3Term};
189///
190/// let file = br#"@base <http://example.com/> .
191/// @prefix schema: <http://schema.org/> .
192/// <foo> a schema:Person ;
193/// schema:name "Foo" .
194/// <bar> a schema:Person ;
195/// schema:name "Bar" ."#;
196///
197/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
198/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
199/// let mut count = 0;
200/// for triple in N3Parser::new().for_reader(file.as_ref()) {
201/// let triple = triple?;
202/// if triple.predicate == rdf_type && triple.object == schema_person {
203/// count += 1;
204/// }
205/// }
206/// assert_eq!(2, count);
207/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
208/// ```
#[derive(Default, Clone)]
#[must_use]
pub struct N3Parser {
    /// Set by [`N3Parser::unchecked`]; passed to `N3Recognizer::new_parser` by [`N3Parser::low_level`].
    unchecked: bool,
    /// Base IRI set by [`N3Parser::with_base_iri`], if any.
    base: Option<Iri<String>>,
    /// Prefix name to IRI map filled by [`N3Parser::with_prefix`].
    prefixes: HashMap<String, Iri<String>>,
}
216
217impl N3Parser {
218 /// Builds a new [`N3Parser`].
219 #[inline]
220 pub fn new() -> Self {
221 Self::default()
222 }
223
224 /// Assumes the file is valid to make parsing faster.
225 ///
226 /// It will skip some validations.
227 ///
228 /// Note that if the file is actually not valid, broken RDF might be emitted by the parser.
229 #[inline]
230 pub fn unchecked(mut self) -> Self {
231 self.unchecked = true;
232 self
233 }
234
235 #[inline]
236 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
237 self.base = Some(Iri::parse(base_iri.into())?);
238 Ok(self)
239 }
240
241 #[inline]
242 pub fn with_prefix(
243 mut self,
244 prefix_name: impl Into<String>,
245 prefix_iri: impl Into<String>,
246 ) -> Result<Self, IriParseError> {
247 self.prefixes
248 .insert(prefix_name.into(), Iri::parse(prefix_iri.into())?);
249 Ok(self)
250 }
251
252 /// Parses a N3 file from a [`Read`] implementation.
253 ///
254 /// Count the number of people:
255 /// ```
256 /// use oxrdf::NamedNode;
257 /// use oxttl::n3::{N3Parser, N3Term};
258 ///
259 /// let file = br#"@base <http://example.com/> .
260 /// @prefix schema: <http://schema.org/> .
261 /// <foo> a schema:Person ;
262 /// schema:name "Foo" .
263 /// <bar> a schema:Person ;
264 /// schema:name "Bar" ."#;
265 ///
266 /// let rdf_type = N3Term::NamedNode(NamedNode::new(
267 /// "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
268 /// )?);
269 /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
270 /// let mut count = 0;
271 /// for triple in N3Parser::new().for_reader(file.as_ref()) {
272 /// let triple = triple?;
273 /// if triple.predicate == rdf_type && triple.object == schema_person {
274 /// count += 1;
275 /// }
276 /// }
277 /// assert_eq!(2, count);
278 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
279 /// ```
280 pub fn for_reader<R: Read>(self, reader: R) -> ReaderN3Parser<R> {
281 ReaderN3Parser {
282 inner: self.low_level().parser.for_reader(reader),
283 }
284 }
285
286 /// Parses a N3 file from a [`AsyncRead`] implementation.
287 ///
288 /// Count the number of people:
289 /// ```
290 /// # #[tokio::main(flavor = "current_thread")]
291 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
292 /// use oxrdf::vocab::rdf;
293 /// use oxrdf::NamedNode;
294 /// use oxttl::n3::{N3Parser, N3Term};
295 ///
296 /// let file = br#"@base <http://example.com/> .
297 /// @prefix schema: <http://schema.org/> .
298 /// <foo> a schema:Person ;
299 /// schema:name "Foo" .
300 /// <bar> a schema:Person ;
301 /// schema:name "Bar" ."#;
302 ///
303 /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
304 /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
305 /// let mut count = 0;
306 /// let mut parser = N3Parser::new().for_tokio_async_reader(file.as_ref());
307 /// while let Some(triple) = parser.next().await {
308 /// let triple = triple?;
309 /// if triple.predicate == rdf_type && triple.object == schema_person {
310 /// count += 1;
311 /// }
312 /// }
313 /// assert_eq!(2, count);
314 /// # Ok(())
315 /// # }
316 /// ```
317 #[cfg(feature = "async-tokio")]
318 pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
319 self,
320 reader: R,
321 ) -> TokioAsyncReaderN3Parser<R> {
322 TokioAsyncReaderN3Parser {
323 inner: self.low_level().parser.for_tokio_async_reader(reader),
324 }
325 }
326
327 /// Parses a N3 file from a byte slice.
328 ///
329 /// Count the number of people:
330 /// ```
331 /// use oxrdf::vocab::rdf;
332 /// use oxrdf::NamedNode;
333 /// use oxttl::n3::{N3Parser, N3Term};
334 ///
335 /// let file = br#"@base <http://example.com/> .
336 /// @prefix schema: <http://schema.org/> .
337 /// <foo> a schema:Person ;
338 /// schema:name "Foo" .
339 /// <bar> a schema:Person ;
340 /// schema:name "Bar" ."#;
341 ///
342 /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
343 /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
344 /// let mut count = 0;
345 /// for triple in N3Parser::new().for_slice(file) {
346 /// let triple = triple?;
347 /// if triple.predicate == rdf_type && triple.object == schema_person {
348 /// count += 1;
349 /// }
350 /// }
351 /// assert_eq!(2, count);
352 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
353 /// ```
354 pub fn for_slice(self, slice: &[u8]) -> SliceN3Parser<'_> {
355 SliceN3Parser {
356 inner: N3Recognizer::new_parser(slice, true, false, self.base, self.prefixes)
357 .into_iter(),
358 }
359 }
360
361 /// Allows to parse a N3 file by using a low-level API.
362 ///
363 /// Count the number of people:
364 /// ```
365 /// use oxrdf::vocab::rdf;
366 /// use oxrdf::NamedNode;
367 /// use oxttl::n3::{N3Parser, N3Term};
368 ///
369 /// let file: [&[u8]; 5] = [
370 /// b"@base <http://example.com/>",
371 /// b". @prefix schema: <http://schema.org/> .",
372 /// b"<foo> a schema:Person",
373 /// b" ; schema:name \"Foo\" . <bar>",
374 /// b" a schema:Person ; schema:name \"Bar\" .",
375 /// ];
376 ///
377 /// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
378 /// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
379 /// let mut count = 0;
380 /// let mut parser = N3Parser::new().low_level();
381 /// let mut file_chunks = file.iter();
382 /// while !parser.is_end() {
383 /// // We feed more data to the parser
384 /// if let Some(chunk) = file_chunks.next() {
385 /// parser.extend_from_slice(chunk);
386 /// } else {
387 /// parser.end(); // It's finished
388 /// }
389 /// // We read as many triples from the parser as possible
390 /// while let Some(triple) = parser.parse_next() {
391 /// let triple = triple?;
392 /// if triple.predicate == rdf_type && triple.object == schema_person {
393 /// count += 1;
394 /// }
395 /// }
396 /// }
397 /// assert_eq!(2, count);
398 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
399 /// ```
400 pub fn low_level(self) -> LowLevelN3Parser {
401 LowLevelN3Parser {
402 parser: N3Recognizer::new_parser(
403 Vec::new(),
404 false,
405 self.unchecked,
406 self.base,
407 self.prefixes,
408 ),
409 }
410 }
411}
412
413/// Parses a N3 file from a [`Read`] implementation.
414///
415/// Can be built using [`N3Parser::for_reader`].
416///
417/// Count the number of people:
418/// ```
419/// use oxrdf::vocab::rdf;
420/// use oxrdf::NamedNode;
421/// use oxttl::n3::{N3Parser, N3Term};
422///
423/// let file = br#"@base <http://example.com/> .
424/// @prefix schema: <http://schema.org/> .
425/// <foo> a schema:Person ;
426/// schema:name "Foo" .
427/// <bar> a schema:Person ;
428/// schema:name "Bar" ."#;
429///
430/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
431/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
432/// let mut count = 0;
433/// for triple in N3Parser::new().for_reader(file.as_ref()) {
434/// let triple = triple?;
435/// if triple.predicate == rdf_type && triple.object == schema_person {
436/// count += 1;
437/// }
438/// }
439/// assert_eq!(2, count);
440/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
441/// ```
#[must_use]
pub struct ReaderN3Parser<R: Read> {
    /// Wrapped toolkit iterator; `next`, `prefixes` and `base_iri` all go through it.
    inner: ReaderIterator<R, N3Recognizer>,
}
446
447impl<R: Read> ReaderN3Parser<R> {
448 /// The list of IRI prefixes considered at the current step of the parsing.
449 ///
450 /// This method returns (prefix name, prefix value) tuples.
451 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
452 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
453 ///
454 /// ```
455 /// use oxttl::N3Parser;
456 ///
457 /// let file = br#"@base <http://example.com/> .
458 /// @prefix schema: <http://schema.org/> .
459 /// <foo> a schema:Person ;
460 /// schema:name "Foo" ."#;
461 ///
462 /// let mut parser = N3Parser::new().for_reader(file.as_ref());
463 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
464 ///
465 /// parser.next().unwrap()?; // We read the first triple
466 /// assert_eq!(
467 /// parser.prefixes().collect::<Vec<_>>(),
468 /// [("schema", "http://schema.org/")]
469 /// ); // There are now prefixes
470 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
471 /// ```
472 pub fn prefixes(&self) -> N3PrefixesIter<'_> {
473 N3PrefixesIter {
474 inner: self.inner.parser.context.prefixes.iter(),
475 }
476 }
477
478 /// The base IRI considered at the current step of the parsing.
479 ///
480 /// ```
481 /// use oxttl::N3Parser;
482 ///
483 /// let file = br#"@base <http://example.com/> .
484 /// @prefix schema: <http://schema.org/> .
485 /// <foo> a schema:Person ;
486 /// schema:name "Foo" ."#;
487 ///
488 /// let mut parser = N3Parser::new().for_reader(file.as_ref());
489 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
490 ///
491 /// parser.next().unwrap()?; // We read the first triple
492 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
493 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
494 /// ```
495 pub fn base_iri(&self) -> Option<&str> {
496 self.inner
497 .parser
498 .context
499 .lexer_options
500 .base_iri
501 .as_ref()
502 .map(Iri::as_str)
503 }
504}
505
impl<R: Read> Iterator for ReaderN3Parser<R> {
    type Item = Result<N3Quad, TurtleParseError>;

    /// Returns the next item of the wrapped `ReaderIterator`.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}
513
514/// Parses a N3 file from a [`AsyncRead`] implementation.
515///
516/// Can be built using [`N3Parser::for_tokio_async_reader`].
517///
518/// Count the number of people:
519/// ```
520/// # #[tokio::main(flavor = "current_thread")]
521/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
522/// use oxrdf::vocab::rdf;
523/// use oxrdf::NamedNode;
524/// use oxttl::n3::{N3Parser, N3Term};
525///
526/// let file = br#"@base <http://example.com/> .
527/// @prefix schema: <http://schema.org/> .
528/// <foo> a schema:Person ;
529/// schema:name "Foo" .
530/// <bar> a schema:Person ;
531/// schema:name "Bar" ."#;
532///
533/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
534/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
535/// let mut count = 0;
536/// let mut parser = N3Parser::new().for_tokio_async_reader(file.as_ref());
537/// while let Some(triple) = parser.next().await {
538/// let triple = triple?;
539/// if triple.predicate == rdf_type && triple.object == schema_person {
540/// count += 1;
541/// }
542/// }
543/// assert_eq!(2, count);
544/// # Ok(())
545/// # }
546/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderN3Parser<R: AsyncRead + Unpin> {
    /// Wrapped toolkit iterator; `next`, `prefixes` and `base_iri` all go through it.
    inner: TokioAsyncReaderIterator<R, N3Recognizer>,
}
552
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderN3Parser<R> {
    /// Reads the next triple or returns `None` if the file is finished.
    pub async fn next(&mut self) -> Option<Result<N3Quad, TurtleParseError>> {
        let inner = &mut self.inner;
        inner.next().await
    }

    /// The IRI prefixes known at the current step of the parsing.
    ///
    /// The iterator yields (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxttl::N3Parser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut parser = N3Parser::new().for_tokio_async_reader(file.as_ref());
    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// parser.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(
    ///     parser.prefixes().collect::<Vec<_>>(),
    ///     [("schema", "http://schema.org/")]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> N3PrefixesIter<'_> {
        let context = &self.inner.parser.context;
        N3PrefixesIter {
            inner: context.prefixes.iter(),
        }
    }

    /// The base IRI known at the current step of the parsing, if any.
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxttl::N3Parser;
    ///
    /// let file = br#"@base <http://example.com/> .
    /// @prefix schema: <http://schema.org/> .
    /// <foo> a schema:Person ;
    ///     schema:name "Foo" ."#;
    ///
    /// let mut parser = N3Parser::new().for_tokio_async_reader(file.as_ref());
    /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
    ///
    /// parser.next().await.unwrap()?; // We read the first triple
    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        let lexer_options = &self.inner.parser.context.lexer_options;
        let base = lexer_options.base_iri.as_ref()?;
        Some(base.as_str())
    }
}
623
624/// Parses a N3 file from a byte slice.
625///
626/// Can be built using [`N3Parser::for_slice`].
627///
628/// Count the number of people:
629/// ```
630/// use oxrdf::vocab::rdf;
631/// use oxrdf::NamedNode;
632/// use oxttl::n3::{N3Parser, N3Term};
633///
634/// let file = br#"@base <http://example.com/> .
635/// @prefix schema: <http://schema.org/> .
636/// <foo> a schema:Person ;
637/// schema:name "Foo" .
638/// <bar> a schema:Person ;
639/// schema:name "Bar" ."#;
640///
641/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
642/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
643/// let mut count = 0;
644/// for triple in N3Parser::new().for_slice(file) {
645/// let triple = triple?;
646/// if triple.predicate == rdf_type && triple.object == schema_person {
647/// count += 1;
648/// }
649/// }
650/// assert_eq!(2, count);
651/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
652/// ```
#[must_use]
pub struct SliceN3Parser<'a> {
    /// Wrapped toolkit iterator over the borrowed slice; `next`, `prefixes` and `base_iri` all go through it.
    inner: SliceIterator<'a, N3Recognizer>,
}
657
658impl SliceN3Parser<'_> {
659 /// The list of IRI prefixes considered at the current step of the parsing.
660 ///
661 /// This method returns (prefix name, prefix value) tuples.
662 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
663 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
664 ///
665 /// ```
666 /// use oxttl::N3Parser;
667 ///
668 /// let file = br#"@base <http://example.com/> .
669 /// @prefix schema: <http://schema.org/> .
670 /// <foo> a schema:Person ;
671 /// schema:name "Foo" ."#;
672 ///
673 /// let mut parser = N3Parser::new().for_slice(file);
674 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
675 ///
676 /// parser.next().unwrap()?; // We read the first triple
677 /// assert_eq!(
678 /// parser.prefixes().collect::<Vec<_>>(),
679 /// [("schema", "http://schema.org/")]
680 /// ); // There are now prefixes
681 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
682 /// ```
683 pub fn prefixes(&self) -> N3PrefixesIter<'_> {
684 N3PrefixesIter {
685 inner: self.inner.parser.context.prefixes.iter(),
686 }
687 }
688
689 /// The base IRI considered at the current step of the parsing.
690 ///
691 /// ```
692 /// use oxttl::N3Parser;
693 ///
694 /// let file = br#"@base <http://example.com/> .
695 /// @prefix schema: <http://schema.org/> .
696 /// <foo> a schema:Person ;
697 /// schema:name "Foo" ."#;
698 ///
699 /// let mut parser = N3Parser::new().for_slice(file);
700 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
701 ///
702 /// parser.next().unwrap()?; // We read the first triple
703 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
704 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
705 /// ```
706 pub fn base_iri(&self) -> Option<&str> {
707 self.inner
708 .parser
709 .context
710 .lexer_options
711 .base_iri
712 .as_ref()
713 .map(Iri::as_str)
714 }
715}
716
impl Iterator for SliceN3Parser<'_> {
    type Item = Result<N3Quad, TurtleSyntaxError>;

    /// Returns the next item of the wrapped `SliceIterator`.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }
}
724
725/// Parses a N3 file by using a low-level API.
726///
727/// Can be built using [`N3Parser::low_level`].
728///
729/// Count the number of people:
730/// ```
731/// use oxrdf::vocab::rdf;
732/// use oxrdf::NamedNode;
733/// use oxttl::n3::{N3Parser, N3Term};
734///
735/// let file: [&[u8]; 5] = [
736/// b"@base <http://example.com/>",
737/// b". @prefix schema: <http://schema.org/> .",
738/// b"<foo> a schema:Person",
739/// b" ; schema:name \"Foo\" . <bar>",
740/// b" a schema:Person ; schema:name \"Bar\" .",
741/// ];
742///
743/// let rdf_type = N3Term::NamedNode(rdf::TYPE.into_owned());
744/// let schema_person = N3Term::NamedNode(NamedNode::new("http://schema.org/Person")?);
745/// let mut count = 0;
746/// let mut parser = N3Parser::new().low_level();
747/// let mut file_chunks = file.iter();
748/// while !parser.is_end() {
749/// // We feed more data to the parser
750/// if let Some(chunk) = file_chunks.next() {
751/// parser.extend_from_slice(chunk);
752/// } else {
753/// parser.end(); // It's finished
754/// }
755/// // We read as many triples from the parser as possible
756/// while let Some(triple) = parser.parse_next() {
757/// let triple = triple?;
758/// if triple.predicate == rdf_type && triple.object == schema_person {
759/// count += 1;
760/// }
761/// }
762/// }
763/// assert_eq!(2, count);
764/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
765/// ```
pub struct LowLevelN3Parser {
    /// Wrapped toolkit parser that buffers the bytes fed so far; every method of this type goes through it.
    parser: Parser<Vec<u8>, N3Recognizer>,
}
769
770impl LowLevelN3Parser {
771 /// Adds some extra bytes to the parser. Should be called when [`parse_next`](Self::parse_next) returns [`None`] and there is still unread data.
772 pub fn extend_from_slice(&mut self, other: &[u8]) {
773 self.parser.extend_from_slice(other)
774 }
775
776 /// Tell the parser that the file is finished.
777 ///
778 /// This triggers the parsing of the final bytes and might lead [`parse_next`](Self::parse_next) to return some extra values.
779 pub fn end(&mut self) {
780 self.parser.end()
781 }
782
783 /// Returns if the parsing is finished i.e. [`end`](Self::end) has been called and [`parse_next`](Self::parse_next) is always going to return `None`.
784 pub fn is_end(&self) -> bool {
785 self.parser.is_end()
786 }
787
788 /// Attempt to parse a new quad from the already provided data.
789 ///
790 /// Returns [`None`] if the parsing is finished or more data is required.
791 /// If it is the case more data should be fed using [`extend_from_slice`](Self::extend_from_slice).
792 pub fn parse_next(&mut self) -> Option<Result<N3Quad, TurtleSyntaxError>> {
793 self.parser.parse_next()
794 }
795
796 /// The list of IRI prefixes considered at the current step of the parsing.
797 ///
798 /// This method returns (prefix name, prefix value) tuples.
799 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
800 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
801 ///
802 /// ```
803 /// use oxttl::N3Parser;
804 ///
805 /// let file = br#"@base <http://example.com/> .
806 /// @prefix schema: <http://schema.org/> .
807 /// <foo> a schema:Person ;
808 /// schema:name "Foo" ."#;
809 ///
810 /// let mut parser = N3Parser::new().low_level();
811 /// parser.extend_from_slice(file);
812 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
813 ///
814 /// parser.parse_next().unwrap()?; // We read the first triple
815 /// assert_eq!(
816 /// parser.prefixes().collect::<Vec<_>>(),
817 /// [("schema", "http://schema.org/")]
818 /// ); // There are now prefixes
819 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
820 /// ```
821 pub fn prefixes(&self) -> N3PrefixesIter<'_> {
822 N3PrefixesIter {
823 inner: self.parser.context.prefixes.iter(),
824 }
825 }
826
827 /// The base IRI considered at the current step of the parsing.
828 ///
829 /// ```
830 /// use oxttl::N3Parser;
831 ///
832 /// let file = br#"@base <http://example.com/> .
833 /// @prefix schema: <http://schema.org/> .
834 /// <foo> a schema:Person ;
835 /// schema:name "Foo" ."#;
836 ///
837 /// let mut parser = N3Parser::new().low_level();
838 /// parser.extend_from_slice(file);
839 /// assert!(parser.base_iri().is_none()); // No base IRI at the beginning
840 ///
841 /// parser.parse_next().unwrap()?; // We read the first triple
842 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI
843 /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
844 /// ```
845 pub fn base_iri(&self) -> Option<&str> {
846 self.parser
847 .context
848 .lexer_options
849 .base_iri
850 .as_ref()
851 .map(Iri::as_str)
852 }
853}
854
/// A predicate term together with the direction in which it is applied
/// when a quad is emitted.
#[derive(Clone)]
enum Predicate {
    /// The quad is emitted as `subject predicate object`.
    Regular(N3Term),
    /// The quad is emitted with subject and object swapped (used e.g. for the `<=` verb).
    Inverted(N3Term),
}
860
/// State of the N3 grammar recognizer between two tokens.
struct N3Recognizer {
    /// Pending grammar states; `recognize_next` pops the next one to apply from the end.
    stack: Vec<N3State>,
    /// Terms parsed but not yet consumed; when an object is completed it sits on top of its subject.
    terms: Vec<N3Term>,
    /// Predicates of the predicate-object lists currently being parsed (innermost last).
    predicates: Vec<Predicate>,
    /// NOTE(review): presumably the blank nodes naming the enclosing formulas — usage is not visible here, confirm.
    contexts: Vec<BlankNode>,
}
867
/// Mutable context handed to `recognize_next`; also read by the `prefixes()` and `base_iri()` accessors.
struct N3RecognizerContext {
    /// Lexer options; their `base_iri` is overwritten when a base directive is parsed.
    lexer_options: N3LexerOptions,
    /// Prefix name to IRI map, updated when a prefix directive is parsed.
    prefixes: HashMap<String, Iri<String>>,
}
872
873impl RuleRecognizer for N3Recognizer {
874 type TokenRecognizer = N3Lexer;
875 type Output = N3Quad;
876 type Context = N3RecognizerContext;
877
878 fn error_recovery_state(mut self) -> Self {
879 self.stack.clear();
880 self.terms.clear();
881 self.predicates.clear();
882 self.contexts.clear();
883 self
884 }
885
886 fn recognize_next(
887 mut self,
888 token: TokenOrLineJump<N3Token<'_>>,
889 context: &mut N3RecognizerContext,
890 results: &mut Vec<N3Quad>,
891 errors: &mut Vec<RuleRecognizerError>,
892 ) -> Self {
893 let TokenOrLineJump::Token(token) = token else {
894 return self;
895 };
896 while let Some(rule) = self.stack.pop() {
897 match rule {
898 // [1] n3Doc ::= ( ( n3Statement ".") | sparqlDirective) *
899 // [2] n3Statement ::= n3Directive | triples
900 // [3] n3Directive ::= prefixID | base
901 // [4] sparqlDirective ::= sparqlBase | sparqlPrefix
902 // [5] sparqlBase ::= BASE IRIREF
903 // [6] sparqlPrefix ::= PREFIX PNAME_NS IRIREF
904 // [7] prefixID ::= "@prefix" PNAME_NS IRIREF
905 // [8] base ::= "@base" IRIREF
906 N3State::N3Doc => {
907 self.stack.push(N3State::N3Doc);
908 match token {
909 N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("base") => {
910 self.stack.push(N3State::BaseExpectIri);
911 return self;
912 }
913 N3Token::PlainKeyword(k) if k.eq_ignore_ascii_case("prefix") => {
914 self.stack.push(N3State::PrefixExpectPrefix);
915 return self;
916 }
917 N3Token::LangTag("prefix") => {
918 self.stack.push(N3State::N3DocExpectDot);
919 self.stack.push(N3State::PrefixExpectPrefix);
920 return self;
921 }
922 N3Token::LangTag("base") => {
923 self.stack.push(N3State::N3DocExpectDot);
924 self.stack.push(N3State::BaseExpectIri);
925 return self;
926 }
927 _ => {
928 self.stack.push(N3State::N3DocExpectDot);
929 self.stack.push(N3State::Triples);
930 }
931 }
932 }
933 N3State::N3DocExpectDot => {
934 if token == N3Token::Punctuation(".") {
935 return self;
936 }
937 errors.push("A dot is expected at the end of N3 statements".into());
938 }
939 N3State::BaseExpectIri => return if let N3Token::IriRef(iri) = token {
940 context.lexer_options.base_iri = Some(Iri::parse_unchecked(iri));
941 self
942 } else {
943 self.error(errors, "The BASE keyword should be followed by an IRI")
944 },
945 N3State::PrefixExpectPrefix => return match token {
946 N3Token::PrefixedName { prefix, local, .. } if local.is_empty() => {
947 self.stack.push(N3State::PrefixExpectIri { name: prefix.to_owned() });
948 self
949 }
950 _ => {
951 self.error(errors, "The PREFIX keyword should be followed by a prefix like 'ex:'")
952 }
953 },
954 N3State::PrefixExpectIri { name } => return if let N3Token::IriRef(iri) = token {
955 context.prefixes.insert(name, Iri::parse_unchecked(iri));
956 self
957 } else { self.error(errors, "The PREFIX declaration should be followed by a prefix and its value as an IRI")
958 },
959 // [9] triples ::= subject predicateObjectList?
960 N3State::Triples => {
961 self.stack.push(N3State::TriplesMiddle);
962 self.stack.push(N3State::Path);
963 }
964 N3State::TriplesMiddle => if matches!(token, N3Token::Punctuation("." | "]" | "}" | ")")) {} else {
965 self.stack.push(N3State::TriplesEnd);
966 self.stack.push(N3State::PredicateObjectList);
967 },
968 N3State::TriplesEnd => {
969 self.terms.pop();
970 }
971 // [10] predicateObjectList ::= verb objectList ( ";" ( verb objectList) ? ) *
972 N3State::PredicateObjectList => {
973 self.stack.push(N3State::PredicateObjectListEnd);
974 self.stack.push(N3State::ObjectsList);
975 self.stack.push(N3State::Verb);
976 }
977 N3State::PredicateObjectListEnd => {
978 self.predicates.pop();
979 if token == N3Token::Punctuation(";") {
980 self.stack.push(N3State::PredicateObjectListPossibleContinuation);
981 return self;
982 }
983 }
984 N3State::PredicateObjectListPossibleContinuation => if token == N3Token::Punctuation(";") {
985 self.stack.push(N3State::PredicateObjectListPossibleContinuation);
986 return self;
987 } else if matches!(token, N3Token::Punctuation(";" | "." | "}" | "]" | ")")) {} else {
988 self.stack.push(N3State::PredicateObjectListEnd);
989 self.stack.push(N3State::ObjectsList);
990 self.stack.push(N3State::Verb);
991 },
992 // [11] objectList ::= object ( "," object) *
993 N3State::ObjectsList => {
994 self.stack.push(N3State::ObjectsListEnd);
995 self.stack.push(N3State::Path);
996 }
997 N3State::ObjectsListEnd => {
998 let object = self.terms.pop().unwrap();
999 let subject = self.terms.last().unwrap().clone();
1000 results.push(match self.predicates.last().unwrap().clone() {
1001 Predicate::Regular(predicate) => self.quad(
1002 subject,
1003 predicate,
1004 object,
1005 ),
1006 Predicate::Inverted(predicate) => self.quad(
1007 object,
1008 predicate,
1009 subject,
1010 )
1011 });
1012 if token == N3Token::Punctuation(",") {
1013 self.stack.push(N3State::ObjectsListEnd);
1014 self.stack.push(N3State::Path);
1015 return self;
1016 }
1017 }
1018 // [12] verb ::= predicate | "a" | ( "has" expression) | ( "is" expression "of") | "=" | "<=" | "=>"
1019 // [14] predicate ::= expression | ( "<-" expression)
1020 N3State::Verb => match token {
1021 N3Token::PlainKeyword("a") => {
1022 self.predicates.push(Predicate::Regular(rdf::TYPE.into()));
1023 return self;
1024 }
1025 N3Token::PlainKeyword("has") => {
1026 self.stack.push(N3State::AfterRegularVerb);
1027 self.stack.push(N3State::Path);
1028 return self;
1029 }
1030 N3Token::PlainKeyword("is") => {
1031 self.stack.push(N3State::AfterVerbIs);
1032 self.stack.push(N3State::Path);
1033 return self;
1034 }
1035 N3Token::Punctuation("=") => {
1036 self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2002/07/owl#sameAs").into()));
1037 return self;
1038 }
1039 N3Token::Punctuation("=>") => {
1040 self.predicates.push(Predicate::Regular(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into()));
1041 return self;
1042 }
1043 N3Token::Punctuation("<=") => {
1044 self.predicates.push(Predicate::Inverted(NamedNode::new_unchecked("http://www.w3.org/2000/10/swap/log#implies").into()));
1045 return self;
1046 }
1047 N3Token::Punctuation("<-") => {
1048 self.stack.push(N3State::AfterInvertedVerb);
1049 self.stack.push(N3State::Path);
1050 return self;
1051 }
1052 _ => {
1053 self.stack.push(N3State::AfterRegularVerb);
1054 self.stack.push(N3State::Path);
1055 }
1056 }
1057 N3State::AfterRegularVerb => {
1058 self.predicates.push(Predicate::Regular(self.terms.pop().unwrap()));
1059 }
1060 N3State::AfterInvertedVerb => {
1061 self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap()));
1062 }
1063 N3State::AfterVerbIs => return match token {
1064 N3Token::PlainKeyword("of") => {
1065 self.predicates.push(Predicate::Inverted(self.terms.pop().unwrap()));
1066 self
1067 }
1068 _ => {
1069 self.error(errors, "The keyword 'is' should be followed by a predicate then by the keyword 'of'")
1070 }
1071 },
1072 // [13] subject ::= expression
1073 // [15] object ::= expression
1074 // [16] expression ::= path
1075 // [17] path ::= pathItem ( ( "!" path) | ( "^" path) ) ?
1076 N3State::Path => {
1077 self.stack.push(N3State::PathFollowUp);
1078 self.stack.push(N3State::PathItem);
1079 }
1080 N3State::PathFollowUp => match token {
1081 N3Token::Punctuation("!") => {
1082 self.stack.push(N3State::PathAfterIndicator { is_inverse: false });
1083 self.stack.push(N3State::PathItem);
1084 return self;
1085 }
1086 N3Token::Punctuation("^") => {
1087 self.stack.push(N3State::PathAfterIndicator { is_inverse: true });
1088 self.stack.push(N3State::PathItem);
1089 return self;
1090 }
1091 _ => ()
1092 },
1093 N3State::PathAfterIndicator { is_inverse } => {
1094 let predicate = self.terms.pop().unwrap();
1095 let previous = self.terms.pop().unwrap();
1096 let current = BlankNode::default();
1097 results.push(if is_inverse { self.quad(current.clone(), predicate, previous) } else { self.quad(previous, predicate, current.clone()) });
1098 self.terms.push(current.into());
1099 self.stack.push(N3State::PathFollowUp);
1100 }
1101 // [18] pathItem ::= iri | blankNode | quickVar | collection | blankNodePropertyList | iriPropertyList | literal | formula
1102 // [19] literal ::= rdfLiteral | numericLiteral | BOOLEAN_LITERAL
1103 // [20] blankNodePropertyList ::= "[" predicateObjectList "]"
1104 // [21] iriPropertyList ::= IPLSTART iri predicateObjectList "]"
1105 // [22] collection ::= "(" object* ")"
1106 // [23] formula ::= "{" formulaContent? "}"
1107 // [25] numericLiteral ::= DOUBLE | DECIMAL | INTEGER
1108 // [26] rdfLiteral ::= STRING ( LANGTAG | ( "^^" iri) ) ?
1109 // [27] iri ::= IRIREF | prefixedName
1110 // [28] prefixedName ::= PNAME_LN | PNAME_NS
1111 // [29] blankNode ::= BLANK_NODE_LABEL | ANON
1112 // [30] quickVar ::= QUICK_VAR_NAME
1113 N3State::PathItem => {
1114 return match token {
1115 N3Token::IriRef(iri) => {
1116 self.terms.push(NamedNode::new_unchecked(iri).into());
1117 self
1118 }
1119 N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
1120 Ok(t) => {
1121 self.terms.push(t.into());
1122 self
1123 }
1124 Err(e) => self.error(errors, e)
1125 }
1126 N3Token::BlankNodeLabel(bnode) => {
1127 self.terms.push(BlankNode::new_unchecked(bnode).into());
1128 self
1129 }
1130 N3Token::Variable(name) => {
1131 self.terms.push(Variable::new_unchecked(name).into());
1132 self
1133 }
1134 N3Token::Punctuation("[") => {
1135 self.stack.push(N3State::PropertyListMiddle);
1136 self
1137 }
1138 N3Token::Punctuation("(") => {
1139 self.stack.push(N3State::CollectionBeginning);
1140 self
1141 }
1142 N3Token::String(value) => {
1143 self.stack.push(N3State::LiteralPossibleSuffix { value });
1144 self
1145 }
1146 N3Token::Integer(v) => {
1147 self.terms.push(Literal::new_typed_literal(v, xsd::INTEGER).into());
1148 self
1149 }
1150 N3Token::Decimal(v) => {
1151 self.terms.push(Literal::new_typed_literal(v, xsd::DECIMAL).into());
1152 self
1153 }
1154 N3Token::Double(v) => {
1155 self.terms.push(Literal::new_typed_literal(v, xsd::DOUBLE).into());
1156 self
1157 }
1158 N3Token::PlainKeyword("true") => {
1159 self.terms.push(Literal::new_typed_literal("true", xsd::BOOLEAN).into());
1160 self
1161 }
1162 N3Token::PlainKeyword("false") => {
1163 self.terms.push(Literal::new_typed_literal("false", xsd::BOOLEAN).into());
1164 self
1165 }
1166 N3Token::Punctuation("{") => {
1167 self.contexts.push(BlankNode::default());
1168 self.stack.push(N3State::FormulaContent);
1169 self
1170 }
1171 _ =>
1172 self.error(errors, "TOKEN is not a valid RDF value")
1173
1174 }
1175 }
1176 N3State::PropertyListMiddle => match token {
1177 N3Token::Punctuation("]") => {
1178 self.terms.push(BlankNode::default().into());
1179 return self;
1180 }
1181 N3Token::PlainKeyword("id") => {
1182 self.stack.push(N3State::IriPropertyList);
1183 return self;
1184 }
1185 _ => {
1186 self.terms.push(BlankNode::default().into());
1187 self.stack.push(N3State::PropertyListEnd);
1188 self.stack.push(N3State::PredicateObjectList);
1189 }
1190 }
1191 N3State::PropertyListEnd => if token == N3Token::Punctuation("]") {
1192 return self;
1193 } else {
1194 errors.push("blank node property lists should end with a ']'".into());
1195 }
1196 N3State::IriPropertyList => return match token {
1197 N3Token::IriRef(id) => {
1198 self.terms.push(NamedNode::new_unchecked(id).into());
1199 self.stack.push(N3State::PropertyListEnd);
1200 self.stack.push(N3State::PredicateObjectList);
1201 self
1202 }
1203 N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
1204 Ok(t) => {
1205 self.terms.push(t.into());
1206 self.stack.push(N3State::PropertyListEnd);
1207 self.stack.push(N3State::PredicateObjectList);
1208 self
1209 }
1210 Err(e) => {
1211 self.error(errors, e)
1212 }
1213 }
1214 _ => {
1215 self.error(errors, "The '[ id' construction should be followed by an IRI")
1216 }
1217 },
1218 N3State::CollectionBeginning => if let N3Token::Punctuation(")") = token {
1219 self.terms.push(rdf::NIL.into());
1220 return self;
1221 } else {
1222 let root = BlankNode::default();
1223 self.terms.push(root.clone().into());
1224 self.terms.push(root.into());
1225 self.stack.push(N3State::CollectionPossibleEnd);
1226 self.stack.push(N3State::Path);
1227 },
1228 N3State::CollectionPossibleEnd => {
1229 let value = self.terms.pop().unwrap();
1230 let old = self.terms.pop().unwrap();
1231 results.push(self.quad(
1232 old.clone(),
1233 rdf::FIRST,
1234 value,
1235 ));
1236 if let N3Token::Punctuation(")") = token {
1237 results.push(self.quad(
1238 old,
1239 rdf::REST,
1240 rdf::NIL,
1241 ));
1242 return self;
1243 }
1244 let new = BlankNode::default();
1245 results.push(self.quad(
1246 old,
1247 rdf::REST,
1248 new.clone(),
1249 ));
1250 self.terms.push(new.into());
1251 self.stack.push(N3State::CollectionPossibleEnd);
1252 self.stack.push(N3State::Path);
1253 }
1254 N3State::LiteralPossibleSuffix { value } => {
1255 match token {
1256 N3Token::LangTag(lang) => {
1257 self.terms.push(Literal::new_language_tagged_literal_unchecked(value, lang.to_ascii_lowercase()).into());
1258 return self;
1259 }
1260 N3Token::Punctuation("^^") => {
1261 self.stack.push(N3State::LiteralExpectDatatype { value });
1262 return self;
1263 }
1264 _ => {
1265 self.terms.push(Literal::new_simple_literal(value).into());
1266 }
1267 }
1268 }
1269 N3State::LiteralExpectDatatype { value } => {
1270 match token {
1271 N3Token::IriRef(datatype) => {
1272 self.terms.push(Literal::new_typed_literal(value, NamedNode::new_unchecked(datatype)).into());
1273 return self;
1274 }
1275 N3Token::PrefixedName { prefix, local, might_be_invalid_iri } => match resolve_local_name(prefix, &local, might_be_invalid_iri, &context.prefixes) {
1276 Ok(datatype) => {
1277 self.terms.push(Literal::new_typed_literal(value, datatype).into());
1278 return self;
1279 }
1280 Err(e) => {
1281 return self.error(errors, e);
1282 }
1283 }
1284 _ => {
1285 errors.push("Expecting a datatype IRI after '^^, found TOKEN".into());
1286 self.stack.clear();
1287 }
1288 }
1289 }
1290 // [24] formulaContent ::= ( n3Statement ( "." formulaContent? ) ? ) | ( sparqlDirective formulaContent? )
1291 N3State::FormulaContent => {
1292 match token {
1293 N3Token::Punctuation("}") => {
1294 self.terms.push(self.contexts.pop().unwrap().into());
1295 return self;
1296 }
1297 N3Token::PlainKeyword(k)if k.eq_ignore_ascii_case("base") => {
1298 self.stack.push(N3State::FormulaContent);
1299 self.stack.push(N3State::BaseExpectIri);
1300 return self;
1301 }
1302 N3Token::PlainKeyword(k)if k.eq_ignore_ascii_case("prefix") => {
1303 self.stack.push(N3State::FormulaContent);
1304 self.stack.push(N3State::PrefixExpectPrefix);
1305 return self;
1306 }
1307 N3Token::LangTag("prefix") => {
1308 self.stack.push(N3State::FormulaContentExpectDot);
1309 self.stack.push(N3State::PrefixExpectPrefix);
1310 return self;
1311 }
1312 N3Token::LangTag("base") => {
1313 self.stack.push(N3State::FormulaContentExpectDot);
1314 self.stack.push(N3State::BaseExpectIri);
1315 return self;
1316 }
1317 _ => {
1318 self.stack.push(N3State::FormulaContentExpectDot);
1319 self.stack.push(N3State::Triples);
1320 }
1321 }
1322 }
1323 N3State::FormulaContentExpectDot => {
1324 match token {
1325 N3Token::Punctuation("}") => {
1326 self.terms.push(self.contexts.pop().unwrap().into());
1327 return self;
1328 }
1329 N3Token::Punctuation(".") => {
1330 self.stack.push(N3State::FormulaContent);
1331 return self;
1332 }
1333 _ => {
1334 errors.push("A dot is expected at the end of N3 statements".into());
1335 self.stack.push(N3State::FormulaContent);
1336 }
1337 }
1338 }
1339 }
1340 }
1341 // Empty stack
1342 if token == N3Token::Punctuation(".") {
1343 self.stack.push(N3State::N3Doc);
1344 self
1345 } else {
1346 self
1347 }
1348 }
1349
1350 fn recognize_end(
1351 self,
1352 _state: &mut N3RecognizerContext,
1353 _results: &mut Vec<Self::Output>,
1354 errors: &mut Vec<RuleRecognizerError>,
1355 ) {
1356 match &*self.stack {
1357 [] | [N3State::N3Doc] => (),
1358 _ => errors.push("Unexpected end".into()), // TODO
1359 }
1360 }
1361
1362 fn lexer_options(context: &N3RecognizerContext) -> &N3LexerOptions {
1363 &context.lexer_options
1364 }
1365}
1366
1367impl N3Recognizer {
1368 pub fn new_parser<B>(
1369 data: B,
1370 is_ending: bool,
1371 unchecked: bool,
1372 base_iri: Option<Iri<String>>,
1373 prefixes: HashMap<String, Iri<String>>,
1374 ) -> Parser<B, Self> {
1375 Parser::new(
1376 Lexer::new(
1377 N3Lexer::new(N3LexerMode::N3, unchecked),
1378 data,
1379 is_ending,
1380 MIN_BUFFER_SIZE,
1381 MAX_BUFFER_SIZE,
1382 Some(b"#"),
1383 ),
1384 Self {
1385 stack: vec![N3State::N3Doc],
1386 terms: Vec::new(),
1387 predicates: Vec::new(),
1388 contexts: Vec::new(),
1389 },
1390 N3RecognizerContext {
1391 lexer_options: N3LexerOptions { base_iri },
1392 prefixes,
1393 },
1394 )
1395 }
1396
1397 #[must_use]
1398 fn error(
1399 mut self,
1400 errors: &mut Vec<RuleRecognizerError>,
1401 msg: impl Into<RuleRecognizerError>,
1402 ) -> Self {
1403 errors.push(msg.into());
1404 self.stack.clear();
1405 self
1406 }
1407
1408 fn quad(
1409 &self,
1410 subject: impl Into<N3Term>,
1411 predicate: impl Into<N3Term>,
1412 object: impl Into<N3Term>,
1413 ) -> N3Quad {
1414 N3Quad {
1415 subject: subject.into(),
1416 predicate: predicate.into(),
1417 object: object.into(),
1418 graph_name: self
1419 .contexts
1420 .last()
1421 .map_or(GraphName::DefaultGraph, |g| g.clone().into()),
1422 }
1423 }
1424}
1425
/// States stacked by the N3 recognizer while it consumes tokens.
#[derive(Debug)]
enum N3State {
    // --- Document level and directives ---
    /// Initial state of the recognizer; pushed again after a `.` once the stack is empty.
    N3Doc,
    /// NOTE(review): presumably the top-level counterpart of `FormulaContentExpectDot`
    /// - confirm against its handler.
    N3DocExpectDot,
    /// Pushed after a `base` directive keyword.
    /// NOTE(review): presumably expects the base IRI - confirm against its handler.
    BaseExpectIri,
    /// Expects a prefixed name with an empty local part (like `ex:`).
    PrefixExpectPrefix,
    /// Expects an IRI token and registers it in the context prefixes under `name`.
    PrefixExpectIri { name: String },

    // --- Triples ---
    /// Schedules a subject path followed by `TriplesMiddle`.
    Triples,
    /// Stops on `.`, `]`, `}` or `)`; otherwise schedules a predicate-object list
    /// followed by `TriplesEnd`.
    TriplesMiddle,
    /// Pops the top of the term stack.
    TriplesEnd,
    /// Schedules `Verb`, `ObjectsList`, then `PredicateObjectListEnd`.
    PredicateObjectList,
    /// Pops the current predicate; on `;` moves to the possible-continuation state.
    PredicateObjectListEnd,
    /// After `;`: consumes further `;`, stops on `.`, `}`, `]` or `)`, otherwise
    /// schedules another verb and object list.
    PredicateObjectListPossibleContinuation,
    /// Schedules an object path followed by `ObjectsListEnd`.
    ObjectsList,
    /// Pops the object and emits a quad with the current subject and predicate
    /// (subject and object are swapped for inverted predicates); on `,` schedules
    /// another object.
    ObjectsListEnd,

    // --- Verbs ---
    /// Dispatches on `a`, `has`, `is`, `=`, `=>`, `<=`, `<-` or falls back to a plain path.
    Verb,
    /// Pops the top term and pushes it as a regular predicate.
    AfterRegularVerb,
    /// Pops the top term and pushes it as an inverted predicate.
    AfterInvertedVerb,
    /// Expects the `of` keyword, then pushes the popped term as an inverted predicate.
    AfterVerbIs,

    // --- Paths ---
    /// Schedules `PathItem` followed by `PathFollowUp`.
    Path,
    /// On `!` or `^`, schedules another path item followed by `PathAfterIndicator`.
    PathFollowUp,
    /// Pops the predicate and the previous term, emits a quad linking the previous term
    /// and a fresh blank node (direction given by `is_inverse`), pushes that blank node
    /// and allows a further follow-up.
    PathAfterIndicator { is_inverse: bool },
    /// Reads a single path item: IRI, prefixed name, blank node, variable, literal,
    /// or the opening of `[`, `(` or `{`.
    PathItem,

    // --- Property lists ---
    /// After `[`: `]` yields a fresh blank node, `id` leads to `IriPropertyList`,
    /// anything else starts a blank node property list.
    PropertyListMiddle,
    /// Expects the closing `]` of a property list.
    PropertyListEnd,
    /// After `[ id`: expects an IRI or prefixed name, then a predicate-object list.
    IriPropertyList,

    // --- Collections ---
    /// After `(`: `)` yields `rdf:nil`, anything else starts a list with a fresh blank node.
    CollectionBeginning,
    /// After a collection element: emits `rdf:first`, then `rdf:rest` to `rdf:nil` on `)`
    /// or to a new blank node followed by another element.
    CollectionPossibleEnd,

    // --- Literals ---
    /// After a string token holding `value`: accepts a language tag or `^^`, otherwise
    /// pushes a simple literal.
    LiteralPossibleSuffix { value: String },
    /// After `^^`: expects the datatype as an IRI or prefixed name for `value`.
    LiteralExpectDatatype { value: String },

    // --- Formulas ---
    /// Inside `{ ... }`: handles `}`, base/prefix directives, or schedules a statement.
    FormulaContent,
    /// After a statement inside a formula: expects `.` or `}`.
    FormulaContentExpectDot,
}
1459
1460/// Iterator on the file prefixes.
1461///
1462/// See [`LowLevelN3Parser::prefixes`].
1463pub struct N3PrefixesIter<'a> {
1464 inner: Iter<'a, String, Iri<String>>,
1465}
1466
1467impl<'a> Iterator for N3PrefixesIter<'a> {
1468 type Item = (&'a str, &'a str);
1469
1470 #[inline]
1471 fn next(&mut self) -> Option<Self::Item> {
1472 let (key, value) = self.inner.next()?;
1473 Some((key.as_str(), value.as_str()))
1474 }
1475
1476 #[inline]
1477 fn size_hint(&self) -> (usize, Option<usize>) {
1478 self.inner.size_hint()
1479 }
1480}