rio_api/
parser.rs

1//! Interfaces for RDF parsers.
2//!
3//! The main types are [`TriplesParser`] for triples parsing and [`QuadsParser`] for quads parsing.
4
5#[cfg(feature = "generalized")]
6pub use crate::generalized::parser::*;
7use crate::model::{Quad, Triple};
8use std::error::Error;
9
10/// A parser returning [`Triple`].
11pub trait TriplesParser: Sized {
12    type Error: Error;
13
14    /// Parses the complete file and calls `on_triple` each time a new triple is read.
15    ///
16    /// May fail on errors caused by the parser itself or by the callback function `on_triple`.
17    fn parse_all<E: From<Self::Error>>(
18        &mut self,
19        on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
20    ) -> Result<(), E> {
21        while !self.is_end() {
22            self.parse_step(on_triple)?;
23        }
24        Ok(())
25    }
26
27    /// Parses a small chunk of the file and calls `on_triple` each time a new triple is read.
28    /// (A "small chunk" could be a line for an N-Triples parser.)
29    ///
30    /// This method should be called as long as [`is_end`](TriplesParser::is_end) returns false.
31    ///
32    /// It may fail on errors caused by the parser itself or by the callback function `on_triple`.
33    fn parse_step<E: From<Self::Error>>(
34        &mut self,
35        on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
36    ) -> Result<(), E>;
37
38    /// Returns `true` if the file has been completely consumed by the parser.
39    fn is_end(&self) -> bool;
40
41    /// Converts the parser into a `Result<T, E>` iterator.
42    ///
43    /// `convert_triple` is a function converting Rio [`Triple`] to `T`.
44    fn into_iter<T, E: From<Self::Error>, F: FnMut(Triple<'_>) -> Result<T, E>>(
45        self,
46        convert_triple: F,
47    ) -> TriplesParserIterator<T, E, F, Self> {
48        TriplesParserIterator {
49            parser: self,
50            buffer: Vec::default(),
51            convert_triple,
52        }
53    }
54}
55
56/// Created with the method [`into_iter`](TriplesParser::into_iter()).
57pub struct TriplesParserIterator<
58    T,
59    E: From<P::Error>,
60    F: FnMut(Triple<'_>) -> Result<T, E>,
61    P: TriplesParser,
62> {
63    parser: P,
64    buffer: Vec<T>,
65    convert_triple: F,
66}
67
68impl<T, E: From<P::Error>, F: FnMut(Triple<'_>) -> Result<T, E>, P: TriplesParser> Iterator
69    for TriplesParserIterator<T, E, F, P>
70{
71    type Item = Result<T, E>;
72
73    fn next(&mut self) -> Option<Result<T, E>> {
74        loop {
75            if let Some(r) = self.buffer.pop() {
76                return Some(Ok(r));
77            }
78            if self.parser.is_end() {
79                return None;
80            }
81
82            let buffer = &mut self.buffer;
83            let convert_triple = &mut self.convert_triple;
84            if let Err(e) = self
85                .parser
86                .parse_step(&mut |t| convert_triple(t).map(|t| buffer.push(t)))
87            {
88                return Some(Err(e));
89            }
90        }
91    }
92}
93
94/// A parser returning [`Quad`].
95pub trait QuadsParser: Sized {
96    type Error: Error;
97
98    /// Parses the complete file and calls `on_quad` each time a new quad is read.
99    ///
100    /// May fails on errors caused by the parser itself or by the callback function `on_quad`.
101    fn parse_all<E: From<Self::Error>>(
102        &mut self,
103        on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
104    ) -> Result<(), E> {
105        while !self.is_end() {
106            self.parse_step(on_quad)?
107        }
108        Ok(())
109    }
110
111    /// Parses a small chunk of the file and calls `on_quad` each time a new quad is read.
112    /// (A "small chunk" could be a line for an N-Quads parser.)
113    ///
114    /// This method should be called as long as [`is_end`](QuadsParser::is_end) returns false.
115    ///
116    /// May fails on errors caused by the parser itself or by the callback function `on_quad`.
117    fn parse_step<E: From<Self::Error>>(
118        &mut self,
119        on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
120    ) -> Result<(), E>;
121
122    /// Returns `true` if the file has been completely consumed by the parser.
123    fn is_end(&self) -> bool;
124
125    /// Converts the parser into a `Result<T, E>` iterator.
126    ///
127    /// `convert_triple` is a function converting Rio [`Triple`] to `T`.
128    fn into_iter<T, E: From<Self::Error>, F: FnMut(Quad<'_>) -> Result<T, E>>(
129        self,
130        convert_quad: F,
131    ) -> QuadsParserIterator<T, E, F, Self> {
132        QuadsParserIterator {
133            parser: self,
134            buffer: Vec::default(),
135            convert_quad,
136        }
137    }
138}
139
140/// Created with the method [`into_iter`](QuadsParser::into_iter()).
141pub struct QuadsParserIterator<
142    T,
143    E: From<P::Error>,
144    F: FnMut(Quad<'_>) -> Result<T, E>,
145    P: QuadsParser,
146> {
147    parser: P,
148    buffer: Vec<T>,
149    convert_quad: F,
150}
151
152impl<T, E: From<P::Error>, F: FnMut(Quad<'_>) -> Result<T, E>, P: QuadsParser> Iterator
153    for QuadsParserIterator<T, E, F, P>
154{
155    type Item = Result<T, E>;
156
157    fn next(&mut self) -> Option<Result<T, E>> {
158        loop {
159            if let Some(r) = self.buffer.pop() {
160                return Some(Ok(r));
161            }
162            if self.parser.is_end() {
163                return None;
164            }
165
166            let buffer = &mut self.buffer;
167            let convert_quad = &mut self.convert_quad;
168            if let Err(e) = self
169                .parser
170                .parse_step(&mut |q| convert_quad(q).map(|q| buffer.push(q)))
171            {
172                return Some(Err(e));
173            }
174        }
175    }
176}
177
178/// Error trait that allows to get the textual position of the error
179pub trait ParseError: Error {
180    /// Returns the position of the error in the text, if known.
181    fn textual_position(&self) -> Option<LineBytePosition>;
182}
183
/// A position in a text, expressed as a line number and a byte number.
///
/// Both values are stored exactly as given to [`LineBytePosition::new`] and returned
/// unchanged by the getters; no offset is ever applied.
#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
pub struct LineBytePosition {
    line_number: u64,
    byte_number: u64,
}

impl LineBytePosition {
    /// Creates a new position where `line_number` and `byte_number` are both starting from 0.
    ///
    /// The values are stored verbatim, so they must follow the same convention as the one
    /// documented on [`line_number`](LineBytePosition::line_number) and
    /// [`byte_number`](LineBytePosition::byte_number).
    ///
    /// NOTE(review): the 0-based convention is taken from the getter documentation; confirm
    /// it against the parser implementations that build these positions.
    pub const fn new(line_number: u64, byte_number: u64) -> Self {
        Self {
            line_number,
            byte_number,
        }
    }

    /// The line number where the error occurred starting from 0
    pub const fn line_number(&self) -> u64 {
        self.line_number
    }

    /// The byte number where the error occurred starting from 0
    pub const fn byte_number(&self) -> u64 {
        self.byte_number
    }
}