rio_turtle/
gnquads.rs

1//! Implementation of a generalized RDF / RDF-star version of the N-Quads syntax
2
3use crate::error::*;
4use crate::gtriple_allocator::GeneralizedTripleAllocator;
5use crate::ntriples::{parse_literal, skip_until_eol, skip_whitespace};
6use crate::shared::*;
7use crate::utils::*;
8use oxiri::IriRef;
9use rio_api::model::*;
10use rio_api::parser::*;
11use std::io::BufRead;
12
13/// A [N-Quads](https://www.w3.org/TR/n-quads/) streaming parser parsing generalized quads.
14///
15/// It implements the `GeneralizedQuadsParser` trait.
16/// Using it requires to enable the `generalized` feature.
17pub struct GeneralizedNQuadsParser<R: BufRead> {
18    read: LookAheadByteReader<R>,
19    triple_alloc: GeneralizedTripleAllocator,
20    graph_name_alloc: GeneralizedTripleAllocator,
21}
22
23impl<R: BufRead> GeneralizedNQuadsParser<R> {
24    pub fn new(reader: R) -> Self {
25        Self {
26            read: LookAheadByteReader::new(reader),
27            triple_alloc: GeneralizedTripleAllocator::new(),
28            graph_name_alloc: GeneralizedTripleAllocator::new(),
29        }
30    }
31}
32
33impl<R: BufRead> GeneralizedQuadsParser for GeneralizedNQuadsParser<R> {
34    type Error = TurtleError;
35
36    fn parse_step<E: From<TurtleError>>(
37        &mut self,
38        on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
39    ) -> Result<(), E> {
40        match self.parse_quad_line() {
41            Ok(Some(named_graph)) => {
42                match on_quad(self.triple_alloc.top_quad(
43                    named_graph.then(|| self.graph_name_alloc.current_subject().unwrap()),
44                )) {
45                    Ok(()) => {
46                        if named_graph {
47                            // named graph is allocated as the subject of an incomplete triple
48                            self.graph_name_alloc.pop_term(0);
49                            self.graph_name_alloc.pop_top_empty_triple();
50                        }
51                        self.triple_alloc.pop_top_triple();
52                        debug_assert_eq!(self.triple_alloc.complete_len(), 0);
53                        debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
54                        debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
55                        debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
56                        Ok(())
57                    }
58                    Err(err) => {
59                        self.triple_alloc.clear();
60                        Err(err)
61                    }
62                }
63            }
64            Ok(None) => Ok(()),
65            Err(error) => {
66                self.read.consume_line_end()?;
67                self.triple_alloc.clear();
68                Err(E::from(error))
69            }
70        }
71    }
72
73    fn is_end(&self) -> bool {
74        self.read.current().is_none()
75    }
76}
77
78impl<R: BufRead> GeneralizedNQuadsParser<R> {
79    fn parse_quad_line(&mut self) -> Result<Option<bool>, TurtleError> {
80        let read = &mut self.read;
81        let triple_alloc = &mut &mut self.triple_alloc;
82
83        skip_whitespace(read)?;
84
85        if matches!(
86            read.current(),
87            None | Some(b'#') | Some(b'\r') | Some(b'\n')
88        ) {
89            skip_until_eol(read)?;
90            return Ok(None);
91        }
92
93        parse_triple(read, triple_alloc)?;
94        let named_graph = match read.current() {
95            Some(b'.') => false,
96            _ => {
97                self.graph_name_alloc.push_triple_start();
98                parse_term(0, read, &mut self.graph_name_alloc)?;
99                skip_whitespace(read)?;
100                true
101            }
102        };
103
104        read.check_is_current(b'.')?;
105        read.consume()?;
106        skip_whitespace(read)?;
107
108        match read.current() {
109            None | Some(b'#') | Some(b'\r') | Some(b'\n') => skip_until_eol(read)?,
110            _ => read.unexpected_char_error()?,
111        }
112
113        Ok(Some(named_graph))
114    }
115}
116
117fn parse_triple(
118    read: &mut LookAheadByteReader<impl BufRead>,
119    triple_alloc: &mut GeneralizedTripleAllocator,
120) -> Result<(), TurtleError> {
121    triple_alloc.push_triple_start();
122
123    for i in 0..3 {
124        parse_term(i, read, triple_alloc)?;
125        skip_whitespace(read)?;
126    }
127    Ok(())
128}
129
130fn parse_term(
131    pos: usize,
132    read: &mut LookAheadByteReader<impl BufRead>,
133    triple_alloc: &mut GeneralizedTripleAllocator,
134) -> Result<(), TurtleError> {
135    match read.required_current()? {
136        b'<' => match read.required_next()? {
137            b'<' => {
138                parse_quoted_triple(read, triple_alloc)?;
139                triple_alloc.push_quoted_triple(pos);
140                Ok(())
141            }
142            _ => triple_alloc.try_push_atom(pos, |b, _| {
143                parse_iriref(read, b)?;
144                IriRef::parse(b.as_str()).map_err(|error| {
145                    read.parse_error(TurtleErrorKind::InvalidIri {
146                        iri: b.to_owned(),
147                        error,
148                    })
149                })?;
150                Ok(NamedNode { iri: b }.into())
151            }),
152        },
153        b'_' => triple_alloc.try_push_atom(pos, |b, _| {
154            parse_blank_node_label(read, b).map(GeneralizedTerm::from)
155        }),
156        b'"' => triple_alloc.try_push_atom(pos, |b1, b2| {
157            parse_literal(read, b1, b2).map(GeneralizedTerm::from)
158        }),
159        b'?' | b'$' => triple_alloc.try_push_atom(pos, |b, _| {
160            parse_variable(read, b).map(GeneralizedTerm::from)
161        }),
162        _ => read.unexpected_char_error(),
163    }
164}
165
166fn parse_quoted_triple(
167    read: &mut LookAheadByteReader<impl BufRead>,
168    triple_alloc: &mut GeneralizedTripleAllocator,
169) -> Result<(), TurtleError> {
170    debug_assert_eq!(read.current(), Some(b'<'));
171    debug_assert_eq!(read.next()?, Some(b'<'));
172    read.increment_stack_size()?;
173    read.consume_many(2)?;
174
175    skip_whitespace(read)?;
176
177    parse_triple(read, triple_alloc)?;
178
179    read.check_is_current(b'>')?;
180    read.consume()?;
181    read.check_is_current(b'>')?;
182    read.consume()?;
183    read.decrement_stack_size();
184    skip_whitespace(read)
185}
186
187pub(crate) fn parse_variable<'a>(
188    read: &mut LookAheadByteReader<impl BufRead>,
189    buffer: &'a mut String,
190) -> Result<Variable<'a>, TurtleError> {
191    read.consume()?;
192    let c = read.required_current()?;
193    if c <= MAX_ASCII && (is_possible_pn_chars_u_ascii(c) || c.is_ascii_digit()) {
194        buffer.push(char::from(c))
195    } else {
196        let c = read_utf8_char(read)?;
197        if is_possible_pn_chars_u_unicode(c) {
198            buffer.push(c);
199        } else {
200            read.unexpected_char_error()?
201        }
202    }
203
204    loop {
205        read.consume()?;
206        if let Some(c) = read.current() {
207            if c <= MAX_ASCII
208                && (is_possible_pn_chars_u_ascii(c) || c.is_ascii_digit() || c == 0xb7)
209            {
210                buffer.push(char::from(c))
211            } else {
212                let c = read_utf8_char(read)?;
213                if is_possible_pn_chars_u_unicode(c) {
214                    buffer.push(c);
215                } else {
216                    return Ok(Variable { name: buffer });
217                }
218            }
219        } else {
220            return Ok(Variable { name: buffer });
221        }
222    }
223}
224
#[cfg(test)]
mod test {
    use super::*;

    /// Relative IRI references are accepted and reported unchanged in all four positions.
    #[test]
    fn nquads_relative_irirefs() -> Result<(), Box<dyn std::error::Error>> {
        // adding this test because there is currently no testsuite specific to N-Quads star
        let file = r#"<#s> <../p> </o> <//g>."#;
        let mut count = 0;
        GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |q| -> Result<
            (),
            TurtleError,
        > {
            assert!(matches!(
                q.subject,
                GeneralizedTerm::NamedNode(NamedNode { iri: "#s" }),
            ));
            assert!(matches!(
                q.predicate,
                GeneralizedTerm::NamedNode(NamedNode { iri: "../p" }),
            ));
            assert!(matches!(
                q.object,
                GeneralizedTerm::NamedNode(NamedNode { iri: "/o" }),
            ));
            assert!(matches!(
                q.graph_name,
                Some(GeneralizedTerm::NamedNode(NamedNode { iri: "//g" })),
            ));
            count += 1;
            Ok(())
        })?;
        assert_eq!(1, count);
        Ok(())
    }

    /// Quoted triples are accepted in subject, predicate, object and graph name position.
    #[test]
    fn nquads_star_valid_quad() -> Result<(), Box<dyn std::error::Error>> {
        // adding this test because there is currently no testsuite specific to N-Quads star
        let file =
            br#"<< "a" _:b <tag:c> >> << "d" ?e <./f> >> << "g" $h <../i> >> << "j" _:k </l> >>."#;
        let mut count = 0;
        GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |q| -> Result<
            (),
            TurtleError,
        > {
            assert!(matches!(
                q.subject,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "a" }),
                    GeneralizedTerm::BlankNode(BlankNode { id: "b" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "tag:c" }),
                ])
            ));
            assert!(matches!(
                q.predicate,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "d" }),
                    GeneralizedTerm::Variable(Variable { name: "e" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "./f" }),
                ])
            ));
            assert!(matches!(
                q.object,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "g" }),
                    GeneralizedTerm::Variable(Variable { name: "h" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "../i" }),
                ])
            ));
            assert!(matches!(
                q.graph_name,
                Some(GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "j" }),
                    GeneralizedTerm::BlankNode(BlankNode { id: "k" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "/l" }),
                ]))
            ));
            count += 1;
            Ok(())
        })?;
        assert_eq!(1, count);
        Ok(())
    }

    /// A line whose quoted triple is never closed by `>>` is rejected.
    #[test]
    fn nquads_star_invalid_graph_name() {
        // adding this test because there is currently no testsuite specific to N-Quads star
        let file = b"<tag:s> <tag:p> << <tag:a> <tag:b> <tag:c> .";
        let mut count = 0;
        let res = GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |_| -> Result<
            (),
            TurtleError,
        > {
            count += 1;
            Ok(())
        });
        assert!(res.is_err());
        // The only line is malformed, so no quad may have reached the callback.
        assert_eq!(0, count);
    }
}