1use crate::error::*;
4use crate::gtriple_allocator::GeneralizedTripleAllocator;
5use crate::ntriples::{parse_literal, skip_until_eol, skip_whitespace};
6use crate::shared::*;
7use crate::utils::*;
8use oxiri::IriRef;
9use rio_api::model::*;
10use rio_api::parser::*;
11use std::io::BufRead;
12
13pub struct GeneralizedNQuadsParser<R: BufRead> {
18 read: LookAheadByteReader<R>,
19 triple_alloc: GeneralizedTripleAllocator,
20 graph_name_alloc: GeneralizedTripleAllocator,
21}
22
23impl<R: BufRead> GeneralizedNQuadsParser<R> {
24 pub fn new(reader: R) -> Self {
25 Self {
26 read: LookAheadByteReader::new(reader),
27 triple_alloc: GeneralizedTripleAllocator::new(),
28 graph_name_alloc: GeneralizedTripleAllocator::new(),
29 }
30 }
31}
32
33impl<R: BufRead> GeneralizedQuadsParser for GeneralizedNQuadsParser<R> {
34 type Error = TurtleError;
35
36 fn parse_step<E: From<TurtleError>>(
37 &mut self,
38 on_quad: &mut impl FnMut(GeneralizedQuad<'_>) -> Result<(), E>,
39 ) -> Result<(), E> {
40 match self.parse_quad_line() {
41 Ok(Some(named_graph)) => {
42 match on_quad(self.triple_alloc.top_quad(
43 named_graph.then(|| self.graph_name_alloc.current_subject().unwrap()),
44 )) {
45 Ok(()) => {
46 if named_graph {
47 self.graph_name_alloc.pop_term(0);
49 self.graph_name_alloc.pop_top_empty_triple();
50 }
51 self.triple_alloc.pop_top_triple();
52 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
53 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
54 debug_assert_eq!(self.graph_name_alloc.complete_len(), 0);
55 debug_assert_eq!(self.graph_name_alloc.incomplete_len(), 0);
56 Ok(())
57 }
58 Err(err) => {
59 self.triple_alloc.clear();
60 Err(err)
61 }
62 }
63 }
64 Ok(None) => Ok(()),
65 Err(error) => {
66 self.read.consume_line_end()?;
67 self.triple_alloc.clear();
68 Err(E::from(error))
69 }
70 }
71 }
72
73 fn is_end(&self) -> bool {
74 self.read.current().is_none()
75 }
76}
77
78impl<R: BufRead> GeneralizedNQuadsParser<R> {
79 fn parse_quad_line(&mut self) -> Result<Option<bool>, TurtleError> {
80 let read = &mut self.read;
81 let triple_alloc = &mut &mut self.triple_alloc;
82
83 skip_whitespace(read)?;
84
85 if matches!(
86 read.current(),
87 None | Some(b'#') | Some(b'\r') | Some(b'\n')
88 ) {
89 skip_until_eol(read)?;
90 return Ok(None);
91 }
92
93 parse_triple(read, triple_alloc)?;
94 let named_graph = match read.current() {
95 Some(b'.') => false,
96 _ => {
97 self.graph_name_alloc.push_triple_start();
98 parse_term(0, read, &mut self.graph_name_alloc)?;
99 skip_whitespace(read)?;
100 true
101 }
102 };
103
104 read.check_is_current(b'.')?;
105 read.consume()?;
106 skip_whitespace(read)?;
107
108 match read.current() {
109 None | Some(b'#') | Some(b'\r') | Some(b'\n') => skip_until_eol(read)?,
110 _ => read.unexpected_char_error()?,
111 }
112
113 Ok(Some(named_graph))
114 }
115}
116
117fn parse_triple(
118 read: &mut LookAheadByteReader<impl BufRead>,
119 triple_alloc: &mut GeneralizedTripleAllocator,
120) -> Result<(), TurtleError> {
121 triple_alloc.push_triple_start();
122
123 for i in 0..3 {
124 parse_term(i, read, triple_alloc)?;
125 skip_whitespace(read)?;
126 }
127 Ok(())
128}
129
130fn parse_term(
131 pos: usize,
132 read: &mut LookAheadByteReader<impl BufRead>,
133 triple_alloc: &mut GeneralizedTripleAllocator,
134) -> Result<(), TurtleError> {
135 match read.required_current()? {
136 b'<' => match read.required_next()? {
137 b'<' => {
138 parse_quoted_triple(read, triple_alloc)?;
139 triple_alloc.push_quoted_triple(pos);
140 Ok(())
141 }
142 _ => triple_alloc.try_push_atom(pos, |b, _| {
143 parse_iriref(read, b)?;
144 IriRef::parse(b.as_str()).map_err(|error| {
145 read.parse_error(TurtleErrorKind::InvalidIri {
146 iri: b.to_owned(),
147 error,
148 })
149 })?;
150 Ok(NamedNode { iri: b }.into())
151 }),
152 },
153 b'_' => triple_alloc.try_push_atom(pos, |b, _| {
154 parse_blank_node_label(read, b).map(GeneralizedTerm::from)
155 }),
156 b'"' => triple_alloc.try_push_atom(pos, |b1, b2| {
157 parse_literal(read, b1, b2).map(GeneralizedTerm::from)
158 }),
159 b'?' | b'$' => triple_alloc.try_push_atom(pos, |b, _| {
160 parse_variable(read, b).map(GeneralizedTerm::from)
161 }),
162 _ => read.unexpected_char_error(),
163 }
164}
165
166fn parse_quoted_triple(
167 read: &mut LookAheadByteReader<impl BufRead>,
168 triple_alloc: &mut GeneralizedTripleAllocator,
169) -> Result<(), TurtleError> {
170 debug_assert_eq!(read.current(), Some(b'<'));
171 debug_assert_eq!(read.next()?, Some(b'<'));
172 read.increment_stack_size()?;
173 read.consume_many(2)?;
174
175 skip_whitespace(read)?;
176
177 parse_triple(read, triple_alloc)?;
178
179 read.check_is_current(b'>')?;
180 read.consume()?;
181 read.check_is_current(b'>')?;
182 read.consume()?;
183 read.decrement_stack_size();
184 skip_whitespace(read)
185}
186
187pub(crate) fn parse_variable<'a>(
188 read: &mut LookAheadByteReader<impl BufRead>,
189 buffer: &'a mut String,
190) -> Result<Variable<'a>, TurtleError> {
191 read.consume()?;
192 let c = read.required_current()?;
193 if c <= MAX_ASCII && (is_possible_pn_chars_u_ascii(c) || c.is_ascii_digit()) {
194 buffer.push(char::from(c))
195 } else {
196 let c = read_utf8_char(read)?;
197 if is_possible_pn_chars_u_unicode(c) {
198 buffer.push(c);
199 } else {
200 read.unexpected_char_error()?
201 }
202 }
203
204 loop {
205 read.consume()?;
206 if let Some(c) = read.current() {
207 if c <= MAX_ASCII
208 && (is_possible_pn_chars_u_ascii(c) || c.is_ascii_digit() || c == 0xb7)
209 {
210 buffer.push(char::from(c))
211 } else {
212 let c = read_utf8_char(read)?;
213 if is_possible_pn_chars_u_unicode(c) {
214 buffer.push(c);
215 } else {
216 return Ok(Variable { name: buffer });
217 }
218 }
219 } else {
220 return Ok(Variable { name: buffer });
221 }
222 }
223}
224
#[cfg(test)]
mod test {
    use super::*;

    /// Relative IRI references are accepted in all four positions and reported verbatim.
    #[test]
    fn nquads_relative_irirefs() -> Result<(), Box<dyn std::error::Error>> {
        let file = r#"<#s> <../p> </o> <//g>."#;
        let mut count = 0;
        GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |q| -> Result<
            (),
            TurtleError,
        > {
            assert!(matches!(
                q.subject,
                GeneralizedTerm::NamedNode(NamedNode { iri: "#s" }),
            ));
            assert!(matches!(
                q.predicate,
                GeneralizedTerm::NamedNode(NamedNode { iri: "../p" }),
            ));
            assert!(matches!(
                q.object,
                GeneralizedTerm::NamedNode(NamedNode { iri: "/o" }),
            ));
            assert!(matches!(
                q.graph_name,
                Some(GeneralizedTerm::NamedNode(NamedNode { iri: "//g" })),
            ));
            count += 1;
            Ok(())
        })?;
        assert_eq!(1, count);
        Ok(())
    }

    /// Quoted triples are accepted in all four positions, including the graph name.
    #[test]
    fn nquads_star_valid_quad() -> Result<(), Box<dyn std::error::Error>> {
        let file =
            br#"<< "a" _:b <tag:c> >> << "d" ?e <./f> >> << "g" $h <../i> >> << "j" _:k </l> >>."#;
        let mut count = 0;
        GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |q| -> Result<
            (),
            TurtleError,
        > {
            assert!(matches!(
                q.subject,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "a" }),
                    GeneralizedTerm::BlankNode(BlankNode { id: "b" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "tag:c" }),
                ])
            ));
            assert!(matches!(
                q.predicate,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "d" }),
                    GeneralizedTerm::Variable(Variable { name: "e" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "./f" }),
                ])
            ));
            assert!(matches!(
                q.object,
                GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "g" }),
                    GeneralizedTerm::Variable(Variable { name: "h" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "../i" }),
                ])
            ));
            assert!(matches!(
                q.graph_name,
                Some(GeneralizedTerm::Triple([
                    GeneralizedTerm::Literal(Literal::Simple { value: "j" }),
                    GeneralizedTerm::BlankNode(BlankNode { id: "k" }),
                    GeneralizedTerm::NamedNode(NamedNode { iri: "/l" }),
                ]))
            ));
            count += 1;
            Ok(())
        })?;
        assert_eq!(1, count);
        Ok(())
    }

    /// A line whose `<<` is never closed is rejected and no quad is reported for it.
    #[test]
    fn nquads_star_invalid_graph_name() {
        let file = b"<tag:s> <tag:p> << <tag:a> <tag:b> <tag:c> .";
        let mut count = 0;
        let res = GeneralizedNQuadsParser::new(file.as_ref()).parse_all(&mut |_| -> Result<
            (),
            TurtleError,
        > {
            count += 1;
            Ok(())
        });
        assert!(res.is_err());
        // The callback must not have run for the malformed line.
        assert_eq!(0, count);
    }
}