1use crate::error::*;
4use crate::shared::*;
5use crate::triple_allocator::TripleAllocator;
6use crate::utils::*;
7use rio_api::model::*;
8use rio_api::parser::*;
9use std::io::BufRead;
10
/// A streaming, line-oriented parser for N-Triples input read from a [`BufRead`].
///
/// Quoted triples written as `<< s p o >>` are accepted in subject and object position.
/// Triples are handed one at a time to the callback given to `parse_step`.
pub struct NTriplesParser<R: BufRead> {
    /// Byte reader the parsing functions inspect through `current()` / `next()`.
    read: LookAheadByteReader<R>,
    /// Storage for the triple being built; `parse_step` leaves it empty after each step.
    triple_alloc: TripleAllocator,
}
48
49impl<R: BufRead> NTriplesParser<R> {
50 pub fn new(reader: R) -> Self {
51 Self {
52 read: LookAheadByteReader::new(reader),
53 triple_alloc: TripleAllocator::new(),
54 }
55 }
56}
57
58impl<R: BufRead> TriplesParser for NTriplesParser<R> {
59 type Error = TurtleError;
60
61 fn parse_step<E: From<TurtleError>>(
62 &mut self,
63 on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
64 ) -> Result<(), E> {
65 match parse_triple_line(&mut self.read, &mut self.triple_alloc) {
66 Ok(true) => match on_triple(*self.triple_alloc.top()) {
67 Ok(()) => {
68 self.triple_alloc.pop_top_triple();
69 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
70 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
71 Ok(())
72 }
73 Err(err) => {
74 self.triple_alloc.clear();
75 Err(err)
76 }
77 },
78 Ok(false) => Ok(()),
79 Err(error) => {
80 self.read.consume_line_end()?;
81 self.triple_alloc.clear();
82 Err(E::from(error))
83 }
84 }
85 }
86
87 fn is_end(&self) -> bool {
88 self.read.current().is_none()
89 }
90}
91
/// A streaming, line-oriented parser for N-Quads input read from a [`BufRead`].
///
/// Each line holds a triple optionally followed by a graph name (an IRI or a blank
/// node). Quoted triples written as `<< s p o >>` are accepted in subject and object
/// position.
pub struct NQuadsParser<R: BufRead> {
    /// Byte reader the parsing functions inspect through `current()` / `next()`.
    read: LookAheadByteReader<R>,
    /// Storage for the triple being built; `parse_step` leaves it empty after each step.
    triple_alloc: TripleAllocator,
    /// Text buffer backing the graph name of the current quad; cleared before each
    /// graph name is parsed.
    graph_name_buf: String,
}
129
130impl<R: BufRead> NQuadsParser<R> {
131 pub fn new(reader: R) -> Self {
132 Self {
133 read: LookAheadByteReader::new(reader),
134 triple_alloc: TripleAllocator::new(),
135 graph_name_buf: String::default(),
136 }
137 }
138}
139
140impl<R: BufRead> QuadsParser for NQuadsParser<R> {
141 type Error = TurtleError;
142
143 fn parse_step<E: From<TurtleError>>(
144 &mut self,
145 on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
146 ) -> Result<(), E> {
147 match parse_quad_line(
148 &mut self.read,
149 &mut self.triple_alloc,
150 &mut self.graph_name_buf,
151 ) {
152 Ok(Some(opt_graph_name)) => match on_quad(self.triple_alloc.top_quad(opt_graph_name)) {
153 Ok(()) => {
154 self.triple_alloc.pop_top_triple();
155 debug_assert_eq!(self.triple_alloc.complete_len(), 0);
156 debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
157 Ok(())
158 }
159 Err(err) => {
160 self.triple_alloc.clear();
161 Err(err)
162 }
163 },
164 Ok(None) => Ok(()),
165 Err(error) => {
166 self.read.consume_line_end()?;
167 self.triple_alloc.clear();
168 Err(E::from(error))
169 }
170 }
171 }
172
173 fn is_end(&self) -> bool {
174 self.read.current().is_none()
175 }
176}
177
178fn parse_triple_line(
179 read: &mut LookAheadByteReader<impl BufRead>,
180 triple_alloc: &mut TripleAllocator,
181) -> Result<bool, TurtleError> {
182 skip_whitespace(read)?;
183
184 if matches!(
185 read.current(),
186 None | Some(b'#') | Some(b'\r') | Some(b'\n')
187 ) {
188 skip_until_eol(read)?;
189 return Ok(false);
190 }
191
192 parse_triple(read, triple_alloc)?;
193
194 read.check_is_current(b'.')?;
195 read.consume()?;
196 skip_whitespace(read)?;
197
198 match read.current() {
199 None | Some(b'#') | Some(b'\r') | Some(b'\n') => skip_until_eol(read)?,
200 _ => read.unexpected_char_error()?,
201 }
202
203 Ok(true)
204}
205
206fn parse_triple(
207 read: &mut LookAheadByteReader<impl BufRead>,
208 triple_alloc: &mut TripleAllocator,
209) -> Result<(), TurtleError> {
210 triple_alloc.push_triple_start();
211
212 parse_subject(read, triple_alloc)?;
213 skip_whitespace(read)?;
214
215 triple_alloc.try_push_predicate(|b| parse_iriref(read, b))?;
216 skip_whitespace(read)?;
217
218 parse_object(read, triple_alloc)?;
219 skip_whitespace(read)?;
220
221 Ok(())
222}
223
224fn parse_quad_line<'a>(
225 read: &mut LookAheadByteReader<impl BufRead>,
226 triple_alloc: &mut TripleAllocator,
227 graph_name_buf: &'a mut String,
228) -> Result<Option<Option<GraphName<'a>>>, TurtleError> {
229 skip_whitespace(read)?;
230
231 if matches!(
232 read.current(),
233 None | Some(b'#') | Some(b'\r') | Some(b'\n')
234 ) {
235 skip_until_eol(read)?;
236 return Ok(None);
237 }
238
239 parse_triple(read, triple_alloc)?;
240 let opt_graph_name = match read.current() {
241 Some(b'<') | Some(b'_') => {
242 graph_name_buf.clear();
243 Some(parse_graph_name(read, graph_name_buf)?)
244 }
245 _ => None,
246 };
247 skip_whitespace(read)?;
248
249 read.check_is_current(b'.')?;
250 read.consume()?;
251 skip_whitespace(read)?;
252
253 match read.current() {
254 None | Some(b'#') | Some(b'\r') | Some(b'\n') => skip_until_eol(read)?,
255 _ => read.unexpected_char_error()?,
256 }
257
258 Ok(Some(opt_graph_name))
259}
260
261fn parse_subject(
262 read: &mut LookAheadByteReader<impl BufRead>,
263 triple_alloc: &mut TripleAllocator,
264) -> Result<(), TurtleError> {
265 match read.required_current()? {
266 b'<' => match read.required_next()? {
267 b'<' => {
268 parse_quoted_triple(read, triple_alloc)?;
269 triple_alloc.push_subject_triple();
270 Ok(())
271 }
272 _ => triple_alloc.try_push_subject(|b| parse_iriref(read, b).map(Subject::from)),
273 },
274 b'_' => {
275 triple_alloc.try_push_subject(|b| parse_blank_node_label(read, b).map(Subject::from))
276 }
277 _ => read.unexpected_char_error(),
278 }
279}
280
281fn parse_object(
282 read: &mut LookAheadByteReader<impl BufRead>,
283 triple_alloc: &mut TripleAllocator,
284) -> Result<(), TurtleError> {
285 match read.required_current()? {
286 b'<' => match read.required_next()? {
287 b'<' => {
288 parse_quoted_triple(read, triple_alloc)?;
289 triple_alloc.push_object_triple();
290 Ok(())
291 }
292 _ => triple_alloc.try_push_object(|b, _| parse_iriref(read, b).map(Term::from)),
293 },
294 b'_' => {
295 triple_alloc.try_push_object(|b, _| parse_blank_node_label(read, b).map(Term::from))
296 }
297 b'"' => triple_alloc.try_push_object(|b1, b2| parse_literal(read, b1, b2).map(Term::from)),
298 _ => read.unexpected_char_error(),
299 }
300}
301
302fn parse_quoted_triple(
303 read: &mut LookAheadByteReader<impl BufRead>,
304 triple_alloc: &mut TripleAllocator,
305) -> Result<(), TurtleError> {
306 debug_assert_eq!(read.current(), Some(b'<'));
307 debug_assert_eq!(read.next()?, Some(b'<'));
308 read.increment_stack_size()?;
309 read.consume_many(2)?;
310
311 skip_whitespace(read)?;
312
313 parse_triple(read, triple_alloc)?;
314
315 read.check_is_current(b'>')?;
316 read.consume()?;
317 read.check_is_current(b'>')?;
318 read.consume()?;
319 read.decrement_stack_size();
320 skip_whitespace(read)
321}
322
323fn parse_graph_name<'a>(
324 read: &mut LookAheadByteReader<impl BufRead>,
325 buffer: &'a mut String,
326) -> Result<GraphName<'a>, TurtleError> {
327 match read.required_current()? {
328 b'<' => Ok(parse_iriref(read, buffer)?.into()),
329 b'_' => Ok(parse_blank_node_label(read, buffer)?.into()),
330 _ => read.unexpected_char_error(),
331 }
332}
333
334pub(crate) fn parse_literal<'a>(
335 read: &mut LookAheadByteReader<impl BufRead>,
336 buffer: &'a mut String,
337 annotation_buffer: &'a mut String,
338) -> Result<Literal<'a>, TurtleError> {
339 parse_string_literal_quote(read, buffer)?;
340 skip_whitespace(read)?;
341
342 match read.current() {
343 Some(b'@') => {
344 parse_langtag(read, annotation_buffer)?;
345 Ok(Literal::LanguageTaggedString {
346 value: buffer,
347 language: annotation_buffer,
348 })
349 }
350 Some(b'^') => {
351 read.consume()?;
352 read.check_is_current(b'^')?;
353 read.consume()?;
354 skip_whitespace(read)?;
355 Ok(Literal::Typed {
356 value: buffer,
357 datatype: parse_iriref(read, annotation_buffer)?,
358 })
359 }
360 _ => Ok(Literal::Simple { value: buffer }),
361 }
362}
363
364pub(crate) fn skip_whitespace(
365 read: &mut LookAheadByteReader<impl BufRead>,
366) -> Result<(), TurtleError> {
367 loop {
368 match read.current() {
369 Some(b' ') | Some(b'\t') => read.consume()?,
370 _ => return Ok(()),
371 }
372 }
373}
374
375pub(crate) fn skip_until_eol(
376 read: &mut LookAheadByteReader<impl BufRead>,
377) -> Result<(), TurtleError> {
378 loop {
379 match read.current() {
380 None => return Ok(()),
381 Some(b'\n') => {
382 read.consume()?;
383 return Ok(());
384 }
385 _ => (),
386 }
387 read.consume()?;
388 }
389}
390
391pub(crate) fn parse_iriref<'a>(
392 read: &mut LookAheadByteReader<impl BufRead>,
393 buffer: &'a mut String,
394) -> Result<NamedNode<'a>, TurtleError> {
395 parse_iriref_absolute(read, buffer)?;
396 Ok(NamedNode { iri: buffer })
397}
398
#[cfg(test)]
mod test {
    use crate::{NQuadsParser, TurtleError};
    use rio_api::parser::QuadsParser;

    /// Parses `input` as N-Quads and returns how many quads were reported.
    fn count_quads(input: &[u8]) -> Result<usize, TurtleError> {
        let mut count = 0;
        NQuadsParser::new(input).parse_all(&mut |_| -> Result<(), TurtleError> {
            count += 1;
            Ok(())
        })?;
        Ok(count)
    }

    /// Quoted triples as subject and object, followed by a graph name, form one quad.
    #[test]
    fn nquads_star_valid_quad() -> Result<(), Box<dyn std::error::Error>> {
        let file = b"<< <tag:a> <tag:b> <tag:c> >> <tag:d> << <tag:e> <tag:f> <tag:g> >> <tag:h>.";
        assert_eq!(1, count_quads(file)?);
        Ok(())
    }

    /// A quoted triple is not accepted in graph name position.
    #[test]
    fn nquads_star_invalid_graph_name() {
        let file = b"<tag:s> <tag:p> <tag:o> << <tag:a> <tag:b> <tag:c> >> .";
        assert!(count_quads(file).is_err());
    }
}
429}