1use crate::lexer::{N3Lexer, N3LexerMode, N3LexerOptions, N3Token};
4use crate::toolkit::{Lexer, Parser, RuleRecognizer, RuleRecognizerError, TokenOrLineJump};
5use crate::{MAX_BUFFER_SIZE, MIN_BUFFER_SIZE};
6#[cfg(feature = "rdf-star")]
7use oxrdf::Triple;
8use oxrdf::{BlankNode, GraphName, Literal, NamedNode, Quad, Subject, Term};
9
/// Stack-driven recognizer that turns lexer tokens into [`Quad`]s, one
/// statement per line.
///
/// Terms are kept in per-position stacks rather than single slots so that a
/// quoted triple (`rdf-star` feature) can be read while the terms of the
/// enclosing statement are still pending.
pub struct NQuadsRecognizer {
    /// Pending states; the top entry is popped for every incoming token and
    /// an empty stack is treated as `NQuadsState::ExpectSubject`.
    stack: Vec<NQuadsState>,
    /// Subjects read so far that have not yet been consumed by an emitted
    /// quad or by a quoted triple.
    subjects: Vec<Subject>,
    /// Predicates read so far that have not yet been consumed.
    predicates: Vec<NamedNode>,
    /// Objects read so far that have not yet been consumed.
    objects: Vec<Term>,
}
16
/// Settings consulted by [`NQuadsRecognizer`] while it processes tokens.
pub struct NQuadsRecognizerContext {
    /// When `true`, an IRI or blank node label found right after the object
    /// is read as the graph name; when `false`, quads are always emitted in
    /// the default graph.
    with_graph_name: bool,
    /// When `true`, `<<` is accepted where a subject or an object is expected
    /// and opens a quoted triple.
    #[cfg(feature = "rdf-star")]
    with_quoted_triples: bool,
    /// Lexer options returned by `RuleRecognizer::lexer_options`.
    lexer_options: N3LexerOptions,
}
23
/// States of the recognizer; they live on `NQuadsRecognizer::stack`.
enum NQuadsState {
    /// A subject is expected: an IRI, a blank node, or `<<` when quoted
    /// triples are enabled. A line jump is tolerated here only when no other
    /// state is pending.
    ExpectSubject,
    /// A predicate IRI is expected.
    ExpectPredicate,
    /// An object is expected: an IRI, a blank node, a string literal, or `<<`
    /// when quoted triples are enabled.
    ExpectedObject,
    /// The object is complete. With no pending state an optional graph name
    /// may follow and the quad is emitted; otherwise `>>` is required.
    ExpectPossibleGraphOrEndOfQuotedTriple,
    /// The quad has been emitted and the closing `.` is expected.
    ExpectDot,
    /// The lexical form of a literal has been read; a language tag or `^^`
    /// may follow, anything else makes it a simple literal.
    ExpectLiteralAnnotationOrGraphNameOrDot {
        /// Lexical form of the literal read so far.
        value: String,
    },
    /// `^^` has been read and the datatype IRI is expected.
    ExpectLiteralDatatype {
        /// Lexical form of the literal whose datatype is being read.
        value: String,
    },
    /// The closing `.` has been read; only a line jump should follow.
    ExpectLineJump,
    /// An error was reported; tokens are skipped until the next line jump.
    RecoverToLineJump,
    /// The triple inside a quoted subject is complete and must be wrapped
    /// into a subject before the predicate is read.
    #[cfg(feature = "rdf-star")]
    AfterQuotedSubject,
    /// The triple inside a quoted object is complete and must be wrapped
    /// into an object.
    #[cfg(feature = "rdf-star")]
    AfterQuotedObject,
}
43
impl RuleRecognizer for NQuadsRecognizer {
    type TokenRecognizer = N3Lexer;
    type Output = Quad;
    type Context = NQuadsRecognizerContext;

    /// Drops every partially read term and pending state, leaving the
    /// recognizer in `RecoverToLineJump` so the rest of the line is skipped.
    fn error_recovery_state(mut self) -> Self {
        self.stack.clear();
        self.stack.push(NQuadsState::RecoverToLineJump);
        self.subjects.clear();
        self.predicates.clear();
        self.objects.clear();
        self
    }

    /// Consumes one token (or line jump) and advances the state machine.
    ///
    /// The state on top of `stack` is popped and decides how `token` is
    /// handled; an empty stack behaves like `ExpectSubject`. Completed quads
    /// are appended to `results` and problems to `errors`. Some states do not
    /// consume the token themselves and re-dispatch it by calling
    /// `recognize_next` again after updating the stack.
    fn recognize_next(
        mut self,
        token: TokenOrLineJump<N3Token<'_>>,
        context: &mut NQuadsRecognizerContext,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) -> Self {
        match self.stack.pop().unwrap_or(NQuadsState::ExpectSubject) {
            NQuadsState::ExpectSubject => {
                let TokenOrLineJump::Token(token) = token else {
                    // A line jump with nothing pending is simply skipped.
                    // If states are still pending, the subject being read
                    // belongs to an unfinished `<< ... >>`.
                    return if self.stack.is_empty() {
                        self
                    } else {
                        self.error(
                            context,
                            results,
                            errors,
                            token,
                            "line jumps are not allowed inside of quoted triples",
                        )
                    };
                };
                match token {
                    N3Token::IriRef(s) => {
                        self.subjects.push(NamedNode::new_unchecked(s).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    N3Token::BlankNodeLabel(s) => {
                        self.subjects.push(BlankNode::new_unchecked(s).into());
                        self.stack.push(NQuadsState::ExpectPredicate);
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        // The continuation goes on the stack first so that it
                        // runs once the inner triple has been fully read.
                        self.stack.push(NQuadsState::AfterQuotedSubject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    _ => self.error(
                        context,
                        results,
                        errors,
                        TokenOrLineJump::Token(token),
                        "The subject of a triple must be an IRI or a blank node",
                    ),
                }
            }
            NQuadsState::ExpectPredicate => {
                let TokenOrLineJump::Token(token) = token else {
                    return self.error(
                        context,
                        results,
                        errors,
                        token,
                        "line jumps are not allowed in the middle of triples",
                    );
                };
                match token {
                    N3Token::IriRef(p) => {
                        self.predicates.push(NamedNode::new_unchecked(p));
                        self.stack.push(NQuadsState::ExpectedObject);
                        self
                    }
                    _ => self.error(
                        context,
                        results,
                        errors,
                        TokenOrLineJump::Token(token),
                        "The predicate of a triple must be an IRI",
                    ),
                }
            }
            NQuadsState::ExpectedObject => {
                let TokenOrLineJump::Token(token) = token else {
                    return self.error(
                        context,
                        results,
                        errors,
                        token,
                        "line jumps are not allowed in the middle of triples",
                    );
                };
                match token {
                    N3Token::IriRef(o) => {
                        self.objects.push(NamedNode::new_unchecked(o).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::BlankNodeLabel(o) => {
                        self.objects.push(BlankNode::new_unchecked(o).into());
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    N3Token::String(value) => {
                        // The literal cannot be built yet: a language tag or
                        // a datatype may still follow.
                        self.stack
                            .push(NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value });
                        self
                    }
                    #[cfg(feature = "rdf-star")]
                    N3Token::Punctuation("<<") if context.with_quoted_triples => {
                        // Same scheme as for quoted subjects: continuation
                        // first, then the state that reads the inner triple.
                        self.stack.push(NQuadsState::AfterQuotedObject);
                        self.stack.push(NQuadsState::ExpectSubject);
                        self
                    }
                    _ => self.error(
                        context,
                        results,
                        errors,
                        TokenOrLineJump::Token(token),
                        "The object of a triple must be an IRI, a blank node or a literal",
                    ),
                }
            }
            NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value } => match token {
                TokenOrLineJump::Token(N3Token::LangTag(lang_tag)) => {
                    // The language tag is stored in ASCII lowercase.
                    self.objects.push(
                        Literal::new_language_tagged_literal_unchecked(
                            value,
                            lang_tag.to_ascii_lowercase(),
                        )
                        .into(),
                    );
                    self.stack
                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                    self
                }
                TokenOrLineJump::Token(N3Token::Punctuation("^^")) => {
                    self.stack
                        .push(NQuadsState::ExpectLiteralDatatype { value });
                    self
                }
                _ => {
                    // Neither a language tag nor `^^`: the literal is a simple
                    // one. Store it and let the next state handle this same
                    // token (line jumps included).
                    self.objects.push(Literal::new_simple_literal(value).into());
                    self.stack
                        .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                    self.recognize_next(token, context, results, errors)
                }
            },
            NQuadsState::ExpectLiteralDatatype { value } => {
                let TokenOrLineJump::Token(token) = token else {
                    return self.error(
                        context,
                        results,
                        errors,
                        token,
                        "line jumps are not allowed in the middle of triples",
                    );
                };
                match token {
                    N3Token::IriRef(d) => {
                        self.objects.push(
                            Literal::new_typed_literal(value, NamedNode::new_unchecked(d)).into(),
                        );
                        self.stack
                            .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                        self
                    }
                    _ => self.error(
                        context,
                        results,
                        errors,
                        TokenOrLineJump::Token(token),
                        "A literal datatype must be an IRI",
                    ),
                }
            }
            NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple => {
                // An empty stack means no quoted-triple continuation is
                // pending, so this is the end of a top-level statement.
                if self.stack.is_empty() {
                    match token {
                        TokenOrLineJump::Token(N3Token::IriRef(g)) if context.with_graph_name => {
                            self.emit_quad(results, NamedNode::new_unchecked(g).into());
                            self.stack.push(NQuadsState::ExpectDot);
                            self
                        }
                        TokenOrLineJump::Token(N3Token::BlankNodeLabel(g))
                            if context.with_graph_name =>
                        {
                            self.emit_quad(results, BlankNode::new_unchecked(g).into());
                            self.stack.push(NQuadsState::ExpectDot);
                            self
                        }
                        _ => {
                            // No graph name here: emit in the default graph
                            // and hand this same token to `ExpectDot`.
                            self.emit_quad(results, GraphName::DefaultGraph);
                            self.stack.push(NQuadsState::ExpectDot);
                            self.recognize_next(token, context, results, errors)
                        }
                    }
                } else if token == TokenOrLineJump::Token(N3Token::Punctuation(">>")) {
                    // Nothing is pushed: the pending `AfterQuoted*` state is
                    // now on top and builds the quoted triple when the next
                    // token arrives.
                    self
                } else {
                    self.error(
                        context,
                        results,
                        errors,
                        token,
                        "Expecting the end of a quoted triple '>>'",
                    )
                }
            }
            NQuadsState::ExpectDot => {
                let TokenOrLineJump::Token(token) = token else {
                    // NOTE(review): `error` already forwards a line jump to
                    // the recovery state, which leaves the stack empty, so the
                    // extra `recognize_next(LineJump)` below appears to be a
                    // no-op — confirm before simplifying.
                    return self
                        .error(
                            context,
                            results,
                            errors,
                            token,
                            "Quads must be followed by a dot",
                        )
                        .recognize_next(TokenOrLineJump::LineJump, context, results, errors);
                };
                if let N3Token::Punctuation(".") = token {
                    self.stack.push(NQuadsState::ExpectLineJump);
                    self
                } else {
                    // The error is reported without entering recovery: the
                    // already emitted quad is kept and the token is re-read
                    // with an empty stack, i.e. as the start of a statement.
                    errors.push("Quads must be followed by a dot".into());
                    self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
                }
            }
            NQuadsState::ExpectLineJump => {
                let TokenOrLineJump::Token(token) = token else {
                    return self;
                };
                // A token after the final dot: report it, then re-read it with
                // an empty stack, i.e. as the start of a statement.
                errors.push(
                    format!(
                        "Only a single triple or quad can be written in a line, found {token:?}"
                    )
                    .into(),
                );
                self.recognize_next(TokenOrLineJump::Token(token), context, results, errors)
            }
            #[cfg(feature = "rdf-star")]
            NQuadsState::AfterQuotedSubject => {
                // The inner triple left one term on top of each term stack.
                // NOTE(review): the unwraps rely on this state only being
                // reached after a complete inner triple — verify.
                let triple = Triple {
                    subject: self.subjects.pop().unwrap(),
                    predicate: self.predicates.pop().unwrap(),
                    object: self.objects.pop().unwrap(),
                };
                self.subjects.push(triple.into());
                self.stack.push(NQuadsState::ExpectPredicate);
                // The current token has not been consumed yet.
                self.recognize_next(token, context, results, errors)
            }
            #[cfg(feature = "rdf-star")]
            NQuadsState::AfterQuotedObject => {
                // Same as above, but the quoted triple becomes an object.
                let triple = Triple {
                    subject: self.subjects.pop().unwrap(),
                    predicate: self.predicates.pop().unwrap(),
                    object: self.objects.pop().unwrap(),
                };
                self.objects.push(triple.into());
                self.stack
                    .push(NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple);
                // The current token has not been consumed yet.
                self.recognize_next(token, context, results, errors)
            }
            NQuadsState::RecoverToLineJump => {
                // Every token is swallowed; only a line jump ends recovery.
                if token != TokenOrLineJump::LineJump {
                    self.stack.push(NQuadsState::RecoverToLineJump);
                }
                self
            }
        }
    }

    /// Finishes recognition once no more tokens will arrive.
    ///
    /// A statement that is complete except for its final dot is still emitted
    /// (in the default graph) together with an error; any other unfinished
    /// state is reported as "Unexpected end".
    fn recognize_end(
        mut self,
        _context: &mut NQuadsRecognizerContext,
        results: &mut Vec<Quad>,
        errors: &mut Vec<RuleRecognizerError>,
    ) {
        match &*self.stack {
            // Nothing is pending.
            [NQuadsState::ExpectSubject | NQuadsState::ExpectLineJump] | [] => (),
            // The quad was already emitted; only the dot is missing.
            [NQuadsState::ExpectDot] => errors.push("Triples must be followed by a dot".into()),
            [NQuadsState::ExpectPossibleGraphOrEndOfQuotedTriple] => {
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples must be followed by a dot".into())
            }
            [NQuadsState::ExpectLiteralAnnotationOrGraphNameOrDot { value }] => {
                // The pending lexical form becomes a simple literal.
                self.objects.push(Literal::new_simple_literal(value).into());
                self.emit_quad(results, GraphName::DefaultGraph);
                errors.push("Triples must be followed by a dot".into())
            }
            _ => errors.push("Unexpected end".into()),
        }
    }

    /// Returns the lexer options stored in the context.
    fn lexer_options(context: &NQuadsRecognizerContext) -> &N3LexerOptions {
        &context.lexer_options
    }
}
350
351impl NQuadsRecognizer {
352 #[allow(clippy::fn_params_excessive_bools)]
353 pub fn new_parser<B>(
354 data: B,
355 is_ending: bool,
356 with_graph_name: bool,
357 #[cfg(feature = "rdf-star")] with_quoted_triples: bool,
358 unchecked: bool,
359 ) -> Parser<B, Self> {
360 Parser::new(
361 Lexer::new(
362 N3Lexer::new(N3LexerMode::NTriples, unchecked),
363 data,
364 is_ending,
365 MIN_BUFFER_SIZE,
366 MAX_BUFFER_SIZE,
367 Some(b"#"),
368 ),
369 Self {
370 stack: vec![NQuadsState::ExpectSubject],
371 subjects: Vec::new(),
372 predicates: Vec::new(),
373 objects: Vec::new(),
374 },
375 NQuadsRecognizerContext {
376 with_graph_name,
377 #[cfg(feature = "rdf-star")]
378 with_quoted_triples,
379 lexer_options: N3LexerOptions::default(),
380 },
381 )
382 }
383
384 #[must_use]
385 fn error(
386 self,
387 context: &mut NQuadsRecognizerContext,
388 results: &mut Vec<Quad>,
389 errors: &mut Vec<RuleRecognizerError>,
390 token: TokenOrLineJump<N3Token<'_>>,
391 msg: impl Into<RuleRecognizerError>,
392 ) -> Self {
393 errors.push(msg.into());
394 let this = self.error_recovery_state();
395 match token {
396 TokenOrLineJump::Token(_) => this,
397 TokenOrLineJump::LineJump => this.recognize_next(token, context, results, errors), }
399 }
400
401 fn emit_quad(&mut self, results: &mut Vec<Quad>, graph_name: GraphName) {
402 results.push(Quad {
403 subject: self.subjects.pop().unwrap(),
404 predicate: self.predicates.pop().unwrap(),
405 object: self.objects.pop().unwrap(),
406 graph_name,
407 })
408 }
409}