1use crate::vocab::xsd;
2#[cfg(feature = "rdf-star")]
3use crate::Subject;
4use crate::{
5 BlankNode, BlankNodeIdParseError, GraphName, IriParseError, LanguageTagParseError, Literal,
6 NamedNode, Quad, Term, Triple, Variable, VariableNameParseError,
7};
8use std::borrow::Cow;
9use std::char;
10use std::str::{Chars, FromStr};
11
/// Limit on the recursion counter threaded through `read_term` and `read_triple`.
///
/// `read_term` compares its counter against this value and fails with a
/// "Too many nested triples" error instead of risking a stack overflow.
/// The counter is incremented both when `read_term` descends into `read_triple`
/// and when `read_triple` descends into `read_term`.
const MAX_NUMBER_OF_NESTED_TRIPLES: usize = 128;
15
16impl FromStr for NamedNode {
17 type Err = TermParseError;
18
19 fn from_str(s: &str) -> Result<Self, Self::Err> {
32 let (term, left) = read_named_node(s)?;
33 if !left.is_empty() {
34 return Err(Self::Err::msg(
35 "Named node serialization should end with a >",
36 ));
37 }
38 Ok(term)
39 }
40}
41
42impl FromStr for BlankNode {
43 type Err = TermParseError;
44
45 fn from_str(s: &str) -> Result<Self, Self::Err> {
55 let (term, left) = read_blank_node(s)?;
56 if !left.is_empty() {
57 return Err(Self::Err::msg(
58 "Blank node serialization should not contain whitespaces",
59 ));
60 }
61 Ok(term)
62 }
63}
64
65impl FromStr for Literal {
66 type Err = TermParseError;
67
68 fn from_str(s: &str) -> Result<Self, Self::Err> {
109 let (term, left) = read_literal(s)?;
110 if !left.is_empty() {
111 return Err(Self::Err::msg("Invalid literal serialization"));
112 }
113 Ok(term)
114 }
115}
116
117impl FromStr for Term {
118 type Err = TermParseError;
119
120 fn from_str(s: &str) -> Result<Self, Self::Err> {
133 let (term, left) = read_term(s, 0)?;
134 if !left.is_empty() {
135 return Err(Self::Err::msg("Invalid term serialization"));
136 }
137 Ok(term)
138 }
139}
140
141impl FromStr for Triple {
142 type Err = TermParseError;
143
144 fn from_str(s: &str) -> Result<Self, Self::Err> {
161 let (triple, left) = read_triple(s, 0)?;
162 if !matches!(left.trim(), "" | ".") {
163 return Err(Self::Err::msg("Invalid triple serialization"));
164 }
165 Ok(triple)
166 }
167}
168
169impl FromStr for Quad {
170 type Err = TermParseError;
171
172 fn from_str(s: &str) -> Result<Self, Self::Err> {
199 let (triple, left) = read_triple(s, 0)?;
200 if matches!(left.trim(), "" | ".") {
201 return Ok(triple.in_graph(GraphName::DefaultGraph));
202 }
203 let (graph_name, left) = read_term(left, 0)?;
204 if !matches!(left.trim(), "" | ".") {
205 return Err(Self::Err::msg("Invalid triple serialization"));
206 }
207 Ok(triple.in_graph(match graph_name {
208 Term::NamedNode(graph_name) => GraphName::from(graph_name),
209 Term::BlankNode(graph_name) => GraphName::from(graph_name),
210 Term::Literal(_) => {
211 return Err(TermParseError::msg(
212 "Literals are not allowed in graph name position",
213 ));
214 }
215 #[cfg(feature = "rdf-star")]
216 Term::Triple(_) => {
217 return Err(TermParseError::msg(
218 "Triple terms are not allowed in graph name position",
219 ));
220 }
221 }))
222 }
223}
224
225impl FromStr for Variable {
226 type Err = TermParseError;
227
228 fn from_str(s: &str) -> Result<Self, Self::Err> {
238 if !s.starts_with('?') && !s.starts_with('$') {
239 return Err(Self::Err::msg(
240 "Variable serialization should start with ? or $",
241 ));
242 }
243 Self::new(&s[1..]).map_err(|error| {
244 TermParseError(TermParseErrorKind::Variable {
245 value: s.to_owned(),
246 error,
247 })
248 })
249 }
250}
251
252fn read_named_node(s: &str) -> Result<(NamedNode, &str), TermParseError> {
253 let s = s.trim();
254 if let Some(remain) = s.strip_prefix('<') {
255 let end = remain
256 .find('>')
257 .ok_or_else(|| TermParseError::msg("Named node serialization should end with a >"))?;
258 let (value, remain) = remain.split_at(end);
259 let remain = &remain[1..];
260 let value = if value.contains('\\') {
261 let mut escaped = String::with_capacity(value.len());
262 let mut chars = value.chars();
263 while let Some(c) = chars.next() {
264 if c == '\\' {
265 match chars.next() {
266 Some('u') => escaped.push(read_hexa_char(&mut chars, 4)?),
267 Some('U') => escaped.push(read_hexa_char(&mut chars, 8)?),
268 Some(c) => {
269 escaped.push('\\');
270 escaped.push(c);
271 }
272 None => escaped.push('\\'),
273 }
274 } else {
275 escaped.push(c);
276 }
277 }
278 Cow::Owned(escaped)
279 } else {
280 Cow::Borrowed(value)
281 };
282 let term = NamedNode::new(value.as_ref()).map_err(|error| {
283 TermParseError(TermParseErrorKind::Iri {
284 value: value.into_owned(),
285 error,
286 })
287 })?;
288 Ok((term, remain))
289 } else {
290 Err(TermParseError::msg(
291 "Named node serialization should start with a <",
292 ))
293 }
294}
295
296fn read_blank_node(s: &str) -> Result<(BlankNode, &str), TermParseError> {
297 let s = s.trim();
298 if let Some(remain) = s.strip_prefix("_:") {
299 let mut end = remain
300 .find(|v: char| {
301 v.is_whitespace()
302 || matches!(v, '<' | '?' | '$' | '"' | '\'' | '>' | '@' | '^' | ':')
303 })
304 .unwrap_or(remain.len());
305 if let Some(pos) = remain[..end].find("..") {
306 end = pos;
307 }
308 if remain[..end].ends_with('.') {
309 end -= 1;
311 }
312 let (value, remain) = remain.split_at(end);
313 let term = BlankNode::new(value).map_err(|error| {
314 TermParseError(TermParseErrorKind::BlankNode {
315 value: value.to_owned(),
316 error,
317 })
318 })?;
319 Ok((term, remain))
320 } else {
321 Err(TermParseError::msg(
322 "Blank node serialization should start with '_:'",
323 ))
324 }
325}
326
327fn read_literal(s: &str) -> Result<(Literal, &str), TermParseError> {
328 let s = s.trim();
329 if let Some(s) = s.strip_prefix('"') {
330 let mut value = String::with_capacity(s.len());
331 let mut chars = s.chars();
332 while let Some(c) = chars.next() {
333 match c {
334 '"' => {
335 let remain = chars.as_str().trim();
336 return if let Some(remain) = remain.strip_prefix('@') {
337 let end = remain
338 .find(|v| !matches!(v, 'a'..='z' | 'A'..='Z' | '-'))
339 .unwrap_or(remain.len());
340 let (language, remain) = remain.split_at(end);
341 Ok((
342 Literal::new_language_tagged_literal(value, language).map_err(
343 |error| {
344 TermParseError(TermParseErrorKind::LanguageTag {
345 value: language.to_owned(),
346 error,
347 })
348 },
349 )?,
350 remain,
351 ))
352 } else if let Some(remain) = remain.strip_prefix("^^") {
353 let (datatype, remain) = read_named_node(remain)?;
354 Ok((Literal::new_typed_literal(value, datatype), remain))
355 } else {
356 Ok((Literal::new_simple_literal(value), remain))
357 };
358 }
359 '\\' => {
360 if let Some(c) = chars.next() {
361 value.push(match c {
362 't' => '\t',
363 'b' => '\u{08}',
364 'n' => '\n',
365 'r' => '\r',
366 'f' => '\u{0C}',
367 '"' => '"',
368 '\'' => '\'',
369 '\\' => '\\',
370 'u' => read_hexa_char(&mut chars, 4)?,
371 'U' => read_hexa_char(&mut chars, 8)?,
372 _ => return Err(TermParseError::msg("Unexpected escaped char")),
373 })
374 } else {
375 return Err(TermParseError::msg("Unexpected literal end"));
376 }
377 }
378 _ => value.push(c),
379 }
380 }
381 Err(TermParseError::msg("Unexpected literal end"))
382 } else if let Some(remain) = s.strip_prefix("true") {
383 Ok((Literal::new_typed_literal("true", xsd::BOOLEAN), remain))
384 } else if let Some(remain) = s.strip_prefix("false") {
385 Ok((Literal::new_typed_literal("false", xsd::BOOLEAN), remain))
386 } else {
387 let input = s.as_bytes();
388 if input.is_empty() {
389 return Err(TermParseError::msg("Empty term serialization"));
390 }
391
392 let mut cursor = match input.first() {
393 Some(b'+' | b'-') => 1,
394 _ => 0,
395 };
396 let mut with_dot = false;
397
398 let mut count_before: usize = 0;
399 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
400 count_before += 1;
401 cursor += 1;
402 }
403
404 let mut count_after: usize = 0;
405 if cursor < input.len() && input[cursor] == b'.' {
406 with_dot = true;
407 cursor += 1;
408 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
409 count_after += 1;
410 cursor += 1;
411 }
412 }
413
414 if cursor < input.len() && (input[cursor] == b'e' || input[cursor] == b'E') {
415 cursor += 1;
416 cursor += match input.get(cursor) {
417 Some(b'+' | b'-') => 1,
418 _ => 0,
419 };
420 let mut count_exponent = 0;
421 while cursor < input.len() && b'0' <= input[cursor] && input[cursor] <= b'9' {
422 count_exponent += 1;
423 cursor += 1;
424 }
425 if count_exponent > 0 {
426 Ok((Literal::new_typed_literal(s, xsd::DOUBLE), &s[cursor..]))
427 } else {
428 Err(TermParseError::msg(
429 "Double serialization with an invalid exponent",
430 ))
431 }
432 } else if with_dot {
433 if count_after > 0 {
434 Ok((Literal::new_typed_literal(s, xsd::DECIMAL), &s[cursor..]))
435 } else {
436 Err(TermParseError::msg(
437 "Decimal serialization without floating part",
438 ))
439 }
440 } else if count_before > 0 {
441 Ok((Literal::new_typed_literal(s, xsd::INTEGER), &s[cursor..]))
442 } else {
443 Err(TermParseError::msg("Empty integer serialization"))
444 }
445 }
446}
447
448fn read_term(s: &str, number_of_recursive_calls: usize) -> Result<(Term, &str), TermParseError> {
449 if number_of_recursive_calls == MAX_NUMBER_OF_NESTED_TRIPLES {
450 return Err(TermParseError::msg(
451 "Too many nested triples. The parser fails here to avoid a stack overflow.",
452 ));
453 }
454 let s = s.trim();
455 #[allow(unused_variables)]
456 if let Some(remain) = s.strip_prefix("<<") {
457 #[cfg(feature = "rdf-star")]
458 {
459 let (triple, remain) = read_triple(remain, number_of_recursive_calls + 1)?;
460 let remain = remain.trim_start();
461 if let Some(remain) = remain.strip_prefix(">>") {
462 Ok((triple.into(), remain))
463 } else {
464 Err(TermParseError::msg(
465 "Nested triple serialization must be enclosed between << and >>",
466 ))
467 }
468 }
469 #[cfg(not(feature = "rdf-star"))]
470 {
471 Err(TermParseError::msg("RDF-star is not supported"))
472 }
473 } else if s.starts_with('<') {
474 let (term, remain) = read_named_node(s)?;
475 Ok((term.into(), remain))
476 } else if s.starts_with('_') {
477 let (term, remain) = read_blank_node(s)?;
478 Ok((term.into(), remain))
479 } else {
480 let (term, remain) = read_literal(s)?;
481 Ok((term.into(), remain))
482 }
483}
484
485fn read_triple(
486 s: &str,
487 number_of_recursive_calls: usize,
488) -> Result<(Triple, &str), TermParseError> {
489 let s = s.trim();
490 let (subject, remain) = read_term(s, number_of_recursive_calls + 1)?;
491 let (predicate, remain) = read_named_node(remain)?;
492 let (object, remain) = read_term(remain, number_of_recursive_calls + 1)?;
493 Ok((
494 Triple {
495 subject: match subject {
496 Term::NamedNode(s) => s.into(),
497 Term::BlankNode(s) => s.into(),
498 Term::Literal(_) => {
499 return Err(TermParseError::msg(
500 "Literals are not allowed in subject position",
501 ));
502 }
503 #[cfg(feature = "rdf-star")]
504 Term::Triple(s) => Subject::Triple(s),
505 },
506 predicate,
507 object,
508 },
509 remain,
510 ))
511}
512
513fn read_hexa_char(input: &mut Chars<'_>, len: usize) -> Result<char, TermParseError> {
514 let mut value = 0;
515 for _ in 0..len {
516 if let Some(c) = input.next() {
517 value = value * 16
518 + match c {
519 '0'..='9' => u32::from(c) - u32::from('0'),
520 'a'..='f' => u32::from(c) - u32::from('a') + 10,
521 'A'..='F' => u32::from(c) - u32::from('A') + 10,
522 _ => {
523 return Err(TermParseError::msg(format!(
524 "Unexpected character in a unicode escape: {c}"
525 )));
526 }
527 }
528 } else {
529 return Err(TermParseError::msg("Unexpected literal string end"));
530 }
531 }
532 char::from_u32(value).ok_or_else(|| TermParseError::msg("Invalid encoded unicode code point"))
533}
534
/// Error returned by the [`FromStr`] implementations and the `read_*` helpers
/// of this module when a term serialization cannot be parsed.
///
/// It is an opaque wrapper around the private `TermParseErrorKind`; its
/// `Display` output is forwarded to the wrapped kind (`#[error(transparent)]`).
#[derive(Debug, thiserror::Error)]
#[error(transparent)]
pub struct TermParseError(#[from] TermParseErrorKind);
539
/// The concrete failure wrapped by [`TermParseError`].
///
/// Each structured variant keeps the offending text in `value` together with
/// the underlying validation error in `error`.
#[derive(Debug, thiserror::Error)]
enum TermParseErrorKind {
    /// The IRI of a named node was rejected by `NamedNode::new`.
    #[error("Error while parsing the named node '{value}': {error}")]
    Iri { error: IriParseError, value: String },
    /// The identifier of a blank node was rejected by `BlankNode::new`.
    #[error("Error while parsing the blank node '{value}': {error}")]
    BlankNode {
        error: BlankNodeIdParseError,
        value: String,
    },
    /// The language tag of a literal was rejected by
    /// `Literal::new_language_tagged_literal`.
    #[error("Error while parsing the language tag '{value}': {error}")]
    LanguageTag {
        error: LanguageTagParseError,
        value: String,
    },
    /// The name of a variable was rejected by `Variable::new`.
    #[error("Error while parsing the variable '{value}': {error}")]
    Variable {
        error: VariableNameParseError,
        value: String,
    },
    /// A free-form syntax error message.
    #[error("{0}")]
    Msg(String),
}
563
564impl TermParseError {
565 pub(crate) fn msg(msg: impl Into<String>) -> Self {
566 Self(TermParseErrorKind::Msg(msg.into()))
567 }
568}
569
#[cfg(all(test, feature = "rdf-star"))]
mod tests {
    use super::*;

    /// Checks escape decoding in literals and IRIs, and parsing of a `<< ... >>` triple term.
    #[test]
    fn triple_term_parsing() {
        // \u and \U escapes inside a string literal.
        let escaped_literal: Term = "\"ex\\u00E9\\U000000E9\"".parse().unwrap();
        assert_eq!(
            escaped_literal,
            Term::from(Literal::new_simple_literal("ex\u{e9}\u{e9}"))
        );

        // \u and \U escapes inside an IRI.
        let escaped_iri: Term = "<http://example.com/\\u00E9\\U000000E9>".parse().unwrap();
        assert_eq!(
            escaped_iri,
            Term::from(NamedNode::new_unchecked("http://example.com/\u{e9}\u{e9}"))
        );

        // A nested triple used as a term.
        let nested: Term = "<< _:s <http://example.com/p> \"o\" >>".parse().unwrap();
        let expected = Triple::new(
            BlankNode::new("s").unwrap(),
            NamedNode::new("http://example.com/p").unwrap(),
            Literal::new_simple_literal("o"),
        );
        assert_eq!(nested, Term::from(expected));
    }
}
595}