sophia_api/term/
bnode_id.rs

1//! I define the [`BnodeId`] wrapper type,
2//! which guarantees that the underlying `str`
3//! satisfies the `BLANK_NODE_LABEL` rule in [Turtle](https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL)
4//! (without the leading `_:`).
5use super::*;
6use lazy_static::lazy_static;
7use regex::Regex;
8use sophia_iri::wrap;
9use std::borrow::Borrow;
10use std::fmt::Debug;
11use thiserror::Error;
12
13lazy_static! {
14    /// A modified production of Turtle's BLANK_NODE_LABEL according to the
15    /// [Turtle spec](https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL).
16    ///
17    /// In contrast to the original rule this regular expression does not look
18    /// for a leading `_:`. Accordingly it only checks if the label is valid.
19    ///
20    /// Actually, this regex is also valid for Notation3 nodes. Even Turtle is
21    /// a derivate of N3, it does not change the syntax of blank nodes.
22    ///
23    /// # Captures
24    ///
25    /// This regular expression matches the whole input (`^...$`),
26    /// therefore, it can not be used to capture `BLANK_NODE_LABEL`s in an arbitrary string.
27    ///
28    /// # Rule
29    ///
30    /// `BLANK_NODE_LABEL ::= (PN_CHARS_U | [0-9]) ((PN_CHARS | '.')* PN_CHARS)?`
31    static ref BNODE_ID: Regex = Regex::new(r"(?x)
32      ^
33      [A-Za-z\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{2ff}\u{370}-\u{37D}\u{37F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_0-9]
34      (
35          [A-Za-z\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{2ff}\u{370}-\u{37D}\u{37F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_\u{2d}0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]
36          |
37          \u{2e} [A-Za-z\u{c0}-\u{d6}\u{d8}-\u{f6}\u{f8}-\u{2ff}\u{370}-\u{37D}\u{37F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_\u{2d}0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}]
38      )*
39      $
40    ").unwrap();
41}
42
43wrap! { BnodeId borrowing str :
44    /// This wrapper guarantees that the underlying `str`
45    /// satisfies the `BLANK_NODE_LABEL` rule in [Turtle](https://www.w3.org/TR/turtle/#grammar-production-BLANK_NODE_LABEL)
46    /// (without the leading `_:`).
47    pub fn new(id: T) -> Result<Self, InvalidBnodeId> {
48        if BNODE_ID.is_match(id.borrow()) {
49            Ok(BnodeId(id))
50        } else {
51            Err(InvalidBnodeId(id.borrow().to_string()))
52        }
53    }
54
55    /// Gets a reference to the underlying &str.
56    pub fn as_str(&self) -> &str {
57        self.0.borrow()
58    }
59}
/// This error is raised when trying to parse an invalid blank node identifier.
///
/// The wrapped `String` is an owned copy of the rejected identifier;
/// it is interpolated into the error message in place of `{0}`.
#[derive(Debug, Error)]
#[error("The given blank node identifier '{0}' does not comply with Turtle's BLANK_NODE_LABEL")]
pub struct InvalidBnodeId(pub String);
64
65impl<T> Term for BnodeId<T>
66where
67    T: Borrow<str> + Debug,
68{
69    type BorrowTerm<'x> = &'x Self where T: 'x;
70
71    fn kind(&self) -> TermKind {
72        TermKind::BlankNode
73    }
74    fn bnode_id(&self) -> Option<BnodeId<MownStr>> {
75        Some(self.as_ref().map_unchecked(MownStr::from_str))
76    }
77    fn borrow_term(&self) -> Self::BorrowTerm<'_> {
78        self
79    }
80}
81
#[cfg(test)]
mod test {
    use super::*;
    use test_case::test_case;

    /// Each listed label must be accepted by `BnodeId::new`.
    #[test_case("x")]
    #[test_case("_"; "underscore")]
    #[test_case("foo_bar_baz")]
    #[test_case("hé_hé")]
    #[test_case("1")]
    #[test_case("abc42")]
    #[test_case("a.b"; "with dot")]
    fn valid(label: &str) {
        let outcome = BnodeId::new(label);
        assert!(outcome.is_ok());
    }

    /// Each listed label must be rejected by `BnodeId::new`.
    #[test_case(""; "empty")]
    #[test_case(" "; "space")]
    #[test_case("a."; "trailing dot")]
    #[test_case(".b"; "leading dot")]
    #[test_case("a,b"; "with comma")]
    #[test_case("a:b"; "with colon")]
    #[test_case("a b"; "with space")]
    fn invalid(label: &str) {
        let outcome = BnodeId::new(label);
        assert!(outcome.is_err());
    }
}