oxrdf/
blank_node.rs

1#![allow(clippy::host_endian_bytes)] // We use it to go around 16 bytes alignment of u128
2use rand::random;
3use std::io::Write;
4use std::{fmt, str};
5
6/// An owned RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
7///
8/// The common way to create a new blank node is to use the [`BlankNode::default()`] function.
9///
10/// It is also possible to create a blank node from a blank node identifier using the [`BlankNode::new()`] function.
11/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
12///
13/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
14/// ```
15/// use oxrdf::BlankNode;
16///
17/// assert_eq!("_:a122", BlankNode::new("a122")?.to_string());
18/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
19/// ```
20#[derive(Eq, PartialEq, Debug, Clone, Hash)]
21pub struct BlankNode(BlankNodeContent);
22
23#[derive(PartialEq, Eq, Debug, Clone, Hash)]
24enum BlankNodeContent {
25    Named(String),
26    Anonymous { id: [u8; 16], str: IdStr },
27}
28
29impl BlankNode {
30    /// Creates a blank node from a unique identifier.
31    ///
32    /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
33    ///
34    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`]
35    /// that creates a random ID that could be easily inlined by Oxigraph stores.
36    pub fn new(id: impl Into<String>) -> Result<Self, BlankNodeIdParseError> {
37        let id = id.into();
38        validate_blank_node_identifier(&id)?;
39        Ok(Self::new_unchecked(id))
40    }
41
42    /// Creates a blank node from a unique identifier without validation.
43    ///
44    /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
45    /// according to N-Triples, Turtle, and SPARQL grammars.
46    ///
47    /// [`BlankNode::new()`] is a safe version of this constructor and should be used for untrusted data.
48    #[inline]
49    pub fn new_unchecked(id: impl Into<String>) -> Self {
50        let id = id.into();
51        if let Some(numerical_id) = to_integer_id(&id) {
52            Self::new_from_unique_id(numerical_id)
53        } else {
54            Self(BlankNodeContent::Named(id))
55        }
56    }
57
58    /// Creates a blank node from a unique numerical id.
59    ///
60    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
61    #[inline]
62    pub fn new_from_unique_id(id: u128) -> Self {
63        Self(BlankNodeContent::Anonymous {
64            id: id.to_ne_bytes(),
65            str: IdStr::new(id),
66        })
67    }
68
69    /// Returns the underlying ID of this blank node.
70    #[inline]
71    pub fn as_str(&self) -> &str {
72        match &self.0 {
73            BlankNodeContent::Named(id) => id,
74            BlankNodeContent::Anonymous { str, .. } => str.as_str(),
75        }
76    }
77
78    /// Returns the underlying ID of this blank node.
79    #[inline]
80    pub fn into_string(self) -> String {
81        match self.0 {
82            BlankNodeContent::Named(id) => id,
83            BlankNodeContent::Anonymous { str, .. } => str.as_str().to_owned(),
84        }
85    }
86
87    #[inline]
88    pub fn as_ref(&self) -> BlankNodeRef<'_> {
89        BlankNodeRef(match &self.0 {
90            BlankNodeContent::Named(id) => BlankNodeRefContent::Named(id.as_str()),
91            BlankNodeContent::Anonymous { id, str } => BlankNodeRefContent::Anonymous {
92                id: *id,
93                str: str.as_str(),
94            },
95        })
96    }
97}
98
99impl fmt::Display for BlankNode {
100    #[inline]
101    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
102        self.as_ref().fmt(f)
103    }
104}
105
106impl Default for BlankNode {
107    /// Builds a new RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node) with a unique id.
108    #[inline]
109    fn default() -> Self {
110        // We ensure the ID does not start with a number to be also valid with RDF/XML
111        loop {
112            let id = random();
113            let str = IdStr::new(id);
114            if matches!(str.as_str().as_bytes().first(), Some(b'a'..=b'f')) {
115                return Self(BlankNodeContent::Anonymous {
116                    id: id.to_ne_bytes(),
117                    str,
118                });
119            }
120        }
121    }
122}
123
124/// A borrowed RDF [blank node](https://www.w3.org/TR/rdf11-concepts/#dfn-blank-node).
125///
126/// The common way to create a new blank node is to use the [`BlankNode::default`] trait method.
127///
128/// It is also possible to create a blank node from a blank node identifier using the [`BlankNodeRef::new()`] function.
129/// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
130///
131/// The default string formatter is returning an N-Triples, Turtle, and SPARQL compatible representation:
132/// ```
133/// use oxrdf::BlankNodeRef;
134///
135/// assert_eq!("_:a122", BlankNodeRef::new("a122")?.to_string());
136/// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
137/// ```
138#[derive(Eq, PartialEq, Debug, Clone, Copy, Hash)]
139pub struct BlankNodeRef<'a>(BlankNodeRefContent<'a>);
140
/// Internal storage of a [`BlankNodeRef`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum BlankNodeRefContent<'a> {
    /// An identifier kept as a plain string.
    Named(&'a str),
    /// An identifier backed by a 128-bit integer.
    Anonymous {
        /// The integer as native-endian bytes.
        id: [u8; 16],
        /// The identifier text.
        str: &'a str,
    },
}
146
147impl<'a> BlankNodeRef<'a> {
148    /// Creates a blank node from a unique identifier.
149    ///
150    /// The blank node identifier must be valid according to N-Triples, Turtle, and SPARQL grammars.
151    ///
152    /// In most cases, it is much more convenient to create a blank node using [`BlankNode::default()`].
153    /// that creates a random ID that could be easily inlined by Oxigraph stores.
154    pub fn new(id: &'a str) -> Result<Self, BlankNodeIdParseError> {
155        validate_blank_node_identifier(id)?;
156        Ok(Self::new_unchecked(id))
157    }
158
159    /// Creates a blank node from a unique identifier without validation.
160    ///
161    /// It is the caller's responsibility to ensure that `id` is a valid blank node identifier
162    /// according to N-Triples, Turtle, and SPARQL grammars.
163    ///
164    /// [`BlankNodeRef::new()`) is a safe version of this constructor and should be used for untrusted data.
165    #[inline]
166    pub fn new_unchecked(id: &'a str) -> Self {
167        if let Some(numerical_id) = to_integer_id(id) {
168            Self(BlankNodeRefContent::Anonymous {
169                id: numerical_id.to_ne_bytes(),
170                str: id,
171            })
172        } else {
173            Self(BlankNodeRefContent::Named(id))
174        }
175    }
176
177    /// Returns the underlying ID of this blank node.
178    #[inline]
179    pub const fn as_str(self) -> &'a str {
180        match self.0 {
181            BlankNodeRefContent::Named(id) => id,
182            BlankNodeRefContent::Anonymous { str, .. } => str,
183        }
184    }
185
186    /// Returns the internal numerical ID of this blank node if it has been created using [`BlankNode::new_from_unique_id`].
187    ///
188    /// ```
189    /// use oxrdf::BlankNode;
190    ///
191    /// assert_eq!(
192    ///     BlankNode::new_from_unique_id(128).as_ref().unique_id(),
193    ///     Some(128)
194    /// );
195    /// assert_eq!(BlankNode::new("foo")?.as_ref().unique_id(), None);
196    /// # Result::<_,oxrdf::BlankNodeIdParseError>::Ok(())
197    /// ```
198    #[inline]
199    pub const fn unique_id(&self) -> Option<u128> {
200        match self.0 {
201            BlankNodeRefContent::Named(_) => None,
202            BlankNodeRefContent::Anonymous { id, .. } => Some(u128::from_ne_bytes(id)),
203        }
204    }
205
206    #[inline]
207    pub fn into_owned(self) -> BlankNode {
208        BlankNode(match self.0 {
209            BlankNodeRefContent::Named(id) => BlankNodeContent::Named(id.to_owned()),
210            BlankNodeRefContent::Anonymous { id, .. } => BlankNodeContent::Anonymous {
211                id,
212                str: IdStr::new(u128::from_ne_bytes(id)),
213            },
214        })
215    }
216}
217
218impl fmt::Display for BlankNodeRef<'_> {
219    #[inline]
220    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
221        write!(f, "_:{}", self.as_str())
222    }
223}
224
225impl<'a> From<&'a BlankNode> for BlankNodeRef<'a> {
226    #[inline]
227    fn from(node: &'a BlankNode) -> Self {
228        node.as_ref()
229    }
230}
231
232impl<'a> From<BlankNodeRef<'a>> for BlankNode {
233    #[inline]
234    fn from(node: BlankNodeRef<'a>) -> Self {
235        node.into_owned()
236    }
237}
238
239impl PartialEq<BlankNode> for BlankNodeRef<'_> {
240    #[inline]
241    fn eq(&self, other: &BlankNode) -> bool {
242        *self == other.as_ref()
243    }
244}
245
246impl PartialEq<BlankNodeRef<'_>> for BlankNode {
247    #[inline]
248    fn eq(&self, other: &BlankNodeRef<'_>) -> bool {
249        self.as_ref() == *other
250    }
251}
252
/// The lowercase hexadecimal text of a `u128`, stored inline.
///
/// The text is at most 32 bytes long; unused trailing bytes are zero.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct IdStr([u8; 32]);

impl IdStr {
    /// Writes the lowercase hexadecimal form of `id` into a zero-filled buffer.
    #[inline]
    fn new(id: u128) -> Self {
        let mut buffer = [0_u8; 32];
        // A u128 has at most 32 hexadecimal digits, so the text always fits in the buffer
        write!(&mut buffer[..], "{id:x}").unwrap();
        Self(buffer)
    }

    /// Returns the stored text, i.e. the bytes before the first zero byte (or all 32 bytes).
    #[inline]
    fn as_str(&self) -> &str {
        let end = self
            .0
            .iter()
            .position(|&byte| byte == 0)
            .unwrap_or(self.0.len());
        // The buffer only holds ASCII hexadecimal digits before the first zero byte
        str::from_utf8(&self.0[..end]).unwrap()
    }
}
270
271fn validate_blank_node_identifier(id: &str) -> Result<(), BlankNodeIdParseError> {
272    let mut chars = id.chars();
273    let front = chars.next().ok_or(BlankNodeIdParseError)?;
274    match front {
275        '0'..='9'
276        | '_'
277        | ':'
278        | 'A'..='Z'
279        | 'a'..='z'
280        | '\u{00C0}'..='\u{00D6}'
281        | '\u{00D8}'..='\u{00F6}'
282        | '\u{00F8}'..='\u{02FF}'
283        | '\u{0370}'..='\u{037D}'
284        | '\u{037F}'..='\u{1FFF}'
285        | '\u{200C}'..='\u{200D}'
286        | '\u{2070}'..='\u{218F}'
287        | '\u{2C00}'..='\u{2FEF}'
288        | '\u{3001}'..='\u{D7FF}'
289        | '\u{F900}'..='\u{FDCF}'
290        | '\u{FDF0}'..='\u{FFFD}'
291        | '\u{10000}'..='\u{EFFFF}' => (),
292        _ => return Err(BlankNodeIdParseError),
293    }
294    for c in chars {
295        match c {
296            '.' // validated later
297            | '-'
298            | '0'..='9'
299            | '\u{00B7}'
300            | '\u{0300}'..='\u{036F}'
301            | '\u{203F}'..='\u{2040}'
302            | '_'
303            | ':'
304            | 'A'..='Z'
305            | 'a'..='z'
306            | '\u{00C0}'..='\u{00D6}'
307            | '\u{00D8}'..='\u{00F6}'
308            | '\u{00F8}'..='\u{02FF}'
309            | '\u{0370}'..='\u{037D}'
310            | '\u{037F}'..='\u{1FFF}'
311            | '\u{200C}'..='\u{200D}'
312            | '\u{2070}'..='\u{218F}'
313            | '\u{2C00}'..='\u{2FEF}'
314            | '\u{3001}'..='\u{D7FF}'
315            | '\u{F900}'..='\u{FDCF}'
316            | '\u{FDF0}'..='\u{FFFD}'
317            | '\u{10000}'..='\u{EFFFF}' => (),
318            _ => return Err(BlankNodeIdParseError),
319        }
320    }
321
322    // Could not end with a dot
323    if id.ends_with('.') {
324        Err(BlankNodeIdParseError)
325    } else {
326        Ok(())
327    }
328}
329
/// Parses `id` as the canonical lowercase hexadecimal text of a `u128`.
///
/// Returns `Some(value)` only if `id` is exactly what formatting `value` with `{:x}` produces:
/// lowercase digits only, no leading zeros, and a value that fits in 128 bits.
/// Returns `None` for everything else, including the empty string.
///
/// The single digit `"0"` is the canonical text of zero and is accepted, so that
/// `BlankNode::new("0")` and `BlankNode::new_from_unique_id(0)` (whose text is also `"0"`)
/// end up with the same representation.
#[inline]
fn to_integer_id(id: &str) -> Option<u128> {
    let digits = id.as_bytes();
    match digits {
        // Zero is written as a single "0": this is not a leading zero
        [b'0'] => return Some(0),
        // No empty string or leading zeros
        [] | [b'0', ..] => return None,
        _ => (),
    }
    let mut value: u128 = 0;
    for &digit in digits {
        let nibble = match digit {
            b'0'..=b'9' => digit - b'0',
            b'a'..=b'f' => digit - b'a' + 10,
            // Uppercase digits and any other character are not canonical
            _ => return None,
        };
        // Checked arithmetic rejects texts that do not fit in a u128
        value = value.checked_mul(16)?.checked_add(nibble.into())?;
    }
    Some(value)
}
349
/// An error raised during [`BlankNode`] IDs validation.
///
/// It is returned by [`BlankNode::new()`] and [`BlankNodeRef::new()`] when the given string
/// is not a valid blank node identifier. It carries no detail about the offending character.
#[derive(Debug, thiserror::Error)]
#[error("The blank node identifier is invalid")]
pub struct BlankNodeIdParseError;
354
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    use super::*;

    /// A short numerical id is rendered without padding.
    #[test]
    fn as_str_partial() {
        let b = BlankNode::new_from_unique_id(0x42);
        assert_eq!(b.as_str(), "42");
    }

    /// A numerical id using all 32 hexadecimal digits fills the whole inline buffer.
    #[test]
    fn as_str_full() {
        let b = BlankNode::new_from_unique_id(0x7777_6666_5555_4444_3333_2222_1111_0000);
        assert_eq!(b.as_str(), "77776666555544443333222211110000");
    }

    /// Empty identifiers, a leading `-` or `.`, and a trailing `.` are rejected.
    #[test]
    fn new_validation() {
        BlankNode::new("").unwrap_err();
        BlankNode::new("a").unwrap();
        BlankNode::new("-").unwrap_err();
        BlankNode::new("a-").unwrap();
        BlankNode::new(".").unwrap_err();
        BlankNode::new("a.").unwrap_err();
        BlankNode::new("a.a").unwrap();
    }

    /// Only lowercase hexadecimal identifiers are mapped to the numerical representation.
    #[test]
    fn new_numerical() {
        assert_eq!(
            BlankNode::new("100a").unwrap(),
            BlankNode::new_from_unique_id(0x100a),
        );
        assert_ne!(
            BlankNode::new("100A").unwrap(),
            BlankNode::new_from_unique_id(0x100a)
        );
    }

    /// Owned and borrowed blank nodes built from the same identifier compare equal.
    #[test]
    fn test_equals() {
        assert_eq!(
            BlankNode::new("100a").unwrap(),
            BlankNodeRef::new("100a").unwrap()
        );
        assert_eq!(
            BlankNode::new("zzz").unwrap(),
            BlankNodeRef::new("zzz").unwrap()
        );
    }

    /// Pins the memory layout of the public types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size_and_alignment() {
        // Imported here so that the import is compiled under exactly the same
        // condition as its only user
        use std::mem::{align_of, size_of};

        assert_eq!(size_of::<BlankNode>(), 56);
        assert_eq!(size_of::<BlankNodeRef<'_>>(), 32);
        assert_eq!(align_of::<BlankNode>(), 8);
        assert_eq!(align_of::<BlankNodeRef<'_>>(), 8);
    }
}
417}