oxrdf/
interning.rs

//! Interning of RDF elements into compact keys using a hash-based string interner
2
3use crate::*;
4use std::collections::hash_map::{Entry, HashMap, RandomState};
5use std::hash::{BuildHasher, Hasher};
6
7#[derive(Debug, Default, Clone)]
8pub struct Interner {
9    hasher: RandomState,
10    string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
11    string_for_blank_node_id: HashMap<u128, String>,
12    #[cfg(feature = "rdf-star")]
13    triples: HashMap<InternedTriple, Triple>,
14}
15
16impl Interner {
17    #[allow(clippy::never_loop)]
18    fn get_or_intern(&mut self, value: &str) -> Key {
19        let mut hash = self.hash(value);
20        loop {
21            match self.string_for_hash.entry(hash) {
22                Entry::Vacant(e) => {
23                    e.insert(value.into());
24                    return Key(hash);
25                }
26                Entry::Occupied(e) => loop {
27                    if e.get() == value {
28                        return Key(hash);
29                    } else if hash == u64::MAX - 1 {
30                        hash = 0;
31                    } else {
32                        hash += 1;
33                    }
34                },
35            }
36        }
37    }
38
39    fn get(&self, value: &str) -> Option<Key> {
40        let mut hash = self.hash(value);
41        loop {
42            let v = self.string_for_hash.get(&hash)?;
43            if v == value {
44                return Some(Key(hash));
45            } else if hash == u64::MAX - 1 {
46                hash = 0;
47            } else {
48                hash += 1;
49            }
50        }
51    }
52
53    fn hash(&self, value: &str) -> u64 {
54        let hash = self.hasher.hash_one(value);
55        if hash == u64::MAX {
56            0
57        } else {
58            hash
59        }
60    }
61
62    fn resolve(&self, key: Key) -> &str {
63        &self.string_for_hash[&key.0]
64    }
65}
66
/// Identifier of an interned string: a thin wrapper around the `u64` slot it is stored under.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    /// Smallest key in the derived ordering.
    fn first() -> Self {
        Self(u64::MIN)
    }

    /// Key directly after `self`; stays at `u64::MAX` instead of overflowing.
    fn next(self) -> Self {
        let Self(raw) = self;
        Self(raw.checked_add(1).unwrap_or(u64::MAX))
    }

    /// Largest possible key, wrapping `u64::MAX`.
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
83
84#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
85pub struct InternedNamedNode {
86    id: Key,
87}
88
89impl InternedNamedNode {
90    pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
91        Self {
92            id: interner.get_or_intern(named_node.as_str()),
93        }
94    }
95
96    pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
97        Some(Self {
98            id: interner.get(named_node.as_str())?,
99        })
100    }
101
102    pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
103        NamedNodeRef::new_unchecked(interner.resolve(self.id))
104    }
105
106    pub fn first() -> Self {
107        Self { id: Key::first() }
108    }
109
110    pub fn next(self) -> Self {
111        Self { id: self.id.next() }
112    }
113
114    pub fn impossible() -> Self {
115        Self {
116            id: Key::impossible(),
117        }
118    }
119}
120
121#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
122pub enum InternedBlankNode {
123    Number { id: u128 },
124    Other { id: Key },
125}
126
127impl InternedBlankNode {
128    pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
129        if let Some(id) = blank_node.unique_id() {
130            interner
131                .string_for_blank_node_id
132                .entry(id)
133                .or_insert_with(|| blank_node.as_str().into());
134            Self::Number { id }
135        } else {
136            Self::Other {
137                id: interner.get_or_intern(blank_node.as_str()),
138            }
139        }
140    }
141
142    pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
143        if let Some(id) = blank_node.unique_id() {
144            interner
145                .string_for_blank_node_id
146                .contains_key(&id)
147                .then_some(Self::Number { id })
148        } else {
149            Some(Self::Other {
150                id: interner.get(blank_node.as_str())?,
151            })
152        }
153    }
154
155    pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
156        BlankNodeRef::new_unchecked(match self {
157            Self::Number { id } => &interner.string_for_blank_node_id[&id],
158            Self::Other { id } => interner.resolve(id),
159        })
160    }
161
162    pub fn next(self) -> Self {
163        match self {
164            Self::Number { id } => Self::Number {
165                id: id.saturating_add(1),
166            },
167            Self::Other { id } => Self::Other { id: id.next() },
168        }
169    }
170}
171
172#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
173pub enum InternedLiteral {
174    String {
175        value_id: Key,
176    },
177    LanguageTaggedString {
178        value_id: Key,
179        language_id: Key,
180    },
181    TypedLiteral {
182        value_id: Key,
183        datatype: InternedNamedNode,
184    },
185}
186
187impl InternedLiteral {
188    pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
189        let value_id = interner.get_or_intern(literal.value());
190        if literal.is_plain() {
191            if let Some(language) = literal.language() {
192                Self::LanguageTaggedString {
193                    value_id,
194                    language_id: interner.get_or_intern(language),
195                }
196            } else {
197                Self::String { value_id }
198            }
199        } else {
200            Self::TypedLiteral {
201                value_id,
202                datatype: InternedNamedNode::encoded_into(literal.datatype(), interner),
203            }
204        }
205    }
206
207    pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
208        let value_id = interner.get(literal.value())?;
209        Some(if literal.is_plain() {
210            if let Some(language) = literal.language() {
211                Self::LanguageTaggedString {
212                    value_id,
213                    language_id: interner.get(language)?,
214                }
215            } else {
216                Self::String { value_id }
217            }
218        } else {
219            Self::TypedLiteral {
220                value_id,
221                datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?,
222            }
223        })
224    }
225
226    pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
227        match self {
228            Self::String { value_id } => {
229                LiteralRef::new_simple_literal(interner.resolve(*value_id))
230            }
231            Self::LanguageTaggedString {
232                value_id,
233                language_id,
234            } => LiteralRef::new_language_tagged_literal_unchecked(
235                interner.resolve(*value_id),
236                interner.resolve(*language_id),
237            ),
238            Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
239                interner.resolve(*value_id),
240                datatype.decode_from(interner),
241            ),
242        }
243    }
244
245    pub fn next(&self) -> Self {
246        match self {
247            Self::String { value_id } => Self::String {
248                value_id: value_id.next(),
249            },
250            Self::LanguageTaggedString {
251                value_id,
252                language_id,
253            } => Self::LanguageTaggedString {
254                value_id: *value_id,
255                language_id: language_id.next(),
256            },
257            Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
258                value_id: *value_id,
259                datatype: datatype.next(),
260            },
261        }
262    }
263}
264
265#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
266pub enum InternedSubject {
267    NamedNode(InternedNamedNode),
268    BlankNode(InternedBlankNode),
269    #[cfg(feature = "rdf-star")]
270    Triple(Box<InternedTriple>),
271}
272
273impl InternedSubject {
274    pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self {
275        match node {
276            SubjectRef::NamedNode(node) => {
277                Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
278            }
279            SubjectRef::BlankNode(node) => {
280                Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
281            }
282            #[cfg(feature = "rdf-star")]
283            SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
284                triple.as_ref(),
285                interner,
286            ))),
287        }
288    }
289
290    pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option<Self> {
291        Some(match node {
292            SubjectRef::NamedNode(node) => {
293                Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
294            }
295            SubjectRef::BlankNode(node) => {
296                Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
297            }
298            #[cfg(feature = "rdf-star")]
299            SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
300                triple.as_ref(),
301                interner,
302            )?)),
303        })
304    }
305
306    pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> {
307        match self {
308            Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)),
309            Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)),
310            #[cfg(feature = "rdf-star")]
311            Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]),
312        }
313    }
314
315    pub fn first() -> Self {
316        Self::NamedNode(InternedNamedNode::first())
317    }
318
319    pub fn next(&self) -> Self {
320        match self {
321            Self::NamedNode(node) => Self::NamedNode(node.next()),
322            Self::BlankNode(node) => Self::BlankNode(node.next()),
323            #[cfg(feature = "rdf-star")]
324            Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
325        }
326    }
327
328    pub fn impossible() -> Self {
329        Self::NamedNode(InternedNamedNode::impossible())
330    }
331}
332
333#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
334pub enum InternedGraphName {
335    DefaultGraph,
336    NamedNode(InternedNamedNode),
337    BlankNode(InternedBlankNode),
338}
339
340impl InternedGraphName {
341    pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self {
342        match node {
343            GraphNameRef::DefaultGraph => Self::DefaultGraph,
344            GraphNameRef::NamedNode(node) => {
345                Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
346            }
347            GraphNameRef::BlankNode(node) => {
348                Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
349            }
350        }
351    }
352
353    pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option<Self> {
354        Some(match node {
355            GraphNameRef::DefaultGraph => Self::DefaultGraph,
356            GraphNameRef::NamedNode(node) => {
357                Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
358            }
359            GraphNameRef::BlankNode(node) => {
360                Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
361            }
362        })
363    }
364
365    pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> {
366        match self {
367            Self::DefaultGraph => GraphNameRef::DefaultGraph,
368            Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)),
369            Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)),
370        }
371    }
372
373    pub fn first() -> Self {
374        Self::DefaultGraph
375    }
376
377    pub fn next(&self) -> Self {
378        match self {
379            Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()),
380            Self::NamedNode(node) => Self::NamedNode(node.next()),
381            Self::BlankNode(node) => Self::BlankNode(node.next()),
382        }
383    }
384
385    pub fn impossible() -> Self {
386        Self::NamedNode(InternedNamedNode::impossible())
387    }
388}
389
390#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
391pub enum InternedTerm {
392    NamedNode(InternedNamedNode),
393    BlankNode(InternedBlankNode),
394    Literal(InternedLiteral),
395    #[cfg(feature = "rdf-star")]
396    Triple(Box<InternedTriple>),
397}
398
399impl InternedTerm {
400    pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self {
401        match term {
402            TermRef::NamedNode(term) => {
403                Self::NamedNode(InternedNamedNode::encoded_into(term, interner))
404            }
405            TermRef::BlankNode(term) => {
406                Self::BlankNode(InternedBlankNode::encoded_into(term, interner))
407            }
408            TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)),
409            #[cfg(feature = "rdf-star")]
410            TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
411                triple.as_ref(),
412                interner,
413            ))),
414        }
415    }
416
417    pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option<Self> {
418        Some(match term {
419            TermRef::NamedNode(term) => {
420                Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?)
421            }
422            TermRef::BlankNode(term) => {
423                Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?)
424            }
425            TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?),
426            #[cfg(feature = "rdf-star")]
427            TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
428                triple.as_ref(),
429                interner,
430            )?)),
431        })
432    }
433
434    pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> {
435        match self {
436            Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)),
437            Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)),
438            Self::Literal(term) => TermRef::Literal(term.decode_from(interner)),
439            #[cfg(feature = "rdf-star")]
440            Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]),
441        }
442    }
443
444    pub fn first() -> Self {
445        Self::NamedNode(InternedNamedNode::first())
446    }
447
448    pub fn next(&self) -> Self {
449        match self {
450            Self::NamedNode(node) => Self::NamedNode(node.next()),
451            Self::BlankNode(node) => Self::BlankNode(node.next()),
452            Self::Literal(node) => Self::Literal(node.next()),
453            #[cfg(feature = "rdf-star")]
454            Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
455        }
456    }
457
458    pub fn impossible() -> Self {
459        Self::NamedNode(InternedNamedNode::impossible())
460    }
461}
462
463#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
464pub struct InternedTriple {
465    pub subject: InternedSubject,
466    pub predicate: InternedNamedNode,
467    pub object: InternedTerm,
468}
469
470#[cfg(feature = "rdf-star")]
471impl InternedTriple {
472    pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self {
473        let interned_triple = Self {
474            subject: InternedSubject::encoded_into(triple.subject, interner),
475            predicate: InternedNamedNode::encoded_into(triple.predicate, interner),
476            object: InternedTerm::encoded_into(triple.object, interner),
477        };
478        interner
479            .triples
480            .insert(interned_triple.clone(), triple.into_owned());
481        interned_triple
482    }
483
484    pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option<Self> {
485        let interned_triple = Self {
486            subject: InternedSubject::encoded_from(triple.subject, interner)?,
487            predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?,
488            object: InternedTerm::encoded_from(triple.object, interner)?,
489        };
490        interner
491            .triples
492            .contains_key(&interned_triple)
493            .then_some(interned_triple)
494    }
495
496    pub fn next(&self) -> Self {
497        Self {
498            subject: self.subject.clone(),
499            predicate: self.predicate,
500            object: self.object.next(),
501        }
502    }
503}
504
505#[derive(Default, Clone)]
506struct IdentityHasherBuilder;
507
508impl BuildHasher for IdentityHasherBuilder {
509    type Hasher = IdentityHasher;
510
511    fn build_hasher(&self) -> Self::Hasher {
512        Self::Hasher::default()
513    }
514}
515
/// [`Hasher`] that performs no hashing: it reports the last `u64` written to it unchanged.
#[derive(Default)]
struct IdentityHasher {
    /// Last value received through `write_u64` (`0` until then).
    value: u64,
}

impl Hasher for IdentityHasher {
    /// Returns the stored value as is.
    fn finish(&self) -> u64 {
        let Self { value } = self;
        *value
    }

    /// Raw byte input is not supported and always panics.
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Should only be used on u64 values")
    }

    /// Stores `i`, replacing any previously written value.
    fn write_u64(&mut self, i: u64) {
        let Self { value } = self;
        *value = i;
    }
}
533}