1use crate::*;
4use std::collections::hash_map::{Entry, HashMap, RandomState};
5use std::hash::{BuildHasher, Hasher};
6
7#[derive(Debug, Default, Clone)]
8pub struct Interner {
9 hasher: RandomState,
10 string_for_hash: HashMap<u64, String, IdentityHasherBuilder>,
11 string_for_blank_node_id: HashMap<u128, String>,
12 #[cfg(feature = "rdf-star")]
13 triples: HashMap<InternedTriple, Triple>,
14}
15
16impl Interner {
17 #[allow(clippy::never_loop)]
18 fn get_or_intern(&mut self, value: &str) -> Key {
19 let mut hash = self.hash(value);
20 loop {
21 match self.string_for_hash.entry(hash) {
22 Entry::Vacant(e) => {
23 e.insert(value.into());
24 return Key(hash);
25 }
26 Entry::Occupied(e) => loop {
27 if e.get() == value {
28 return Key(hash);
29 } else if hash == u64::MAX - 1 {
30 hash = 0;
31 } else {
32 hash += 1;
33 }
34 },
35 }
36 }
37 }
38
39 fn get(&self, value: &str) -> Option<Key> {
40 let mut hash = self.hash(value);
41 loop {
42 let v = self.string_for_hash.get(&hash)?;
43 if v == value {
44 return Some(Key(hash));
45 } else if hash == u64::MAX - 1 {
46 hash = 0;
47 } else {
48 hash += 1;
49 }
50 }
51 }
52
53 fn hash(&self, value: &str) -> u64 {
54 let hash = self.hasher.hash_one(value);
55 if hash == u64::MAX {
56 0
57 } else {
58 hash
59 }
60 }
61
62 fn resolve(&self, key: Key) -> &str {
63 &self.string_for_hash[&key.0]
64 }
65}
66
/// Handle to an interned string: the `u64` slot the string occupies in the interner.
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
pub struct Key(u64);

impl Key {
    /// Smallest key (slot `0`).
    fn first() -> Self {
        Self(u64::MIN)
    }

    /// Key that follows this one; saturates at `u64::MAX` instead of overflowing.
    fn next(self) -> Self {
        let Self(slot) = self;
        Self(slot.saturating_add(1))
    }

    /// Largest key (`u64::MAX`). The interner never stores a string in that slot, so this
    /// key never designates an interned string.
    fn impossible() -> Self {
        Self(u64::MAX)
    }
}
83
84#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
85pub struct InternedNamedNode {
86 id: Key,
87}
88
89impl InternedNamedNode {
90 pub fn encoded_into(named_node: NamedNodeRef<'_>, interner: &mut Interner) -> Self {
91 Self {
92 id: interner.get_or_intern(named_node.as_str()),
93 }
94 }
95
96 pub fn encoded_from(named_node: NamedNodeRef<'_>, interner: &Interner) -> Option<Self> {
97 Some(Self {
98 id: interner.get(named_node.as_str())?,
99 })
100 }
101
102 pub fn decode_from(self, interner: &Interner) -> NamedNodeRef<'_> {
103 NamedNodeRef::new_unchecked(interner.resolve(self.id))
104 }
105
106 pub fn first() -> Self {
107 Self { id: Key::first() }
108 }
109
110 pub fn next(self) -> Self {
111 Self { id: self.id.next() }
112 }
113
114 pub fn impossible() -> Self {
115 Self {
116 id: Key::impossible(),
117 }
118 }
119}
120
121#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
122pub enum InternedBlankNode {
123 Number { id: u128 },
124 Other { id: Key },
125}
126
127impl InternedBlankNode {
128 pub fn encoded_into(blank_node: BlankNodeRef<'_>, interner: &mut Interner) -> Self {
129 if let Some(id) = blank_node.unique_id() {
130 interner
131 .string_for_blank_node_id
132 .entry(id)
133 .or_insert_with(|| blank_node.as_str().into());
134 Self::Number { id }
135 } else {
136 Self::Other {
137 id: interner.get_or_intern(blank_node.as_str()),
138 }
139 }
140 }
141
142 pub fn encoded_from(blank_node: BlankNodeRef<'_>, interner: &Interner) -> Option<Self> {
143 if let Some(id) = blank_node.unique_id() {
144 interner
145 .string_for_blank_node_id
146 .contains_key(&id)
147 .then_some(Self::Number { id })
148 } else {
149 Some(Self::Other {
150 id: interner.get(blank_node.as_str())?,
151 })
152 }
153 }
154
155 pub fn decode_from(self, interner: &Interner) -> BlankNodeRef<'_> {
156 BlankNodeRef::new_unchecked(match self {
157 Self::Number { id } => &interner.string_for_blank_node_id[&id],
158 Self::Other { id } => interner.resolve(id),
159 })
160 }
161
162 pub fn next(self) -> Self {
163 match self {
164 Self::Number { id } => Self::Number {
165 id: id.saturating_add(1),
166 },
167 Self::Other { id } => Self::Other { id: id.next() },
168 }
169 }
170}
171
172#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
173pub enum InternedLiteral {
174 String {
175 value_id: Key,
176 },
177 LanguageTaggedString {
178 value_id: Key,
179 language_id: Key,
180 },
181 TypedLiteral {
182 value_id: Key,
183 datatype: InternedNamedNode,
184 },
185}
186
187impl InternedLiteral {
188 pub fn encoded_into(literal: LiteralRef<'_>, interner: &mut Interner) -> Self {
189 let value_id = interner.get_or_intern(literal.value());
190 if literal.is_plain() {
191 if let Some(language) = literal.language() {
192 Self::LanguageTaggedString {
193 value_id,
194 language_id: interner.get_or_intern(language),
195 }
196 } else {
197 Self::String { value_id }
198 }
199 } else {
200 Self::TypedLiteral {
201 value_id,
202 datatype: InternedNamedNode::encoded_into(literal.datatype(), interner),
203 }
204 }
205 }
206
207 pub fn encoded_from(literal: LiteralRef<'_>, interner: &Interner) -> Option<Self> {
208 let value_id = interner.get(literal.value())?;
209 Some(if literal.is_plain() {
210 if let Some(language) = literal.language() {
211 Self::LanguageTaggedString {
212 value_id,
213 language_id: interner.get(language)?,
214 }
215 } else {
216 Self::String { value_id }
217 }
218 } else {
219 Self::TypedLiteral {
220 value_id,
221 datatype: InternedNamedNode::encoded_from(literal.datatype(), interner)?,
222 }
223 })
224 }
225
226 pub fn decode_from<'a>(&self, interner: &'a Interner) -> LiteralRef<'a> {
227 match self {
228 Self::String { value_id } => {
229 LiteralRef::new_simple_literal(interner.resolve(*value_id))
230 }
231 Self::LanguageTaggedString {
232 value_id,
233 language_id,
234 } => LiteralRef::new_language_tagged_literal_unchecked(
235 interner.resolve(*value_id),
236 interner.resolve(*language_id),
237 ),
238 Self::TypedLiteral { value_id, datatype } => LiteralRef::new_typed_literal(
239 interner.resolve(*value_id),
240 datatype.decode_from(interner),
241 ),
242 }
243 }
244
245 pub fn next(&self) -> Self {
246 match self {
247 Self::String { value_id } => Self::String {
248 value_id: value_id.next(),
249 },
250 Self::LanguageTaggedString {
251 value_id,
252 language_id,
253 } => Self::LanguageTaggedString {
254 value_id: *value_id,
255 language_id: language_id.next(),
256 },
257 Self::TypedLiteral { value_id, datatype } => Self::TypedLiteral {
258 value_id: *value_id,
259 datatype: datatype.next(),
260 },
261 }
262 }
263}
264
265#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
266pub enum InternedSubject {
267 NamedNode(InternedNamedNode),
268 BlankNode(InternedBlankNode),
269 #[cfg(feature = "rdf-star")]
270 Triple(Box<InternedTriple>),
271}
272
273impl InternedSubject {
274 pub fn encoded_into(node: SubjectRef<'_>, interner: &mut Interner) -> Self {
275 match node {
276 SubjectRef::NamedNode(node) => {
277 Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
278 }
279 SubjectRef::BlankNode(node) => {
280 Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
281 }
282 #[cfg(feature = "rdf-star")]
283 SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
284 triple.as_ref(),
285 interner,
286 ))),
287 }
288 }
289
290 pub fn encoded_from(node: SubjectRef<'_>, interner: &Interner) -> Option<Self> {
291 Some(match node {
292 SubjectRef::NamedNode(node) => {
293 Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
294 }
295 SubjectRef::BlankNode(node) => {
296 Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
297 }
298 #[cfg(feature = "rdf-star")]
299 SubjectRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
300 triple.as_ref(),
301 interner,
302 )?)),
303 })
304 }
305
306 pub fn decode_from<'a>(&self, interner: &'a Interner) -> SubjectRef<'a> {
307 match self {
308 Self::NamedNode(node) => SubjectRef::NamedNode(node.decode_from(interner)),
309 Self::BlankNode(node) => SubjectRef::BlankNode(node.decode_from(interner)),
310 #[cfg(feature = "rdf-star")]
311 Self::Triple(triple) => SubjectRef::Triple(&interner.triples[triple.as_ref()]),
312 }
313 }
314
315 pub fn first() -> Self {
316 Self::NamedNode(InternedNamedNode::first())
317 }
318
319 pub fn next(&self) -> Self {
320 match self {
321 Self::NamedNode(node) => Self::NamedNode(node.next()),
322 Self::BlankNode(node) => Self::BlankNode(node.next()),
323 #[cfg(feature = "rdf-star")]
324 Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
325 }
326 }
327
328 pub fn impossible() -> Self {
329 Self::NamedNode(InternedNamedNode::impossible())
330 }
331}
332
333#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
334pub enum InternedGraphName {
335 DefaultGraph,
336 NamedNode(InternedNamedNode),
337 BlankNode(InternedBlankNode),
338}
339
340impl InternedGraphName {
341 pub fn encoded_into(node: GraphNameRef<'_>, interner: &mut Interner) -> Self {
342 match node {
343 GraphNameRef::DefaultGraph => Self::DefaultGraph,
344 GraphNameRef::NamedNode(node) => {
345 Self::NamedNode(InternedNamedNode::encoded_into(node, interner))
346 }
347 GraphNameRef::BlankNode(node) => {
348 Self::BlankNode(InternedBlankNode::encoded_into(node, interner))
349 }
350 }
351 }
352
353 pub fn encoded_from(node: GraphNameRef<'_>, interner: &Interner) -> Option<Self> {
354 Some(match node {
355 GraphNameRef::DefaultGraph => Self::DefaultGraph,
356 GraphNameRef::NamedNode(node) => {
357 Self::NamedNode(InternedNamedNode::encoded_from(node, interner)?)
358 }
359 GraphNameRef::BlankNode(node) => {
360 Self::BlankNode(InternedBlankNode::encoded_from(node, interner)?)
361 }
362 })
363 }
364
365 pub fn decode_from<'a>(&self, interner: &'a Interner) -> GraphNameRef<'a> {
366 match self {
367 Self::DefaultGraph => GraphNameRef::DefaultGraph,
368 Self::NamedNode(node) => GraphNameRef::NamedNode(node.decode_from(interner)),
369 Self::BlankNode(node) => GraphNameRef::BlankNode(node.decode_from(interner)),
370 }
371 }
372
373 pub fn first() -> Self {
374 Self::DefaultGraph
375 }
376
377 pub fn next(&self) -> Self {
378 match self {
379 Self::DefaultGraph => Self::NamedNode(InternedNamedNode::first()),
380 Self::NamedNode(node) => Self::NamedNode(node.next()),
381 Self::BlankNode(node) => Self::BlankNode(node.next()),
382 }
383 }
384
385 pub fn impossible() -> Self {
386 Self::NamedNode(InternedNamedNode::impossible())
387 }
388}
389
390#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
391pub enum InternedTerm {
392 NamedNode(InternedNamedNode),
393 BlankNode(InternedBlankNode),
394 Literal(InternedLiteral),
395 #[cfg(feature = "rdf-star")]
396 Triple(Box<InternedTriple>),
397}
398
399impl InternedTerm {
400 pub fn encoded_into(term: TermRef<'_>, interner: &mut Interner) -> Self {
401 match term {
402 TermRef::NamedNode(term) => {
403 Self::NamedNode(InternedNamedNode::encoded_into(term, interner))
404 }
405 TermRef::BlankNode(term) => {
406 Self::BlankNode(InternedBlankNode::encoded_into(term, interner))
407 }
408 TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_into(term, interner)),
409 #[cfg(feature = "rdf-star")]
410 TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_into(
411 triple.as_ref(),
412 interner,
413 ))),
414 }
415 }
416
417 pub fn encoded_from(term: TermRef<'_>, interner: &Interner) -> Option<Self> {
418 Some(match term {
419 TermRef::NamedNode(term) => {
420 Self::NamedNode(InternedNamedNode::encoded_from(term, interner)?)
421 }
422 TermRef::BlankNode(term) => {
423 Self::BlankNode(InternedBlankNode::encoded_from(term, interner)?)
424 }
425 TermRef::Literal(term) => Self::Literal(InternedLiteral::encoded_from(term, interner)?),
426 #[cfg(feature = "rdf-star")]
427 TermRef::Triple(triple) => Self::Triple(Box::new(InternedTriple::encoded_from(
428 triple.as_ref(),
429 interner,
430 )?)),
431 })
432 }
433
434 pub fn decode_from<'a>(&self, interner: &'a Interner) -> TermRef<'a> {
435 match self {
436 Self::NamedNode(term) => TermRef::NamedNode(term.decode_from(interner)),
437 Self::BlankNode(term) => TermRef::BlankNode(term.decode_from(interner)),
438 Self::Literal(term) => TermRef::Literal(term.decode_from(interner)),
439 #[cfg(feature = "rdf-star")]
440 Self::Triple(triple) => TermRef::Triple(&interner.triples[triple.as_ref()]),
441 }
442 }
443
444 pub fn first() -> Self {
445 Self::NamedNode(InternedNamedNode::first())
446 }
447
448 pub fn next(&self) -> Self {
449 match self {
450 Self::NamedNode(node) => Self::NamedNode(node.next()),
451 Self::BlankNode(node) => Self::BlankNode(node.next()),
452 Self::Literal(node) => Self::Literal(node.next()),
453 #[cfg(feature = "rdf-star")]
454 Self::Triple(triple) => Self::Triple(Box::new(triple.next())),
455 }
456 }
457
458 pub fn impossible() -> Self {
459 Self::NamedNode(InternedNamedNode::impossible())
460 }
461}
462
463#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Hash)]
464pub struct InternedTriple {
465 pub subject: InternedSubject,
466 pub predicate: InternedNamedNode,
467 pub object: InternedTerm,
468}
469
470#[cfg(feature = "rdf-star")]
471impl InternedTriple {
472 pub fn encoded_into(triple: TripleRef<'_>, interner: &mut Interner) -> Self {
473 let interned_triple = Self {
474 subject: InternedSubject::encoded_into(triple.subject, interner),
475 predicate: InternedNamedNode::encoded_into(triple.predicate, interner),
476 object: InternedTerm::encoded_into(triple.object, interner),
477 };
478 interner
479 .triples
480 .insert(interned_triple.clone(), triple.into_owned());
481 interned_triple
482 }
483
484 pub fn encoded_from(triple: TripleRef<'_>, interner: &Interner) -> Option<Self> {
485 let interned_triple = Self {
486 subject: InternedSubject::encoded_from(triple.subject, interner)?,
487 predicate: InternedNamedNode::encoded_from(triple.predicate, interner)?,
488 object: InternedTerm::encoded_from(triple.object, interner)?,
489 };
490 interner
491 .triples
492 .contains_key(&interned_triple)
493 .then_some(interned_triple)
494 }
495
496 pub fn next(&self) -> Self {
497 Self {
498 subject: self.subject.clone(),
499 predicate: self.predicate,
500 object: self.object.next(),
501 }
502 }
503}
504
/// [`BuildHasher`] handing out [`IdentityHasher`]s, for maps whose `u64` keys are already
/// hashes and do not need to be hashed a second time.
#[derive(Default, Clone)]
struct IdentityHasherBuilder;

impl BuildHasher for IdentityHasherBuilder {
    type Hasher = IdentityHasher;

    fn build_hasher(&self) -> IdentityHasher {
        IdentityHasher::default()
    }
}

/// [`Hasher`] whose output is the last `u64` written to it (`0` if nothing was written).
#[derive(Default)]
struct IdentityHasher {
    value: u64,
}

impl Hasher for IdentityHasher {
    fn finish(&self) -> u64 {
        self.value
    }

    /// Raw byte input is rejected: this hasher only accepts `u64` values.
    fn write(&mut self, _bytes: &[u8]) {
        unreachable!("Should only be used on u64 values")
    }

    /// Stores `i` as the hash, replacing any previously written value.
    fn write_u64(&mut self, i: u64) {
        self.value = i;
    }
}