oxigraph/storage/
numeric_encoder.rs

1#![allow(clippy::unreadable_literal)]
2
3use crate::model::*;
4use crate::storage::error::{CorruptionError, StorageError};
5use crate::storage::small_string::SmallString;
6use oxsdatatypes::*;
7use siphasher::sip128::{Hasher128, SipHasher24};
8use std::fmt::Debug;
9use std::hash::{Hash, Hasher};
10use std::mem::discriminant;
11use std::str;
12use std::sync::Arc;
13
/// 128-bit hash of a string, kept as 16 raw bytes.
///
/// Values are created by hashing a string with [`StrHash::new`] or by wrapping
/// existing bytes with [`StrHash::from_be_bytes`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct StrHash {
    /// The hash bytes, exactly as returned by `to_be_bytes`.
    hash: [u8; 16],
}
18
19impl StrHash {
20    pub fn new(value: &str) -> Self {
21        let mut hasher = SipHasher24::new();
22        hasher.write(value.as_bytes());
23        Self {
24            hash: u128::from(hasher.finish128()).to_be_bytes(),
25        }
26    }
27
28    #[inline]
29    pub fn from_be_bytes(hash: [u8; 16]) -> Self {
30        Self { hash }
31    }
32
33    #[inline]
34    pub fn to_be_bytes(self) -> [u8; 16] {
35        self.hash
36    }
37}
38
39impl Hash for StrHash {
40    #[inline]
41    #[allow(clippy::host_endian_bytes)]
42    fn hash<H: Hasher>(&self, state: &mut H) {
43        state.write_u128(u128::from_ne_bytes(self.hash))
44    }
45}
46
47#[derive(Debug, Clone)]
48pub enum EncodedTerm {
49    DefaultGraph, // TODO: do we still need it?
50    NamedNode {
51        iri_id: StrHash,
52    },
53    NumericalBlankNode {
54        id: [u8; 16],
55    },
56    SmallBlankNode(SmallString),
57    BigBlankNode {
58        id_id: StrHash,
59    },
60    SmallStringLiteral(SmallString),
61    BigStringLiteral {
62        value_id: StrHash,
63    },
64    SmallSmallLangStringLiteral {
65        value: SmallString,
66        language: SmallString,
67    },
68    SmallBigLangStringLiteral {
69        value: SmallString,
70        language_id: StrHash,
71    },
72    BigSmallLangStringLiteral {
73        value_id: StrHash,
74        language: SmallString,
75    },
76    BigBigLangStringLiteral {
77        value_id: StrHash,
78        language_id: StrHash,
79    },
80    SmallTypedLiteral {
81        value: SmallString,
82        datatype_id: StrHash,
83    },
84    BigTypedLiteral {
85        value_id: StrHash,
86        datatype_id: StrHash,
87    },
88    BooleanLiteral(Boolean),
89    FloatLiteral(Float),
90    DoubleLiteral(Double),
91    IntegerLiteral(Integer),
92    DecimalLiteral(Decimal),
93    DateTimeLiteral(DateTime),
94    TimeLiteral(Time),
95    DateLiteral(Date),
96    GYearMonthLiteral(GYearMonth),
97    GYearLiteral(GYear),
98    GMonthDayLiteral(GMonthDay),
99    GDayLiteral(GDay),
100    GMonthLiteral(GMonth),
101    DurationLiteral(Duration),
102    YearMonthDurationLiteral(YearMonthDuration),
103    DayTimeDurationLiteral(DayTimeDuration),
104    Triple(Arc<EncodedTriple>),
105}
106
107impl PartialEq for EncodedTerm {
108    fn eq(&self, other: &Self) -> bool {
109        discriminant(self) == discriminant(other)
110            && match (self, other) {
111                (Self::DefaultGraph, Self::DefaultGraph) => true,
112                (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => {
113                    iri_id_a == iri_id_b
114                }
115                (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
116                    id_a == id_b
117                }
118                (Self::SmallBlankNode(id_a), Self::SmallBlankNode(id_b)) => id_a == id_b,
119                (Self::BigBlankNode { id_id: id_a }, Self::BigBlankNode { id_id: id_b }) => {
120                    id_a == id_b
121                }
122                (Self::SmallStringLiteral(a), Self::SmallStringLiteral(b)) => a == b,
123                (
124                    Self::BigStringLiteral {
125                        value_id: value_id_a,
126                    },
127                    Self::BigStringLiteral {
128                        value_id: value_id_b,
129                    },
130                ) => value_id_a == value_id_b,
131                (
132                    Self::SmallSmallLangStringLiteral {
133                        value: value_a,
134                        language: language_a,
135                    },
136                    Self::SmallSmallLangStringLiteral {
137                        value: value_b,
138                        language: language_b,
139                    },
140                ) => value_a == value_b && language_a == language_b,
141                (
142                    Self::SmallBigLangStringLiteral {
143                        value: value_a,
144                        language_id: language_id_a,
145                    },
146                    Self::SmallBigLangStringLiteral {
147                        value: value_b,
148                        language_id: language_id_b,
149                    },
150                ) => value_a == value_b && language_id_a == language_id_b,
151                (
152                    Self::BigSmallLangStringLiteral {
153                        value_id: value_id_a,
154                        language: language_a,
155                    },
156                    Self::BigSmallLangStringLiteral {
157                        value_id: value_id_b,
158                        language: language_b,
159                    },
160                ) => value_id_a == value_id_b && language_a == language_b,
161                (
162                    Self::BigBigLangStringLiteral {
163                        value_id: value_id_a,
164                        language_id: language_id_a,
165                    },
166                    Self::BigBigLangStringLiteral {
167                        value_id: value_id_b,
168                        language_id: language_id_b,
169                    },
170                ) => value_id_a == value_id_b && language_id_a == language_id_b,
171                (
172                    Self::SmallTypedLiteral {
173                        value: value_a,
174                        datatype_id: datatype_id_a,
175                    },
176                    Self::SmallTypedLiteral {
177                        value: value_b,
178                        datatype_id: datatype_id_b,
179                    },
180                ) => value_a == value_b && datatype_id_a == datatype_id_b,
181                (
182                    Self::BigTypedLiteral {
183                        value_id: value_id_a,
184                        datatype_id: datatype_id_a,
185                    },
186                    Self::BigTypedLiteral {
187                        value_id: value_id_b,
188                        datatype_id: datatype_id_b,
189                    },
190                ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
191                (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
192                (Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(*b),
193                (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(*b),
194                (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(*b),
195                (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(*b),
196                (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(*b),
197                (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(*b),
198                (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(*b),
199                (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(*b),
200                (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(*b),
201                (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(*b),
202                (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(*b),
203                (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(*b),
204                (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(*b),
205                (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => {
206                    a.is_identical_with(*b)
207                }
208                (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => {
209                    a.is_identical_with(*b)
210                }
211                (Self::Triple(a), Self::Triple(b)) => a == b,
212                (_, _) => unreachable!(),
213            }
214    }
215}
216
217impl Eq for EncodedTerm {}
218
219impl Hash for EncodedTerm {
220    fn hash<H: Hasher>(&self, state: &mut H) {
221        discriminant(self).hash(state);
222        match self {
223            Self::NamedNode { iri_id } => iri_id.hash(state),
224            Self::NumericalBlankNode { id } => id.hash(state),
225            Self::SmallBlankNode(id) => id.hash(state),
226            Self::BigBlankNode { id_id } => id_id.hash(state),
227            Self::DefaultGraph => (),
228            Self::SmallStringLiteral(value) => value.hash(state),
229            Self::BigStringLiteral { value_id } => value_id.hash(state),
230            Self::SmallSmallLangStringLiteral { value, language } => {
231                value.hash(state);
232                language.hash(state);
233            }
234            Self::SmallBigLangStringLiteral { value, language_id } => {
235                value.hash(state);
236                language_id.hash(state);
237            }
238            Self::BigSmallLangStringLiteral { value_id, language } => {
239                value_id.hash(state);
240                language.hash(state);
241            }
242            Self::BigBigLangStringLiteral {
243                value_id,
244                language_id,
245            } => {
246                value_id.hash(state);
247                language_id.hash(state);
248            }
249            Self::SmallTypedLiteral { value, datatype_id } => {
250                value.hash(state);
251                datatype_id.hash(state);
252            }
253            Self::BigTypedLiteral {
254                value_id,
255                datatype_id,
256            } => {
257                value_id.hash(state);
258                datatype_id.hash(state);
259            }
260            Self::BooleanLiteral(value) => value.hash(state),
261            Self::FloatLiteral(value) => value.to_be_bytes().hash(state),
262            Self::DoubleLiteral(value) => value.to_be_bytes().hash(state),
263            Self::IntegerLiteral(value) => value.hash(state),
264            Self::DecimalLiteral(value) => value.hash(state),
265            Self::DateTimeLiteral(value) => value.hash(state),
266            Self::TimeLiteral(value) => value.hash(state),
267            Self::DateLiteral(value) => value.hash(state),
268            Self::GYearMonthLiteral(value) => value.hash(state),
269            Self::GYearLiteral(value) => value.hash(state),
270            Self::GMonthDayLiteral(value) => value.hash(state),
271            Self::GDayLiteral(value) => value.hash(state),
272            Self::GMonthLiteral(value) => value.hash(state),
273            Self::DurationLiteral(value) => value.hash(state),
274            Self::YearMonthDurationLiteral(value) => value.hash(state),
275            Self::DayTimeDurationLiteral(value) => value.hash(state),
276            Self::Triple(value) => value.hash(state),
277        }
278    }
279}
280
281impl EncodedTerm {
282    pub fn is_default_graph(&self) -> bool {
283        matches!(self, Self::DefaultGraph)
284    }
285}
286impl From<NamedNodeRef<'_>> for EncodedTerm {
287    fn from(named_node: NamedNodeRef<'_>) -> Self {
288        Self::NamedNode {
289            iri_id: StrHash::new(named_node.as_str()),
290        }
291    }
292}
293
294impl From<BlankNodeRef<'_>> for EncodedTerm {
295    fn from(blank_node: BlankNodeRef<'_>) -> Self {
296        if let Some(id) = blank_node.unique_id() {
297            Self::NumericalBlankNode {
298                id: id.to_be_bytes(),
299            }
300        } else {
301            let id = blank_node.as_str();
302            if let Ok(id) = id.try_into() {
303                Self::SmallBlankNode(id)
304            } else {
305                Self::BigBlankNode {
306                    id_id: StrHash::new(id),
307                }
308            }
309        }
310    }
311}
312
313impl From<LiteralRef<'_>> for EncodedTerm {
314    fn from(literal: LiteralRef<'_>) -> Self {
315        let value = literal.value();
316        let datatype = literal.datatype().as_str();
317        let native_encoding = match datatype {
318            "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
319                literal.language().map(|language| {
320                    if let Ok(value) = SmallString::try_from(value) {
321                        if let Ok(language) = SmallString::try_from(language) {
322                            Self::SmallSmallLangStringLiteral { value, language }
323                        } else {
324                            Self::SmallBigLangStringLiteral {
325                                value,
326                                language_id: StrHash::new(language),
327                            }
328                        }
329                    } else if let Ok(language) = SmallString::try_from(language) {
330                        Self::BigSmallLangStringLiteral {
331                            value_id: StrHash::new(value),
332                            language,
333                        }
334                    } else {
335                        Self::BigBigLangStringLiteral {
336                            value_id: StrHash::new(value),
337                            language_id: StrHash::new(language),
338                        }
339                    }
340                })
341            }
342            "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
343            "http://www.w3.org/2001/XMLSchema#string" => {
344                Some(if let Ok(value) = SmallString::try_from(value) {
345                    Self::SmallStringLiteral(value)
346                } else {
347                    Self::BigStringLiteral {
348                        value_id: StrHash::new(value),
349                    }
350                })
351            }
352            "http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
353            "http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
354            "http://www.w3.org/2001/XMLSchema#integer"
355            | "http://www.w3.org/2001/XMLSchema#byte"
356            | "http://www.w3.org/2001/XMLSchema#short"
357            | "http://www.w3.org/2001/XMLSchema#int"
358            | "http://www.w3.org/2001/XMLSchema#long"
359            | "http://www.w3.org/2001/XMLSchema#unsignedByte"
360            | "http://www.w3.org/2001/XMLSchema#unsignedShort"
361            | "http://www.w3.org/2001/XMLSchema#unsignedInt"
362            | "http://www.w3.org/2001/XMLSchema#unsignedLong"
363            | "http://www.w3.org/2001/XMLSchema#positiveInteger"
364            | "http://www.w3.org/2001/XMLSchema#negativeInteger"
365            | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
366            | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
367            "http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
368            "http://www.w3.org/2001/XMLSchema#dateTime"
369            | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
370            "http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
371            "http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
372            "http://www.w3.org/2001/XMLSchema#gYearMonth" => parse_g_year_month_str(value),
373            "http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(value),
374            "http://www.w3.org/2001/XMLSchema#gMonthDay" => parse_g_month_day_str(value),
375            "http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(value),
376            "http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(value),
377            "http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
378            "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
379                parse_year_month_duration_str(value)
380            }
381            "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
382                parse_day_time_duration_str(value)
383            }
384            _ => None,
385        };
386        match native_encoding {
387            Some(term) => term,
388            None => {
389                if let Ok(value) = SmallString::try_from(value) {
390                    Self::SmallTypedLiteral {
391                        value,
392                        datatype_id: StrHash::new(datatype),
393                    }
394                } else {
395                    Self::BigTypedLiteral {
396                        value_id: StrHash::new(value),
397                        datatype_id: StrHash::new(datatype),
398                    }
399                }
400            }
401        }
402    }
403}
404
405impl From<NamedOrBlankNodeRef<'_>> for EncodedTerm {
406    fn from(term: NamedOrBlankNodeRef<'_>) -> Self {
407        match term {
408            NamedOrBlankNodeRef::NamedNode(named_node) => named_node.into(),
409            NamedOrBlankNodeRef::BlankNode(blank_node) => blank_node.into(),
410        }
411    }
412}
413
414impl From<SubjectRef<'_>> for EncodedTerm {
415    fn from(term: SubjectRef<'_>) -> Self {
416        match term {
417            SubjectRef::NamedNode(named_node) => named_node.into(),
418            SubjectRef::BlankNode(blank_node) => blank_node.into(),
419            SubjectRef::Triple(triple) => triple.as_ref().into(),
420        }
421    }
422}
423
424impl From<TermRef<'_>> for EncodedTerm {
425    fn from(term: TermRef<'_>) -> Self {
426        match term {
427            TermRef::NamedNode(named_node) => named_node.into(),
428            TermRef::BlankNode(blank_node) => blank_node.into(),
429            TermRef::Literal(literal) => literal.into(),
430            TermRef::Triple(triple) => triple.as_ref().into(),
431        }
432    }
433}
434
435impl From<GraphNameRef<'_>> for EncodedTerm {
436    fn from(name: GraphNameRef<'_>) -> Self {
437        match name {
438            GraphNameRef::NamedNode(named_node) => named_node.into(),
439            GraphNameRef::BlankNode(blank_node) => blank_node.into(),
440            GraphNameRef::DefaultGraph => Self::DefaultGraph,
441        }
442    }
443}
444
445impl From<TripleRef<'_>> for EncodedTerm {
446    fn from(triple: TripleRef<'_>) -> Self {
447        Self::Triple(Arc::new(triple.into()))
448    }
449}
450
451#[derive(Eq, PartialEq, Debug, Clone, Hash)]
452pub struct EncodedTriple {
453    pub subject: EncodedTerm,
454    pub predicate: EncodedTerm,
455    pub object: EncodedTerm,
456}
457
458impl EncodedTriple {
459    pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
460        Self {
461            subject,
462            predicate,
463            object,
464        }
465    }
466}
467
468impl From<TripleRef<'_>> for EncodedTriple {
469    fn from(triple: TripleRef<'_>) -> Self {
470        Self {
471            subject: triple.subject.into(),
472            predicate: triple.predicate.into(),
473            object: triple.object.into(),
474        }
475    }
476}
477
478#[derive(Eq, PartialEq, Debug, Clone, Hash)]
479pub struct EncodedQuad {
480    pub subject: EncodedTerm,
481    pub predicate: EncodedTerm,
482    pub object: EncodedTerm,
483    pub graph_name: EncodedTerm,
484}
485
486impl EncodedQuad {
487    pub fn new(
488        subject: EncodedTerm,
489        predicate: EncodedTerm,
490        object: EncodedTerm,
491        graph_name: EncodedTerm,
492    ) -> Self {
493        Self {
494            subject,
495            predicate,
496            object,
497            graph_name,
498        }
499    }
500}
501
502impl From<QuadRef<'_>> for EncodedQuad {
503    fn from(quad: QuadRef<'_>) -> Self {
504        Self {
505            subject: quad.subject.into(),
506            predicate: quad.predicate.into(),
507            object: quad.object.into(),
508            graph_name: quad.graph_name.into(),
509        }
510    }
511}
512
513pub trait StrLookup {
514    fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>;
515}
516
517pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
518    term: TermRef<'_>,
519    encoded: &EncodedTerm,
520    insert_str: &mut F,
521) -> Result<(), StorageError> {
522    match term {
523        TermRef::NamedNode(node) => {
524            if let EncodedTerm::NamedNode { iri_id } = encoded {
525                insert_str(iri_id, node.as_str())
526            } else {
527                Err(CorruptionError::from_encoded_term(encoded, &term).into())
528            }
529        }
530        TermRef::BlankNode(node) => match encoded {
531            EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()),
532            EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()),
533            _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
534        },
535        TermRef::Literal(literal) => match encoded {
536            EncodedTerm::BigStringLiteral { value_id }
537            | EncodedTerm::BigSmallLangStringLiteral { value_id, .. } => {
538                insert_str(value_id, literal.value())
539            }
540            EncodedTerm::SmallBigLangStringLiteral { language_id, .. } => {
541                if let Some(language) = literal.language() {
542                    insert_str(language_id, language)
543                } else {
544                    Err(CorruptionError::from_encoded_term(encoded, &term).into())
545                }
546            }
547            EncodedTerm::BigBigLangStringLiteral {
548                value_id,
549                language_id,
550            } => {
551                insert_str(value_id, literal.value())?;
552                if let Some(language) = literal.language() {
553                    insert_str(language_id, language)
554                } else {
555                    Err(CorruptionError::from_encoded_term(encoded, &term).into())
556                }
557            }
558            EncodedTerm::SmallTypedLiteral { datatype_id, .. } => {
559                insert_str(datatype_id, literal.datatype().as_str())
560            }
561            EncodedTerm::BigTypedLiteral {
562                value_id,
563                datatype_id,
564            } => {
565                insert_str(value_id, literal.value())?;
566                insert_str(datatype_id, literal.datatype().as_str())
567            }
568            EncodedTerm::SmallStringLiteral(..)
569            | EncodedTerm::SmallSmallLangStringLiteral { .. }
570            | EncodedTerm::BooleanLiteral(..)
571            | EncodedTerm::FloatLiteral(..)
572            | EncodedTerm::DoubleLiteral(..)
573            | EncodedTerm::IntegerLiteral(..)
574            | EncodedTerm::DecimalLiteral(..)
575            | EncodedTerm::DateTimeLiteral(..)
576            | EncodedTerm::TimeLiteral(..)
577            | EncodedTerm::DateLiteral(..)
578            | EncodedTerm::GYearMonthLiteral(..)
579            | EncodedTerm::GYearLiteral(..)
580            | EncodedTerm::GMonthDayLiteral(..)
581            | EncodedTerm::GDayLiteral(..)
582            | EncodedTerm::GMonthLiteral(..)
583            | EncodedTerm::DurationLiteral(..)
584            | EncodedTerm::YearMonthDurationLiteral(..)
585            | EncodedTerm::DayTimeDurationLiteral(..) => Ok(()),
586            _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
587        },
588        TermRef::Triple(triple) => {
589            if let EncodedTerm::Triple(encoded) = encoded {
590                insert_term(triple.subject.as_ref().into(), &encoded.subject, insert_str)?;
591                insert_term(
592                    triple.predicate.as_ref().into(),
593                    &encoded.predicate,
594                    insert_str,
595                )?;
596                insert_term(triple.object.as_ref(), &encoded.object, insert_str)
597            } else {
598                Err(CorruptionError::from_encoded_term(encoded, &term).into())
599            }
600        }
601    }
602}
603
604pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> {
605    value.parse().map(EncodedTerm::BooleanLiteral).ok()
606}
607
608pub fn parse_float_str(value: &str) -> Option<EncodedTerm> {
609    value.parse().map(EncodedTerm::FloatLiteral).ok()
610}
611
612pub fn parse_double_str(value: &str) -> Option<EncodedTerm> {
613    value.parse().map(EncodedTerm::DoubleLiteral).ok()
614}
615
616pub fn parse_integer_str(value: &str) -> Option<EncodedTerm> {
617    value.parse().map(EncodedTerm::IntegerLiteral).ok()
618}
619
620pub fn parse_decimal_str(value: &str) -> Option<EncodedTerm> {
621    value.parse().map(EncodedTerm::DecimalLiteral).ok()
622}
623
624pub fn parse_date_time_str(value: &str) -> Option<EncodedTerm> {
625    value.parse().map(EncodedTerm::DateTimeLiteral).ok()
626}
627
628pub fn parse_time_str(value: &str) -> Option<EncodedTerm> {
629    value.parse().map(EncodedTerm::TimeLiteral).ok()
630}
631
632pub fn parse_date_str(value: &str) -> Option<EncodedTerm> {
633    value.parse().map(EncodedTerm::DateLiteral).ok()
634}
635
636pub fn parse_g_year_month_str(value: &str) -> Option<EncodedTerm> {
637    value.parse().map(EncodedTerm::GYearMonthLiteral).ok()
638}
639
640pub fn parse_g_year_str(value: &str) -> Option<EncodedTerm> {
641    value.parse().map(EncodedTerm::GYearLiteral).ok()
642}
643
644pub fn parse_g_month_day_str(value: &str) -> Option<EncodedTerm> {
645    value.parse().map(EncodedTerm::GMonthDayLiteral).ok()
646}
647
648pub fn parse_g_day_str(value: &str) -> Option<EncodedTerm> {
649    value.parse().map(EncodedTerm::GDayLiteral).ok()
650}
651
652pub fn parse_g_month_str(value: &str) -> Option<EncodedTerm> {
653    value.parse().map(EncodedTerm::GMonthLiteral).ok()
654}
655
656pub fn parse_duration_str(value: &str) -> Option<EncodedTerm> {
657    value.parse().map(EncodedTerm::DurationLiteral).ok()
658}
659
660pub fn parse_year_month_duration_str(value: &str) -> Option<EncodedTerm> {
661    value
662        .parse()
663        .map(EncodedTerm::YearMonthDurationLiteral)
664        .ok()
665}
666
667pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
668    value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
669}
670
671pub trait Decoder: StrLookup {
672    fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError>;
673
674    fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> {
675        match self.decode_term(encoded)? {
676            Term::NamedNode(named_node) => Ok(named_node.into()),
677            Term::BlankNode(blank_node) => Ok(blank_node.into()),
678            Term::Literal(_) => Err(CorruptionError::msg(
679                "A literal has been found instead of a subject node",
680            )
681            .into()),
682            Term::Triple(triple) => Ok(Subject::Triple(triple)),
683        }
684    }
685
686    fn decode_named_or_blank_node(
687        &self,
688        encoded: &EncodedTerm,
689    ) -> Result<NamedOrBlankNode, StorageError> {
690        match self.decode_term(encoded)? {
691            Term::NamedNode(named_node) => Ok(named_node.into()),
692            Term::BlankNode(blank_node) => Ok(blank_node.into()),
693            Term::Literal(_) => Err(CorruptionError::msg(
694                "A literal has been found instead of a named or blank node",
695            )
696            .into()),
697            Term::Triple(_) => Err(CorruptionError::msg(
698                "A triple has been found instead of a named or blank node",
699            )
700            .into()),
701        }
702    }
703
704    fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> {
705        match self.decode_term(encoded)? {
706            Term::NamedNode(named_node) => Ok(named_node),
707            Term::BlankNode(_) => Err(CorruptionError::msg(
708                "A blank node has been found instead of a named node",
709            )
710            .into()),
711            Term::Literal(_) => {
712                Err(CorruptionError::msg("A literal has been found instead of a named node").into())
713            }
714            Term::Triple(_) => {
715                Err(CorruptionError::msg("A triple has been found instead of a named node").into())
716            }
717        }
718    }
719
720    fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> {
721        Ok(Triple::new(
722            self.decode_subject(&encoded.subject)?,
723            self.decode_named_node(&encoded.predicate)?,
724            self.decode_term(&encoded.object)?,
725        ))
726    }
727
728    fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> {
729        Ok(Quad::new(
730            self.decode_subject(&encoded.subject)?,
731            self.decode_named_node(&encoded.predicate)?,
732            self.decode_term(&encoded.object)?,
733            if encoded.graph_name == EncodedTerm::DefaultGraph {
734                GraphName::DefaultGraph
735            } else {
736                match self.decode_term(&encoded.graph_name)? {
737                    Term::NamedNode(named_node) => named_node.into(),
738                    Term::BlankNode(blank_node) => blank_node.into(),
739                    Term::Literal(_) => {
740                        return Err(
741                            CorruptionError::msg("A literal is not a valid graph name").into()
742                        )
743                    }
744                    Term::Triple(_) => {
745                        return Err(
746                            CorruptionError::msg("A triple is not a valid graph name").into()
747                        )
748                    }
749                }
750            },
751        ))
752    }
753}
754
755impl<S: StrLookup> Decoder for S {
756    fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
757        match encoded {
758            EncodedTerm::DefaultGraph => {
759                Err(CorruptionError::msg("The default graph tag is not a valid term").into())
760            }
761            EncodedTerm::NamedNode { iri_id } => {
762                Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
763            }
764            EncodedTerm::NumericalBlankNode { id } => {
765                Ok(BlankNode::new_from_unique_id(u128::from_be_bytes(*id)).into())
766            }
767            EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
768            EncodedTerm::BigBlankNode { id_id } => {
769                Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into())
770            }
771            EncodedTerm::SmallStringLiteral(value) => {
772                Ok(Literal::new_simple_literal(*value).into())
773            }
774            EncodedTerm::BigStringLiteral { value_id } => {
775                Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into())
776            }
777            EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
778                Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into())
779            }
780            EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
781                Ok(Literal::new_language_tagged_literal_unchecked(
782                    *value,
783                    get_required_str(self, language_id)?,
784                )
785                .into())
786            }
787            EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
788                Ok(Literal::new_language_tagged_literal_unchecked(
789                    get_required_str(self, value_id)?,
790                    *language,
791                )
792                .into())
793            }
794            EncodedTerm::BigBigLangStringLiteral {
795                value_id,
796                language_id,
797            } => Ok(Literal::new_language_tagged_literal_unchecked(
798                get_required_str(self, value_id)?,
799                get_required_str(self, language_id)?,
800            )
801            .into()),
802            EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
803                Ok(Literal::new_typed_literal(
804                    *value,
805                    NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
806                )
807                .into())
808            }
809            EncodedTerm::BigTypedLiteral {
810                value_id,
811                datatype_id,
812            } => Ok(Literal::new_typed_literal(
813                get_required_str(self, value_id)?,
814                NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
815            )
816            .into()),
817            EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()),
818            EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
819            EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()),
820            EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()),
821            EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()),
822            EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()),
823            EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()),
824            EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()),
825            EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()),
826            EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()),
827            EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()),
828            EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()),
829            EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()),
830            EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()),
831            EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()),
832            EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()),
833            EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()),
834        }
835    }
836}
837
838fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> {
839    Ok(lookup.get_str(id)?.ok_or_else(|| {
840        CorruptionError::new(format!(
841            "Not able to find the string with id {id:?} in the string store"
842        ))
843    })?)
844}
845
/// Pass-through [`Hasher`] meant to be used only with `StrHash` keys.
///
/// `StrHash` feeds its 16 bytes to the hasher with a single `write_u128` call; this hasher
/// keeps the low 64 bits of that value as the final hash instead of hashing it again.
#[derive(Default)]
pub struct StrHashHasher {
    /// Low 64 bits of the last `u128` written; `0` until something is written.
    value: u64,
}

impl Hasher for StrHashHasher {
    /// Byte-wise input is not supported.
    ///
    /// # Panics
    ///
    /// Always panics: only `write_u128` is expected to be called on this hasher.
    fn write(&mut self, _: &[u8]) {
        unreachable!("Must only be used on StrHash")
    }

    /// Stores the low 64 bits of `i`, replacing any previously written value.
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    fn write_u128(&mut self, i: u128) {
        // Truncation is intentional: only 64 bits can be returned from `finish`.
        let low_bits = i as u64;
        self.value = low_bits;
    }

    /// Returns the value stored by the last `write_u128` call.
    #[inline]
    fn finish(&self) -> u64 {
        self.value
    }
}
867
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(target_pointer_width = "64")]
    use std::mem::{align_of, size_of};

    /// Pins the bytes produced by `StrHash::new` for two fixed inputs and checks that
    /// `from_be_bytes` / `to_be_bytes` round-trip those bytes unchanged.
    #[test]
    fn str_hash_stability() {
        const EMPTY_HASH: [u8; 16] = [
            244, 242, 206, 212, 71, 171, 2, 66, 125, 224, 163, 128, 71, 215, 73, 80,
        ];

        const FOO_HASH: [u8; 16] = [
            177, 216, 59, 176, 7, 47, 87, 243, 76, 253, 150, 32, 126, 153, 216, 19,
        ];

        for (input, expected) in [("", EMPTY_HASH), ("foo", FOO_HASH)] {
            let hashed = StrHash::new(input);
            assert_eq!(hashed.to_be_bytes(), expected);

            let rebuilt = StrHash::from_be_bytes(expected);
            assert_eq!(rebuilt.to_be_bytes(), expected);
        }
    }

    /// Pins the in-memory layout of the encoded types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size_and_alignment() {
        let term_size = size_of::<EncodedTerm>();
        let quad_size = size_of::<EncodedQuad>();
        let term_align = align_of::<EncodedTerm>();

        assert_eq!(term_size, 40);
        assert_eq!(quad_size, 160);
        assert_eq!(term_align, 8);
    }
}