1#![allow(clippy::unreadable_literal)]
2
3use crate::model::*;
4use crate::storage::error::{CorruptionError, StorageError};
5use crate::storage::small_string::SmallString;
6use oxsdatatypes::*;
7use siphasher::sip128::{Hasher128, SipHasher24};
8use std::fmt::Debug;
9use std::hash::{Hash, Hasher};
10use std::mem::discriminant;
11use std::str;
12use std::sync::Arc;
13
14#[derive(Eq, PartialEq, Debug, Clone, Copy)]
15pub struct StrHash {
16 hash: [u8; 16],
17}
18
19impl StrHash {
20 pub fn new(value: &str) -> Self {
21 let mut hasher = SipHasher24::new();
22 hasher.write(value.as_bytes());
23 Self {
24 hash: u128::from(hasher.finish128()).to_be_bytes(),
25 }
26 }
27
28 #[inline]
29 pub fn from_be_bytes(hash: [u8; 16]) -> Self {
30 Self { hash }
31 }
32
33 #[inline]
34 pub fn to_be_bytes(self) -> [u8; 16] {
35 self.hash
36 }
37}
38
39impl Hash for StrHash {
40 #[inline]
41 #[allow(clippy::host_endian_bytes)]
42 fn hash<H: Hasher>(&self, state: &mut H) {
43 state.write_u128(u128::from_ne_bytes(self.hash))
44 }
45}
46
47#[derive(Debug, Clone)]
48pub enum EncodedTerm {
49 DefaultGraph, NamedNode {
51 iri_id: StrHash,
52 },
53 NumericalBlankNode {
54 id: [u8; 16],
55 },
56 SmallBlankNode(SmallString),
57 BigBlankNode {
58 id_id: StrHash,
59 },
60 SmallStringLiteral(SmallString),
61 BigStringLiteral {
62 value_id: StrHash,
63 },
64 SmallSmallLangStringLiteral {
65 value: SmallString,
66 language: SmallString,
67 },
68 SmallBigLangStringLiteral {
69 value: SmallString,
70 language_id: StrHash,
71 },
72 BigSmallLangStringLiteral {
73 value_id: StrHash,
74 language: SmallString,
75 },
76 BigBigLangStringLiteral {
77 value_id: StrHash,
78 language_id: StrHash,
79 },
80 SmallTypedLiteral {
81 value: SmallString,
82 datatype_id: StrHash,
83 },
84 BigTypedLiteral {
85 value_id: StrHash,
86 datatype_id: StrHash,
87 },
88 BooleanLiteral(Boolean),
89 FloatLiteral(Float),
90 DoubleLiteral(Double),
91 IntegerLiteral(Integer),
92 DecimalLiteral(Decimal),
93 DateTimeLiteral(DateTime),
94 TimeLiteral(Time),
95 DateLiteral(Date),
96 GYearMonthLiteral(GYearMonth),
97 GYearLiteral(GYear),
98 GMonthDayLiteral(GMonthDay),
99 GDayLiteral(GDay),
100 GMonthLiteral(GMonth),
101 DurationLiteral(Duration),
102 YearMonthDurationLiteral(YearMonthDuration),
103 DayTimeDurationLiteral(DayTimeDuration),
104 Triple(Arc<EncodedTriple>),
105}
106
107impl PartialEq for EncodedTerm {
108 fn eq(&self, other: &Self) -> bool {
109 discriminant(self) == discriminant(other)
110 && match (self, other) {
111 (Self::DefaultGraph, Self::DefaultGraph) => true,
112 (Self::NamedNode { iri_id: iri_id_a }, Self::NamedNode { iri_id: iri_id_b }) => {
113 iri_id_a == iri_id_b
114 }
115 (Self::NumericalBlankNode { id: id_a }, Self::NumericalBlankNode { id: id_b }) => {
116 id_a == id_b
117 }
118 (Self::SmallBlankNode(id_a), Self::SmallBlankNode(id_b)) => id_a == id_b,
119 (Self::BigBlankNode { id_id: id_a }, Self::BigBlankNode { id_id: id_b }) => {
120 id_a == id_b
121 }
122 (Self::SmallStringLiteral(a), Self::SmallStringLiteral(b)) => a == b,
123 (
124 Self::BigStringLiteral {
125 value_id: value_id_a,
126 },
127 Self::BigStringLiteral {
128 value_id: value_id_b,
129 },
130 ) => value_id_a == value_id_b,
131 (
132 Self::SmallSmallLangStringLiteral {
133 value: value_a,
134 language: language_a,
135 },
136 Self::SmallSmallLangStringLiteral {
137 value: value_b,
138 language: language_b,
139 },
140 ) => value_a == value_b && language_a == language_b,
141 (
142 Self::SmallBigLangStringLiteral {
143 value: value_a,
144 language_id: language_id_a,
145 },
146 Self::SmallBigLangStringLiteral {
147 value: value_b,
148 language_id: language_id_b,
149 },
150 ) => value_a == value_b && language_id_a == language_id_b,
151 (
152 Self::BigSmallLangStringLiteral {
153 value_id: value_id_a,
154 language: language_a,
155 },
156 Self::BigSmallLangStringLiteral {
157 value_id: value_id_b,
158 language: language_b,
159 },
160 ) => value_id_a == value_id_b && language_a == language_b,
161 (
162 Self::BigBigLangStringLiteral {
163 value_id: value_id_a,
164 language_id: language_id_a,
165 },
166 Self::BigBigLangStringLiteral {
167 value_id: value_id_b,
168 language_id: language_id_b,
169 },
170 ) => value_id_a == value_id_b && language_id_a == language_id_b,
171 (
172 Self::SmallTypedLiteral {
173 value: value_a,
174 datatype_id: datatype_id_a,
175 },
176 Self::SmallTypedLiteral {
177 value: value_b,
178 datatype_id: datatype_id_b,
179 },
180 ) => value_a == value_b && datatype_id_a == datatype_id_b,
181 (
182 Self::BigTypedLiteral {
183 value_id: value_id_a,
184 datatype_id: datatype_id_a,
185 },
186 Self::BigTypedLiteral {
187 value_id: value_id_b,
188 datatype_id: datatype_id_b,
189 },
190 ) => value_id_a == value_id_b && datatype_id_a == datatype_id_b,
191 (Self::BooleanLiteral(a), Self::BooleanLiteral(b)) => a == b,
192 (Self::FloatLiteral(a), Self::FloatLiteral(b)) => a.is_identical_with(*b),
193 (Self::DoubleLiteral(a), Self::DoubleLiteral(b)) => a.is_identical_with(*b),
194 (Self::IntegerLiteral(a), Self::IntegerLiteral(b)) => a.is_identical_with(*b),
195 (Self::DecimalLiteral(a), Self::DecimalLiteral(b)) => a.is_identical_with(*b),
196 (Self::DateTimeLiteral(a), Self::DateTimeLiteral(b)) => a.is_identical_with(*b),
197 (Self::TimeLiteral(a), Self::TimeLiteral(b)) => a.is_identical_with(*b),
198 (Self::DateLiteral(a), Self::DateLiteral(b)) => a.is_identical_with(*b),
199 (Self::GYearMonthLiteral(a), Self::GYearMonthLiteral(b)) => a.is_identical_with(*b),
200 (Self::GYearLiteral(a), Self::GYearLiteral(b)) => a.is_identical_with(*b),
201 (Self::GMonthDayLiteral(a), Self::GMonthDayLiteral(b)) => a.is_identical_with(*b),
202 (Self::GMonthLiteral(a), Self::GMonthLiteral(b)) => a.is_identical_with(*b),
203 (Self::GDayLiteral(a), Self::GDayLiteral(b)) => a.is_identical_with(*b),
204 (Self::DurationLiteral(a), Self::DurationLiteral(b)) => a.is_identical_with(*b),
205 (Self::YearMonthDurationLiteral(a), Self::YearMonthDurationLiteral(b)) => {
206 a.is_identical_with(*b)
207 }
208 (Self::DayTimeDurationLiteral(a), Self::DayTimeDurationLiteral(b)) => {
209 a.is_identical_with(*b)
210 }
211 (Self::Triple(a), Self::Triple(b)) => a == b,
212 (_, _) => unreachable!(),
213 }
214 }
215}
216
217impl Eq for EncodedTerm {}
218
219impl Hash for EncodedTerm {
220 fn hash<H: Hasher>(&self, state: &mut H) {
221 discriminant(self).hash(state);
222 match self {
223 Self::NamedNode { iri_id } => iri_id.hash(state),
224 Self::NumericalBlankNode { id } => id.hash(state),
225 Self::SmallBlankNode(id) => id.hash(state),
226 Self::BigBlankNode { id_id } => id_id.hash(state),
227 Self::DefaultGraph => (),
228 Self::SmallStringLiteral(value) => value.hash(state),
229 Self::BigStringLiteral { value_id } => value_id.hash(state),
230 Self::SmallSmallLangStringLiteral { value, language } => {
231 value.hash(state);
232 language.hash(state);
233 }
234 Self::SmallBigLangStringLiteral { value, language_id } => {
235 value.hash(state);
236 language_id.hash(state);
237 }
238 Self::BigSmallLangStringLiteral { value_id, language } => {
239 value_id.hash(state);
240 language.hash(state);
241 }
242 Self::BigBigLangStringLiteral {
243 value_id,
244 language_id,
245 } => {
246 value_id.hash(state);
247 language_id.hash(state);
248 }
249 Self::SmallTypedLiteral { value, datatype_id } => {
250 value.hash(state);
251 datatype_id.hash(state);
252 }
253 Self::BigTypedLiteral {
254 value_id,
255 datatype_id,
256 } => {
257 value_id.hash(state);
258 datatype_id.hash(state);
259 }
260 Self::BooleanLiteral(value) => value.hash(state),
261 Self::FloatLiteral(value) => value.to_be_bytes().hash(state),
262 Self::DoubleLiteral(value) => value.to_be_bytes().hash(state),
263 Self::IntegerLiteral(value) => value.hash(state),
264 Self::DecimalLiteral(value) => value.hash(state),
265 Self::DateTimeLiteral(value) => value.hash(state),
266 Self::TimeLiteral(value) => value.hash(state),
267 Self::DateLiteral(value) => value.hash(state),
268 Self::GYearMonthLiteral(value) => value.hash(state),
269 Self::GYearLiteral(value) => value.hash(state),
270 Self::GMonthDayLiteral(value) => value.hash(state),
271 Self::GDayLiteral(value) => value.hash(state),
272 Self::GMonthLiteral(value) => value.hash(state),
273 Self::DurationLiteral(value) => value.hash(state),
274 Self::YearMonthDurationLiteral(value) => value.hash(state),
275 Self::DayTimeDurationLiteral(value) => value.hash(state),
276 Self::Triple(value) => value.hash(state),
277 }
278 }
279}
280
281impl EncodedTerm {
282 pub fn is_default_graph(&self) -> bool {
283 matches!(self, Self::DefaultGraph)
284 }
285}
286impl From<NamedNodeRef<'_>> for EncodedTerm {
287 fn from(named_node: NamedNodeRef<'_>) -> Self {
288 Self::NamedNode {
289 iri_id: StrHash::new(named_node.as_str()),
290 }
291 }
292}
293
294impl From<BlankNodeRef<'_>> for EncodedTerm {
295 fn from(blank_node: BlankNodeRef<'_>) -> Self {
296 if let Some(id) = blank_node.unique_id() {
297 Self::NumericalBlankNode {
298 id: id.to_be_bytes(),
299 }
300 } else {
301 let id = blank_node.as_str();
302 if let Ok(id) = id.try_into() {
303 Self::SmallBlankNode(id)
304 } else {
305 Self::BigBlankNode {
306 id_id: StrHash::new(id),
307 }
308 }
309 }
310 }
311}
312
313impl From<LiteralRef<'_>> for EncodedTerm {
314 fn from(literal: LiteralRef<'_>) -> Self {
315 let value = literal.value();
316 let datatype = literal.datatype().as_str();
317 let native_encoding = match datatype {
318 "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString" => {
319 literal.language().map(|language| {
320 if let Ok(value) = SmallString::try_from(value) {
321 if let Ok(language) = SmallString::try_from(language) {
322 Self::SmallSmallLangStringLiteral { value, language }
323 } else {
324 Self::SmallBigLangStringLiteral {
325 value,
326 language_id: StrHash::new(language),
327 }
328 }
329 } else if let Ok(language) = SmallString::try_from(language) {
330 Self::BigSmallLangStringLiteral {
331 value_id: StrHash::new(value),
332 language,
333 }
334 } else {
335 Self::BigBigLangStringLiteral {
336 value_id: StrHash::new(value),
337 language_id: StrHash::new(language),
338 }
339 }
340 })
341 }
342 "http://www.w3.org/2001/XMLSchema#boolean" => parse_boolean_str(value),
343 "http://www.w3.org/2001/XMLSchema#string" => {
344 Some(if let Ok(value) = SmallString::try_from(value) {
345 Self::SmallStringLiteral(value)
346 } else {
347 Self::BigStringLiteral {
348 value_id: StrHash::new(value),
349 }
350 })
351 }
352 "http://www.w3.org/2001/XMLSchema#float" => parse_float_str(value),
353 "http://www.w3.org/2001/XMLSchema#double" => parse_double_str(value),
354 "http://www.w3.org/2001/XMLSchema#integer"
355 | "http://www.w3.org/2001/XMLSchema#byte"
356 | "http://www.w3.org/2001/XMLSchema#short"
357 | "http://www.w3.org/2001/XMLSchema#int"
358 | "http://www.w3.org/2001/XMLSchema#long"
359 | "http://www.w3.org/2001/XMLSchema#unsignedByte"
360 | "http://www.w3.org/2001/XMLSchema#unsignedShort"
361 | "http://www.w3.org/2001/XMLSchema#unsignedInt"
362 | "http://www.w3.org/2001/XMLSchema#unsignedLong"
363 | "http://www.w3.org/2001/XMLSchema#positiveInteger"
364 | "http://www.w3.org/2001/XMLSchema#negativeInteger"
365 | "http://www.w3.org/2001/XMLSchema#nonPositiveInteger"
366 | "http://www.w3.org/2001/XMLSchema#nonNegativeInteger" => parse_integer_str(value),
367 "http://www.w3.org/2001/XMLSchema#decimal" => parse_decimal_str(value),
368 "http://www.w3.org/2001/XMLSchema#dateTime"
369 | "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => parse_date_time_str(value),
370 "http://www.w3.org/2001/XMLSchema#time" => parse_time_str(value),
371 "http://www.w3.org/2001/XMLSchema#date" => parse_date_str(value),
372 "http://www.w3.org/2001/XMLSchema#gYearMonth" => parse_g_year_month_str(value),
373 "http://www.w3.org/2001/XMLSchema#gYear" => parse_g_year_str(value),
374 "http://www.w3.org/2001/XMLSchema#gMonthDay" => parse_g_month_day_str(value),
375 "http://www.w3.org/2001/XMLSchema#gDay" => parse_g_day_str(value),
376 "http://www.w3.org/2001/XMLSchema#gMonth" => parse_g_month_str(value),
377 "http://www.w3.org/2001/XMLSchema#duration" => parse_duration_str(value),
378 "http://www.w3.org/2001/XMLSchema#yearMonthDuration" => {
379 parse_year_month_duration_str(value)
380 }
381 "http://www.w3.org/2001/XMLSchema#dayTimeDuration" => {
382 parse_day_time_duration_str(value)
383 }
384 _ => None,
385 };
386 match native_encoding {
387 Some(term) => term,
388 None => {
389 if let Ok(value) = SmallString::try_from(value) {
390 Self::SmallTypedLiteral {
391 value,
392 datatype_id: StrHash::new(datatype),
393 }
394 } else {
395 Self::BigTypedLiteral {
396 value_id: StrHash::new(value),
397 datatype_id: StrHash::new(datatype),
398 }
399 }
400 }
401 }
402 }
403}
404
405impl From<NamedOrBlankNodeRef<'_>> for EncodedTerm {
406 fn from(term: NamedOrBlankNodeRef<'_>) -> Self {
407 match term {
408 NamedOrBlankNodeRef::NamedNode(named_node) => named_node.into(),
409 NamedOrBlankNodeRef::BlankNode(blank_node) => blank_node.into(),
410 }
411 }
412}
413
414impl From<SubjectRef<'_>> for EncodedTerm {
415 fn from(term: SubjectRef<'_>) -> Self {
416 match term {
417 SubjectRef::NamedNode(named_node) => named_node.into(),
418 SubjectRef::BlankNode(blank_node) => blank_node.into(),
419 SubjectRef::Triple(triple) => triple.as_ref().into(),
420 }
421 }
422}
423
424impl From<TermRef<'_>> for EncodedTerm {
425 fn from(term: TermRef<'_>) -> Self {
426 match term {
427 TermRef::NamedNode(named_node) => named_node.into(),
428 TermRef::BlankNode(blank_node) => blank_node.into(),
429 TermRef::Literal(literal) => literal.into(),
430 TermRef::Triple(triple) => triple.as_ref().into(),
431 }
432 }
433}
434
435impl From<GraphNameRef<'_>> for EncodedTerm {
436 fn from(name: GraphNameRef<'_>) -> Self {
437 match name {
438 GraphNameRef::NamedNode(named_node) => named_node.into(),
439 GraphNameRef::BlankNode(blank_node) => blank_node.into(),
440 GraphNameRef::DefaultGraph => Self::DefaultGraph,
441 }
442 }
443}
444
445impl From<TripleRef<'_>> for EncodedTerm {
446 fn from(triple: TripleRef<'_>) -> Self {
447 Self::Triple(Arc::new(triple.into()))
448 }
449}
450
451#[derive(Eq, PartialEq, Debug, Clone, Hash)]
452pub struct EncodedTriple {
453 pub subject: EncodedTerm,
454 pub predicate: EncodedTerm,
455 pub object: EncodedTerm,
456}
457
458impl EncodedTriple {
459 pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
460 Self {
461 subject,
462 predicate,
463 object,
464 }
465 }
466}
467
468impl From<TripleRef<'_>> for EncodedTriple {
469 fn from(triple: TripleRef<'_>) -> Self {
470 Self {
471 subject: triple.subject.into(),
472 predicate: triple.predicate.into(),
473 object: triple.object.into(),
474 }
475 }
476}
477
478#[derive(Eq, PartialEq, Debug, Clone, Hash)]
479pub struct EncodedQuad {
480 pub subject: EncodedTerm,
481 pub predicate: EncodedTerm,
482 pub object: EncodedTerm,
483 pub graph_name: EncodedTerm,
484}
485
486impl EncodedQuad {
487 pub fn new(
488 subject: EncodedTerm,
489 predicate: EncodedTerm,
490 object: EncodedTerm,
491 graph_name: EncodedTerm,
492 ) -> Self {
493 Self {
494 subject,
495 predicate,
496 object,
497 graph_name,
498 }
499 }
500}
501
502impl From<QuadRef<'_>> for EncodedQuad {
503 fn from(quad: QuadRef<'_>) -> Self {
504 Self {
505 subject: quad.subject.into(),
506 predicate: quad.predicate.into(),
507 object: quad.object.into(),
508 graph_name: quad.graph_name.into(),
509 }
510 }
511}
512
513pub trait StrLookup {
514 fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError>;
515}
516
517pub fn insert_term<F: FnMut(&StrHash, &str) -> Result<(), StorageError>>(
518 term: TermRef<'_>,
519 encoded: &EncodedTerm,
520 insert_str: &mut F,
521) -> Result<(), StorageError> {
522 match term {
523 TermRef::NamedNode(node) => {
524 if let EncodedTerm::NamedNode { iri_id } = encoded {
525 insert_str(iri_id, node.as_str())
526 } else {
527 Err(CorruptionError::from_encoded_term(encoded, &term).into())
528 }
529 }
530 TermRef::BlankNode(node) => match encoded {
531 EncodedTerm::BigBlankNode { id_id } => insert_str(id_id, node.as_str()),
532 EncodedTerm::SmallBlankNode(..) | EncodedTerm::NumericalBlankNode { .. } => Ok(()),
533 _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
534 },
535 TermRef::Literal(literal) => match encoded {
536 EncodedTerm::BigStringLiteral { value_id }
537 | EncodedTerm::BigSmallLangStringLiteral { value_id, .. } => {
538 insert_str(value_id, literal.value())
539 }
540 EncodedTerm::SmallBigLangStringLiteral { language_id, .. } => {
541 if let Some(language) = literal.language() {
542 insert_str(language_id, language)
543 } else {
544 Err(CorruptionError::from_encoded_term(encoded, &term).into())
545 }
546 }
547 EncodedTerm::BigBigLangStringLiteral {
548 value_id,
549 language_id,
550 } => {
551 insert_str(value_id, literal.value())?;
552 if let Some(language) = literal.language() {
553 insert_str(language_id, language)
554 } else {
555 Err(CorruptionError::from_encoded_term(encoded, &term).into())
556 }
557 }
558 EncodedTerm::SmallTypedLiteral { datatype_id, .. } => {
559 insert_str(datatype_id, literal.datatype().as_str())
560 }
561 EncodedTerm::BigTypedLiteral {
562 value_id,
563 datatype_id,
564 } => {
565 insert_str(value_id, literal.value())?;
566 insert_str(datatype_id, literal.datatype().as_str())
567 }
568 EncodedTerm::SmallStringLiteral(..)
569 | EncodedTerm::SmallSmallLangStringLiteral { .. }
570 | EncodedTerm::BooleanLiteral(..)
571 | EncodedTerm::FloatLiteral(..)
572 | EncodedTerm::DoubleLiteral(..)
573 | EncodedTerm::IntegerLiteral(..)
574 | EncodedTerm::DecimalLiteral(..)
575 | EncodedTerm::DateTimeLiteral(..)
576 | EncodedTerm::TimeLiteral(..)
577 | EncodedTerm::DateLiteral(..)
578 | EncodedTerm::GYearMonthLiteral(..)
579 | EncodedTerm::GYearLiteral(..)
580 | EncodedTerm::GMonthDayLiteral(..)
581 | EncodedTerm::GDayLiteral(..)
582 | EncodedTerm::GMonthLiteral(..)
583 | EncodedTerm::DurationLiteral(..)
584 | EncodedTerm::YearMonthDurationLiteral(..)
585 | EncodedTerm::DayTimeDurationLiteral(..) => Ok(()),
586 _ => Err(CorruptionError::from_encoded_term(encoded, &term).into()),
587 },
588 TermRef::Triple(triple) => {
589 if let EncodedTerm::Triple(encoded) = encoded {
590 insert_term(triple.subject.as_ref().into(), &encoded.subject, insert_str)?;
591 insert_term(
592 triple.predicate.as_ref().into(),
593 &encoded.predicate,
594 insert_str,
595 )?;
596 insert_term(triple.object.as_ref(), &encoded.object, insert_str)
597 } else {
598 Err(CorruptionError::from_encoded_term(encoded, &term).into())
599 }
600 }
601 }
602}
603
604pub fn parse_boolean_str(value: &str) -> Option<EncodedTerm> {
605 value.parse().map(EncodedTerm::BooleanLiteral).ok()
606}
607
608pub fn parse_float_str(value: &str) -> Option<EncodedTerm> {
609 value.parse().map(EncodedTerm::FloatLiteral).ok()
610}
611
612pub fn parse_double_str(value: &str) -> Option<EncodedTerm> {
613 value.parse().map(EncodedTerm::DoubleLiteral).ok()
614}
615
616pub fn parse_integer_str(value: &str) -> Option<EncodedTerm> {
617 value.parse().map(EncodedTerm::IntegerLiteral).ok()
618}
619
620pub fn parse_decimal_str(value: &str) -> Option<EncodedTerm> {
621 value.parse().map(EncodedTerm::DecimalLiteral).ok()
622}
623
624pub fn parse_date_time_str(value: &str) -> Option<EncodedTerm> {
625 value.parse().map(EncodedTerm::DateTimeLiteral).ok()
626}
627
628pub fn parse_time_str(value: &str) -> Option<EncodedTerm> {
629 value.parse().map(EncodedTerm::TimeLiteral).ok()
630}
631
632pub fn parse_date_str(value: &str) -> Option<EncodedTerm> {
633 value.parse().map(EncodedTerm::DateLiteral).ok()
634}
635
636pub fn parse_g_year_month_str(value: &str) -> Option<EncodedTerm> {
637 value.parse().map(EncodedTerm::GYearMonthLiteral).ok()
638}
639
640pub fn parse_g_year_str(value: &str) -> Option<EncodedTerm> {
641 value.parse().map(EncodedTerm::GYearLiteral).ok()
642}
643
644pub fn parse_g_month_day_str(value: &str) -> Option<EncodedTerm> {
645 value.parse().map(EncodedTerm::GMonthDayLiteral).ok()
646}
647
648pub fn parse_g_day_str(value: &str) -> Option<EncodedTerm> {
649 value.parse().map(EncodedTerm::GDayLiteral).ok()
650}
651
652pub fn parse_g_month_str(value: &str) -> Option<EncodedTerm> {
653 value.parse().map(EncodedTerm::GMonthLiteral).ok()
654}
655
656pub fn parse_duration_str(value: &str) -> Option<EncodedTerm> {
657 value.parse().map(EncodedTerm::DurationLiteral).ok()
658}
659
660pub fn parse_year_month_duration_str(value: &str) -> Option<EncodedTerm> {
661 value
662 .parse()
663 .map(EncodedTerm::YearMonthDurationLiteral)
664 .ok()
665}
666
667pub fn parse_day_time_duration_str(value: &str) -> Option<EncodedTerm> {
668 value.parse().map(EncodedTerm::DayTimeDurationLiteral).ok()
669}
670
671pub trait Decoder: StrLookup {
672 fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError>;
673
674 fn decode_subject(&self, encoded: &EncodedTerm) -> Result<Subject, StorageError> {
675 match self.decode_term(encoded)? {
676 Term::NamedNode(named_node) => Ok(named_node.into()),
677 Term::BlankNode(blank_node) => Ok(blank_node.into()),
678 Term::Literal(_) => Err(CorruptionError::msg(
679 "A literal has been found instead of a subject node",
680 )
681 .into()),
682 Term::Triple(triple) => Ok(Subject::Triple(triple)),
683 }
684 }
685
686 fn decode_named_or_blank_node(
687 &self,
688 encoded: &EncodedTerm,
689 ) -> Result<NamedOrBlankNode, StorageError> {
690 match self.decode_term(encoded)? {
691 Term::NamedNode(named_node) => Ok(named_node.into()),
692 Term::BlankNode(blank_node) => Ok(blank_node.into()),
693 Term::Literal(_) => Err(CorruptionError::msg(
694 "A literal has been found instead of a named or blank node",
695 )
696 .into()),
697 Term::Triple(_) => Err(CorruptionError::msg(
698 "A triple has been found instead of a named or blank node",
699 )
700 .into()),
701 }
702 }
703
704 fn decode_named_node(&self, encoded: &EncodedTerm) -> Result<NamedNode, StorageError> {
705 match self.decode_term(encoded)? {
706 Term::NamedNode(named_node) => Ok(named_node),
707 Term::BlankNode(_) => Err(CorruptionError::msg(
708 "A blank node has been found instead of a named node",
709 )
710 .into()),
711 Term::Literal(_) => {
712 Err(CorruptionError::msg("A literal has been found instead of a named node").into())
713 }
714 Term::Triple(_) => {
715 Err(CorruptionError::msg("A triple has been found instead of a named node").into())
716 }
717 }
718 }
719
720 fn decode_triple(&self, encoded: &EncodedTriple) -> Result<Triple, StorageError> {
721 Ok(Triple::new(
722 self.decode_subject(&encoded.subject)?,
723 self.decode_named_node(&encoded.predicate)?,
724 self.decode_term(&encoded.object)?,
725 ))
726 }
727
728 fn decode_quad(&self, encoded: &EncodedQuad) -> Result<Quad, StorageError> {
729 Ok(Quad::new(
730 self.decode_subject(&encoded.subject)?,
731 self.decode_named_node(&encoded.predicate)?,
732 self.decode_term(&encoded.object)?,
733 if encoded.graph_name == EncodedTerm::DefaultGraph {
734 GraphName::DefaultGraph
735 } else {
736 match self.decode_term(&encoded.graph_name)? {
737 Term::NamedNode(named_node) => named_node.into(),
738 Term::BlankNode(blank_node) => blank_node.into(),
739 Term::Literal(_) => {
740 return Err(
741 CorruptionError::msg("A literal is not a valid graph name").into()
742 )
743 }
744 Term::Triple(_) => {
745 return Err(
746 CorruptionError::msg("A triple is not a valid graph name").into()
747 )
748 }
749 }
750 },
751 ))
752 }
753}
754
755impl<S: StrLookup> Decoder for S {
756 fn decode_term(&self, encoded: &EncodedTerm) -> Result<Term, StorageError> {
757 match encoded {
758 EncodedTerm::DefaultGraph => {
759 Err(CorruptionError::msg("The default graph tag is not a valid term").into())
760 }
761 EncodedTerm::NamedNode { iri_id } => {
762 Ok(NamedNode::new_unchecked(get_required_str(self, iri_id)?).into())
763 }
764 EncodedTerm::NumericalBlankNode { id } => {
765 Ok(BlankNode::new_from_unique_id(u128::from_be_bytes(*id)).into())
766 }
767 EncodedTerm::SmallBlankNode(id) => Ok(BlankNode::new_unchecked(id.as_str()).into()),
768 EncodedTerm::BigBlankNode { id_id } => {
769 Ok(BlankNode::new_unchecked(get_required_str(self, id_id)?).into())
770 }
771 EncodedTerm::SmallStringLiteral(value) => {
772 Ok(Literal::new_simple_literal(*value).into())
773 }
774 EncodedTerm::BigStringLiteral { value_id } => {
775 Ok(Literal::new_simple_literal(get_required_str(self, value_id)?).into())
776 }
777 EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
778 Ok(Literal::new_language_tagged_literal_unchecked(*value, *language).into())
779 }
780 EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
781 Ok(Literal::new_language_tagged_literal_unchecked(
782 *value,
783 get_required_str(self, language_id)?,
784 )
785 .into())
786 }
787 EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
788 Ok(Literal::new_language_tagged_literal_unchecked(
789 get_required_str(self, value_id)?,
790 *language,
791 )
792 .into())
793 }
794 EncodedTerm::BigBigLangStringLiteral {
795 value_id,
796 language_id,
797 } => Ok(Literal::new_language_tagged_literal_unchecked(
798 get_required_str(self, value_id)?,
799 get_required_str(self, language_id)?,
800 )
801 .into()),
802 EncodedTerm::SmallTypedLiteral { value, datatype_id } => {
803 Ok(Literal::new_typed_literal(
804 *value,
805 NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
806 )
807 .into())
808 }
809 EncodedTerm::BigTypedLiteral {
810 value_id,
811 datatype_id,
812 } => Ok(Literal::new_typed_literal(
813 get_required_str(self, value_id)?,
814 NamedNode::new_unchecked(get_required_str(self, datatype_id)?),
815 )
816 .into()),
817 EncodedTerm::BooleanLiteral(value) => Ok(Literal::from(*value).into()),
818 EncodedTerm::FloatLiteral(value) => Ok(Literal::from(*value).into()),
819 EncodedTerm::DoubleLiteral(value) => Ok(Literal::from(*value).into()),
820 EncodedTerm::IntegerLiteral(value) => Ok(Literal::from(*value).into()),
821 EncodedTerm::DecimalLiteral(value) => Ok(Literal::from(*value).into()),
822 EncodedTerm::DateTimeLiteral(value) => Ok(Literal::from(*value).into()),
823 EncodedTerm::DateLiteral(value) => Ok(Literal::from(*value).into()),
824 EncodedTerm::TimeLiteral(value) => Ok(Literal::from(*value).into()),
825 EncodedTerm::GYearMonthLiteral(value) => Ok(Literal::from(*value).into()),
826 EncodedTerm::GYearLiteral(value) => Ok(Literal::from(*value).into()),
827 EncodedTerm::GMonthDayLiteral(value) => Ok(Literal::from(*value).into()),
828 EncodedTerm::GDayLiteral(value) => Ok(Literal::from(*value).into()),
829 EncodedTerm::GMonthLiteral(value) => Ok(Literal::from(*value).into()),
830 EncodedTerm::DurationLiteral(value) => Ok(Literal::from(*value).into()),
831 EncodedTerm::YearMonthDurationLiteral(value) => Ok(Literal::from(*value).into()),
832 EncodedTerm::DayTimeDurationLiteral(value) => Ok(Literal::from(*value).into()),
833 EncodedTerm::Triple(triple) => Ok(self.decode_triple(triple)?.into()),
834 }
835 }
836}
837
838fn get_required_str<L: StrLookup>(lookup: &L, id: &StrHash) -> Result<String, StorageError> {
839 Ok(lookup.get_str(id)?.ok_or_else(|| {
840 CorruptionError::new(format!(
841 "Not able to find the string with id {id:?} in the string store"
842 ))
843 })?)
844}
845
/// Pass-through [`Hasher`] intended for keys that hash themselves with a
/// single `write_u128` call (as `StrHash` does).
///
/// The value written is already a hash, so it is not mixed again: the low
/// 64 bits of the last `u128` written become the result of `finish`.
#[derive(Debug, Default)]
pub struct StrHashHasher {
    value: u64,
}

impl Hasher for StrHashHasher {
    /// Returns the low 64 bits of the last value passed to `write_u128`,
    /// or `0` if nothing has been written.
    #[inline]
    fn finish(&self) -> u64 {
        self.value
    }

    /// Byte-wise hashing is deliberately unsupported.
    ///
    /// # Panics
    ///
    /// Always panics: this hasher must only be fed through `write_u128`.
    fn write(&mut self, _: &[u8]) {
        unreachable!("Must only be used on StrHash")
    }

    /// Replaces the current state with the low 64 bits of `i`.
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    fn write_u128(&mut self, i: u128) {
        // Truncation is intentional: only 64 bits fit in the result of `finish`.
        self.value = i as u64;
    }
}
867
#[cfg(test)]
mod tests {
    use super::*;
    #[cfg(target_pointer_width = "64")]
    use std::mem::{align_of, size_of};

    /// Pins the hash bytes of known inputs so that a change of hashing
    /// algorithm or byte order is detected.
    #[test]
    fn str_hash_stability() {
        const EMPTY_HASH: [u8; 16] = [
            244, 242, 206, 212, 71, 171, 2, 66, 125, 224, 163, 128, 71, 215, 73, 80,
        ];

        const FOO_HASH: [u8; 16] = [
            177, 216, 59, 176, 7, 47, 87, 243, 76, 253, 150, 32, 126, 153, 216, 19,
        ];

        for (input, expected) in [("", EMPTY_HASH), ("foo", FOO_HASH)] {
            assert_eq!(StrHash::new(input).to_be_bytes(), expected);
            assert_eq!(StrHash::from_be_bytes(expected).to_be_bytes(), expected);
        }
    }

    /// Guards the in-memory footprint of the encoded types on 64-bit targets.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_size_and_alignment() {
        assert_eq!(size_of::<EncodedTerm>(), 40);
        assert_eq!(size_of::<EncodedQuad>(), 160);
        assert_eq!(align_of::<EncodedTerm>(), 8);
    }
}