oxigraph/sparql/
dataset.rs

1use crate::sparql::QueryDataset;
2use crate::storage::numeric_encoder::{
3    insert_term, Decoder, EncodedTerm, EncodedTriple, StrHash, StrHashHasher, StrLookup,
4};
5use crate::storage::{CorruptionError, StorageError, StorageReader};
6use oxrdf::Term;
7use oxsdatatypes::Boolean;
8use spareval::{ExpressionTerm, ExpressionTriple, InternalQuad, QueryableDataset};
9use std::cell::RefCell;
10use std::collections::hash_map::Entry;
11use std::collections::HashMap;
12use std::hash::BuildHasherDefault;
13use std::iter::empty;
14use std::sync::Arc;
15
/// View over a [`StorageReader`] restricted by a query dataset specification.
///
/// It implements [`QueryableDataset`] so it can be handed to the SPARQL evaluator, and
/// [`StrLookup`] so encoded terms can be decoded back to strings.
16pub struct DatasetView {
    /// Reader used for quad lookups, named graph listing and string lookups.
17    reader: StorageReader,
    /// Strings registered through `insert_str` that the reader did not report as already stored.
    /// `get_str` checks this map before asking the reader.
18    extra: RefCell<HashMap<StrHash, String, BuildHasherDefault<StrHashHasher>>>,
    /// Encoded form of the default/named graph restrictions taken from the `QueryDataset`.
19    dataset: EncodedDatasetSpec,
20}
21
22impl DatasetView {
23    pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self {
24        let dataset = EncodedDatasetSpec {
25            default: dataset
26                .default_graph_graphs()
27                .map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
28            named: dataset
29                .available_named_graphs()
30                .map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
31        };
32        Self {
33            reader,
34            extra: RefCell::new(HashMap::default()),
35            dataset,
36        }
37    }
38
39    pub fn insert_str(&self, key: &StrHash, value: &str) {
40        if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) {
41            if !matches!(self.reader.contains_str(key), Ok(true)) {
42                e.insert(value.to_owned());
43            }
44        }
45    }
46}
47
48impl QueryableDataset for DatasetView {
49    type InternalTerm = EncodedTerm;
50    type Error = StorageError;
51
52    fn internal_quads_for_pattern(
53        &self,
54        subject: Option<&EncodedTerm>,
55        predicate: Option<&EncodedTerm>,
56        object: Option<&EncodedTerm>,
57        graph_name: Option<Option<&EncodedTerm>>,
58    ) -> Box<dyn Iterator<Item = Result<InternalQuad<Self>, StorageError>>> {
59        if let Some(graph_name) = graph_name {
60            if let Some(graph_name) = graph_name {
61                if self
62                    .dataset
63                    .named
64                    .as_ref()
65                    .map_or(true, |d| d.contains(graph_name))
66                {
67                    Box::new(
68                        self.reader
69                            .quads_for_pattern(subject, predicate, object, Some(graph_name))
70                            .map(|quad| {
71                                let quad = quad?;
72                                Ok(InternalQuad {
73                                    subject: quad.subject,
74                                    predicate: quad.predicate,
75                                    object: quad.object,
76                                    graph_name: if quad.graph_name.is_default_graph() {
77                                        None
78                                    } else {
79                                        Some(quad.graph_name)
80                                    },
81                                })
82                            }),
83                    )
84                } else {
85                    Box::new(empty())
86                }
87            } else if let Some(default_graph_graphs) = &self.dataset.default {
88                if default_graph_graphs.len() == 1 {
89                    // Single graph optimization
90                    Box::new(
91                        self.reader
92                            .quads_for_pattern(
93                                subject,
94                                predicate,
95                                object,
96                                Some(&default_graph_graphs[0]),
97                            )
98                            .map(|quad| {
99                                let quad = quad?;
100                                Ok(InternalQuad {
101                                    subject: quad.subject,
102                                    predicate: quad.predicate,
103                                    object: quad.object,
104                                    graph_name: None,
105                                })
106                            }),
107                    )
108                } else {
109                    let iters = default_graph_graphs
110                        .iter()
111                        .map(|graph_name| {
112                            self.reader.quads_for_pattern(
113                                subject,
114                                predicate,
115                                object,
116                                Some(graph_name),
117                            )
118                        })
119                        .collect::<Vec<_>>();
120                    Box::new(iters.into_iter().flatten().map(|quad| {
121                        let quad = quad?;
122                        Ok(InternalQuad {
123                            subject: quad.subject,
124                            predicate: quad.predicate,
125                            object: quad.object,
126                            graph_name: None,
127                        })
128                    }))
129                }
130            } else {
131                Box::new(
132                    self.reader
133                        .quads_for_pattern(subject, predicate, object, None)
134                        .map(|quad| {
135                            let quad = quad?;
136                            Ok(InternalQuad {
137                                subject: quad.subject,
138                                predicate: quad.predicate,
139                                object: quad.object,
140                                graph_name: None,
141                            })
142                        }),
143                )
144            }
145        } else if let Some(named_graphs) = &self.dataset.named {
146            let iters = named_graphs
147                .iter()
148                .map(|graph_name| {
149                    self.reader
150                        .quads_for_pattern(subject, predicate, object, Some(graph_name))
151                })
152                .collect::<Vec<_>>();
153            Box::new(iters.into_iter().flatten().map(|quad| {
154                let quad = quad?;
155                Ok(InternalQuad {
156                    subject: quad.subject,
157                    predicate: quad.predicate,
158                    object: quad.object,
159                    graph_name: if quad.graph_name.is_default_graph() {
160                        None
161                    } else {
162                        Some(quad.graph_name)
163                    },
164                })
165            }))
166        } else {
167            Box::new(
168                self.reader
169                    .quads_for_pattern(subject, predicate, object, None)
170                    .filter_map(|quad| {
171                        let quad = match quad {
172                            Ok(quad) => quad,
173                            Err(e) => return Some(Err(e)),
174                        };
175                        Some(Ok(InternalQuad {
176                            subject: quad.subject,
177                            predicate: quad.predicate,
178                            object: quad.object,
179                            graph_name: if quad.graph_name.is_default_graph() {
180                                return None;
181                            } else {
182                                Some(quad.graph_name)
183                            },
184                        }))
185                    }),
186            )
187        }
188    }
189
190    fn internal_named_graphs(&self) -> Box<dyn Iterator<Item = Result<EncodedTerm, StorageError>>> {
191        Box::new(self.reader.named_graphs())
192    }
193
194    fn contains_internal_graph_name(&self, graph_name: &EncodedTerm) -> Result<bool, StorageError> {
195        self.reader.contains_named_graph(graph_name)
196    }
197
198    fn internalize_term(&self, term: Term) -> Result<EncodedTerm, StorageError> {
199        let encoded = term.as_ref().into();
200        insert_term(term.as_ref(), &encoded, &mut |key, value| {
201            self.insert_str(key, value);
202            Ok(())
203        })?;
204        Ok(encoded)
205    }
206
207    fn externalize_term(&self, term: EncodedTerm) -> Result<Term, StorageError> {
208        self.decode_term(&term)
209    }
210
211    fn externalize_expression_term(
212        &self,
213        term: EncodedTerm,
214    ) -> Result<ExpressionTerm, StorageError> {
215        Ok(match term {
216            EncodedTerm::DefaultGraph => {
217                return Err(CorruptionError::new("Unexpected default graph").into())
218            }
219            EncodedTerm::BooleanLiteral(value) => ExpressionTerm::BooleanLiteral(value),
220            EncodedTerm::FloatLiteral(value) => ExpressionTerm::FloatLiteral(value),
221            EncodedTerm::DoubleLiteral(value) => ExpressionTerm::DoubleLiteral(value),
222            EncodedTerm::IntegerLiteral(value) => ExpressionTerm::IntegerLiteral(value),
223            EncodedTerm::DecimalLiteral(value) => ExpressionTerm::DecimalLiteral(value),
224            EncodedTerm::DateTimeLiteral(value) => ExpressionTerm::DateTimeLiteral(value),
225            EncodedTerm::TimeLiteral(value) => ExpressionTerm::TimeLiteral(value),
226            EncodedTerm::DateLiteral(value) => ExpressionTerm::DateLiteral(value),
227            EncodedTerm::GYearMonthLiteral(value) => ExpressionTerm::GYearMonthLiteral(value),
228            EncodedTerm::GYearLiteral(value) => ExpressionTerm::GYearLiteral(value),
229            EncodedTerm::GMonthDayLiteral(value) => ExpressionTerm::GMonthDayLiteral(value),
230            EncodedTerm::GDayLiteral(value) => ExpressionTerm::GDayLiteral(value),
231            EncodedTerm::GMonthLiteral(value) => ExpressionTerm::GMonthLiteral(value),
232            EncodedTerm::DurationLiteral(value) => ExpressionTerm::DurationLiteral(value),
233            EncodedTerm::YearMonthDurationLiteral(value) => {
234                ExpressionTerm::YearMonthDurationLiteral(value)
235            }
236            EncodedTerm::DayTimeDurationLiteral(value) => {
237                ExpressionTerm::DayTimeDurationLiteral(value)
238            }
239            EncodedTerm::Triple(t) => ExpressionTriple::new(
240                self.externalize_expression_term(t.subject.clone())?,
241                self.externalize_expression_term(t.predicate.clone())?,
242                self.externalize_expression_term(t.object.clone())?,
243            )
244            .ok_or_else(|| CorruptionError::msg("Invalid RDF-star triple term in the storage"))?
245            .into(),
246            _ => self.decode_term(&term)?.into(), // No escape
247        })
248    }
249
250    fn internalize_expression_term(
251        &self,
252        term: ExpressionTerm,
253    ) -> Result<EncodedTerm, StorageError> {
254        Ok(match term {
255            ExpressionTerm::BooleanLiteral(value) => EncodedTerm::BooleanLiteral(value),
256            ExpressionTerm::FloatLiteral(value) => EncodedTerm::FloatLiteral(value),
257            ExpressionTerm::DoubleLiteral(value) => EncodedTerm::DoubleLiteral(value),
258            ExpressionTerm::IntegerLiteral(value) => EncodedTerm::IntegerLiteral(value),
259            ExpressionTerm::DecimalLiteral(value) => EncodedTerm::DecimalLiteral(value),
260            ExpressionTerm::DateTimeLiteral(value) => EncodedTerm::DateTimeLiteral(value),
261            ExpressionTerm::TimeLiteral(value) => EncodedTerm::TimeLiteral(value),
262            ExpressionTerm::DateLiteral(value) => EncodedTerm::DateLiteral(value),
263            ExpressionTerm::GYearMonthLiteral(value) => EncodedTerm::GYearMonthLiteral(value),
264            ExpressionTerm::GYearLiteral(value) => EncodedTerm::GYearLiteral(value),
265            ExpressionTerm::GMonthDayLiteral(value) => EncodedTerm::GMonthDayLiteral(value),
266            ExpressionTerm::GDayLiteral(value) => EncodedTerm::GDayLiteral(value),
267            ExpressionTerm::GMonthLiteral(value) => EncodedTerm::GMonthLiteral(value),
268            ExpressionTerm::DurationLiteral(value) => EncodedTerm::DurationLiteral(value),
269            ExpressionTerm::YearMonthDurationLiteral(value) => {
270                EncodedTerm::YearMonthDurationLiteral(value)
271            }
272            ExpressionTerm::DayTimeDurationLiteral(value) => {
273                EncodedTerm::DayTimeDurationLiteral(value)
274            }
275            ExpressionTerm::Triple(t) => EncodedTerm::Triple(Arc::new(EncodedTriple {
276                subject: self.internalize_expression_term(t.subject.into())?,
277                predicate: self.internalize_expression_term(t.predicate.into())?,
278                object: self.internalize_expression_term(t.object)?,
279            })),
280            _ => self.internalize_term(term.into())?, // No fast path
281        })
282    }
283
284    fn internal_term_effective_boolean_value(
285        &self,
286        term: EncodedTerm,
287    ) -> Result<Option<bool>, StorageError> {
288        Ok(match term {
289            EncodedTerm::BooleanLiteral(value) => Some(value.into()),
290            EncodedTerm::SmallStringLiteral(value) => Some(!value.is_empty()),
291            EncodedTerm::BigStringLiteral { .. } => {
292                Some(false) // A big literal can't be empty
293            }
294            EncodedTerm::FloatLiteral(value) => Some(Boolean::from(value).into()),
295            EncodedTerm::DoubleLiteral(value) => Some(Boolean::from(value).into()),
296            EncodedTerm::IntegerLiteral(value) => Some(Boolean::from(value).into()),
297            EncodedTerm::DecimalLiteral(value) => Some(Boolean::from(value).into()),
298            _ => None,
299        })
300    }
301}
302
303impl StrLookup for DatasetView {
304    fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
305        Ok(if let Some(value) = self.extra.borrow().get(key) {
306            Some(value.clone())
307        } else {
308            self.reader.get_str(key)?
309        })
310    }
311}
312
/// Encoded form of the graph restrictions of a query dataset.
313struct EncodedDatasetSpec {
    /// Graphs whose quads are exposed as the query default graph.
    /// When `None`, default graph lookups query the reader without any graph restriction.
314    default: Option<Vec<EncodedTerm>>,
    /// Graphs that may be accessed as named graphs.
    /// When `None`, named graph lookups are not restricted to a list.
315    named: Option<Vec<EncodedTerm>>,
316}