1use crate::sparql::QueryDataset;
2use crate::storage::numeric_encoder::{
3 insert_term, Decoder, EncodedTerm, EncodedTriple, StrHash, StrHashHasher, StrLookup,
4};
5use crate::storage::{CorruptionError, StorageError, StorageReader};
6use oxrdf::Term;
7use oxsdatatypes::Boolean;
8use spareval::{ExpressionTerm, ExpressionTriple, InternalQuad, QueryableDataset};
9use std::cell::RefCell;
10use std::collections::hash_map::Entry;
11use std::collections::HashMap;
12use std::hash::BuildHasherDefault;
13use std::iter::empty;
14use std::sync::Arc;
15
/// A view over a [`StorageReader`] restricted to the graphs selected by a query dataset.
///
/// Strings registered through `insert_str` that the reader does not already hold are kept
/// in `extra`, and `get_str` consults that map before falling back to the reader.
pub struct DatasetView {
    /// Read access to the underlying storage.
    reader: StorageReader,
    /// Strings keyed by their hash that were registered on this view and were not found
    /// in the storage at registration time.
    extra: RefCell<HashMap<StrHash, String, BuildHasherDefault<StrHashHasher>>>,
    /// Encoded form of the default-graph and named-graph selections of the query.
    dataset: EncodedDatasetSpec,
}
21
22impl DatasetView {
23 pub fn new(reader: StorageReader, dataset: &QueryDataset) -> Self {
24 let dataset = EncodedDatasetSpec {
25 default: dataset
26 .default_graph_graphs()
27 .map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
28 named: dataset
29 .available_named_graphs()
30 .map(|graphs| graphs.iter().map(|g| g.as_ref().into()).collect::<Vec<_>>()),
31 };
32 Self {
33 reader,
34 extra: RefCell::new(HashMap::default()),
35 dataset,
36 }
37 }
38
39 pub fn insert_str(&self, key: &StrHash, value: &str) {
40 if let Entry::Vacant(e) = self.extra.borrow_mut().entry(*key) {
41 if !matches!(self.reader.contains_str(key), Ok(true)) {
42 e.insert(value.to_owned());
43 }
44 }
45 }
46}
47
48impl QueryableDataset for DatasetView {
49 type InternalTerm = EncodedTerm;
50 type Error = StorageError;
51
52 fn internal_quads_for_pattern(
53 &self,
54 subject: Option<&EncodedTerm>,
55 predicate: Option<&EncodedTerm>,
56 object: Option<&EncodedTerm>,
57 graph_name: Option<Option<&EncodedTerm>>,
58 ) -> Box<dyn Iterator<Item = Result<InternalQuad<Self>, StorageError>>> {
59 if let Some(graph_name) = graph_name {
60 if let Some(graph_name) = graph_name {
61 if self
62 .dataset
63 .named
64 .as_ref()
65 .map_or(true, |d| d.contains(graph_name))
66 {
67 Box::new(
68 self.reader
69 .quads_for_pattern(subject, predicate, object, Some(graph_name))
70 .map(|quad| {
71 let quad = quad?;
72 Ok(InternalQuad {
73 subject: quad.subject,
74 predicate: quad.predicate,
75 object: quad.object,
76 graph_name: if quad.graph_name.is_default_graph() {
77 None
78 } else {
79 Some(quad.graph_name)
80 },
81 })
82 }),
83 )
84 } else {
85 Box::new(empty())
86 }
87 } else if let Some(default_graph_graphs) = &self.dataset.default {
88 if default_graph_graphs.len() == 1 {
89 Box::new(
91 self.reader
92 .quads_for_pattern(
93 subject,
94 predicate,
95 object,
96 Some(&default_graph_graphs[0]),
97 )
98 .map(|quad| {
99 let quad = quad?;
100 Ok(InternalQuad {
101 subject: quad.subject,
102 predicate: quad.predicate,
103 object: quad.object,
104 graph_name: None,
105 })
106 }),
107 )
108 } else {
109 let iters = default_graph_graphs
110 .iter()
111 .map(|graph_name| {
112 self.reader.quads_for_pattern(
113 subject,
114 predicate,
115 object,
116 Some(graph_name),
117 )
118 })
119 .collect::<Vec<_>>();
120 Box::new(iters.into_iter().flatten().map(|quad| {
121 let quad = quad?;
122 Ok(InternalQuad {
123 subject: quad.subject,
124 predicate: quad.predicate,
125 object: quad.object,
126 graph_name: None,
127 })
128 }))
129 }
130 } else {
131 Box::new(
132 self.reader
133 .quads_for_pattern(subject, predicate, object, None)
134 .map(|quad| {
135 let quad = quad?;
136 Ok(InternalQuad {
137 subject: quad.subject,
138 predicate: quad.predicate,
139 object: quad.object,
140 graph_name: None,
141 })
142 }),
143 )
144 }
145 } else if let Some(named_graphs) = &self.dataset.named {
146 let iters = named_graphs
147 .iter()
148 .map(|graph_name| {
149 self.reader
150 .quads_for_pattern(subject, predicate, object, Some(graph_name))
151 })
152 .collect::<Vec<_>>();
153 Box::new(iters.into_iter().flatten().map(|quad| {
154 let quad = quad?;
155 Ok(InternalQuad {
156 subject: quad.subject,
157 predicate: quad.predicate,
158 object: quad.object,
159 graph_name: if quad.graph_name.is_default_graph() {
160 None
161 } else {
162 Some(quad.graph_name)
163 },
164 })
165 }))
166 } else {
167 Box::new(
168 self.reader
169 .quads_for_pattern(subject, predicate, object, None)
170 .filter_map(|quad| {
171 let quad = match quad {
172 Ok(quad) => quad,
173 Err(e) => return Some(Err(e)),
174 };
175 Some(Ok(InternalQuad {
176 subject: quad.subject,
177 predicate: quad.predicate,
178 object: quad.object,
179 graph_name: if quad.graph_name.is_default_graph() {
180 return None;
181 } else {
182 Some(quad.graph_name)
183 },
184 }))
185 }),
186 )
187 }
188 }
189
190 fn internal_named_graphs(&self) -> Box<dyn Iterator<Item = Result<EncodedTerm, StorageError>>> {
191 Box::new(self.reader.named_graphs())
192 }
193
194 fn contains_internal_graph_name(&self, graph_name: &EncodedTerm) -> Result<bool, StorageError> {
195 self.reader.contains_named_graph(graph_name)
196 }
197
198 fn internalize_term(&self, term: Term) -> Result<EncodedTerm, StorageError> {
199 let encoded = term.as_ref().into();
200 insert_term(term.as_ref(), &encoded, &mut |key, value| {
201 self.insert_str(key, value);
202 Ok(())
203 })?;
204 Ok(encoded)
205 }
206
207 fn externalize_term(&self, term: EncodedTerm) -> Result<Term, StorageError> {
208 self.decode_term(&term)
209 }
210
211 fn externalize_expression_term(
212 &self,
213 term: EncodedTerm,
214 ) -> Result<ExpressionTerm, StorageError> {
215 Ok(match term {
216 EncodedTerm::DefaultGraph => {
217 return Err(CorruptionError::new("Unexpected default graph").into())
218 }
219 EncodedTerm::BooleanLiteral(value) => ExpressionTerm::BooleanLiteral(value),
220 EncodedTerm::FloatLiteral(value) => ExpressionTerm::FloatLiteral(value),
221 EncodedTerm::DoubleLiteral(value) => ExpressionTerm::DoubleLiteral(value),
222 EncodedTerm::IntegerLiteral(value) => ExpressionTerm::IntegerLiteral(value),
223 EncodedTerm::DecimalLiteral(value) => ExpressionTerm::DecimalLiteral(value),
224 EncodedTerm::DateTimeLiteral(value) => ExpressionTerm::DateTimeLiteral(value),
225 EncodedTerm::TimeLiteral(value) => ExpressionTerm::TimeLiteral(value),
226 EncodedTerm::DateLiteral(value) => ExpressionTerm::DateLiteral(value),
227 EncodedTerm::GYearMonthLiteral(value) => ExpressionTerm::GYearMonthLiteral(value),
228 EncodedTerm::GYearLiteral(value) => ExpressionTerm::GYearLiteral(value),
229 EncodedTerm::GMonthDayLiteral(value) => ExpressionTerm::GMonthDayLiteral(value),
230 EncodedTerm::GDayLiteral(value) => ExpressionTerm::GDayLiteral(value),
231 EncodedTerm::GMonthLiteral(value) => ExpressionTerm::GMonthLiteral(value),
232 EncodedTerm::DurationLiteral(value) => ExpressionTerm::DurationLiteral(value),
233 EncodedTerm::YearMonthDurationLiteral(value) => {
234 ExpressionTerm::YearMonthDurationLiteral(value)
235 }
236 EncodedTerm::DayTimeDurationLiteral(value) => {
237 ExpressionTerm::DayTimeDurationLiteral(value)
238 }
239 EncodedTerm::Triple(t) => ExpressionTriple::new(
240 self.externalize_expression_term(t.subject.clone())?,
241 self.externalize_expression_term(t.predicate.clone())?,
242 self.externalize_expression_term(t.object.clone())?,
243 )
244 .ok_or_else(|| CorruptionError::msg("Invalid RDF-star triple term in the storage"))?
245 .into(),
246 _ => self.decode_term(&term)?.into(), })
248 }
249
250 fn internalize_expression_term(
251 &self,
252 term: ExpressionTerm,
253 ) -> Result<EncodedTerm, StorageError> {
254 Ok(match term {
255 ExpressionTerm::BooleanLiteral(value) => EncodedTerm::BooleanLiteral(value),
256 ExpressionTerm::FloatLiteral(value) => EncodedTerm::FloatLiteral(value),
257 ExpressionTerm::DoubleLiteral(value) => EncodedTerm::DoubleLiteral(value),
258 ExpressionTerm::IntegerLiteral(value) => EncodedTerm::IntegerLiteral(value),
259 ExpressionTerm::DecimalLiteral(value) => EncodedTerm::DecimalLiteral(value),
260 ExpressionTerm::DateTimeLiteral(value) => EncodedTerm::DateTimeLiteral(value),
261 ExpressionTerm::TimeLiteral(value) => EncodedTerm::TimeLiteral(value),
262 ExpressionTerm::DateLiteral(value) => EncodedTerm::DateLiteral(value),
263 ExpressionTerm::GYearMonthLiteral(value) => EncodedTerm::GYearMonthLiteral(value),
264 ExpressionTerm::GYearLiteral(value) => EncodedTerm::GYearLiteral(value),
265 ExpressionTerm::GMonthDayLiteral(value) => EncodedTerm::GMonthDayLiteral(value),
266 ExpressionTerm::GDayLiteral(value) => EncodedTerm::GDayLiteral(value),
267 ExpressionTerm::GMonthLiteral(value) => EncodedTerm::GMonthLiteral(value),
268 ExpressionTerm::DurationLiteral(value) => EncodedTerm::DurationLiteral(value),
269 ExpressionTerm::YearMonthDurationLiteral(value) => {
270 EncodedTerm::YearMonthDurationLiteral(value)
271 }
272 ExpressionTerm::DayTimeDurationLiteral(value) => {
273 EncodedTerm::DayTimeDurationLiteral(value)
274 }
275 ExpressionTerm::Triple(t) => EncodedTerm::Triple(Arc::new(EncodedTriple {
276 subject: self.internalize_expression_term(t.subject.into())?,
277 predicate: self.internalize_expression_term(t.predicate.into())?,
278 object: self.internalize_expression_term(t.object)?,
279 })),
280 _ => self.internalize_term(term.into())?, })
282 }
283
284 fn internal_term_effective_boolean_value(
285 &self,
286 term: EncodedTerm,
287 ) -> Result<Option<bool>, StorageError> {
288 Ok(match term {
289 EncodedTerm::BooleanLiteral(value) => Some(value.into()),
290 EncodedTerm::SmallStringLiteral(value) => Some(!value.is_empty()),
291 EncodedTerm::BigStringLiteral { .. } => {
292 Some(false) }
294 EncodedTerm::FloatLiteral(value) => Some(Boolean::from(value).into()),
295 EncodedTerm::DoubleLiteral(value) => Some(Boolean::from(value).into()),
296 EncodedTerm::IntegerLiteral(value) => Some(Boolean::from(value).into()),
297 EncodedTerm::DecimalLiteral(value) => Some(Boolean::from(value).into()),
298 _ => None,
299 })
300 }
301}
302
303impl StrLookup for DatasetView {
304 fn get_str(&self, key: &StrHash) -> Result<Option<String>, StorageError> {
305 Ok(if let Some(value) = self.extra.borrow().get(key) {
306 Some(value.clone())
307 } else {
308 self.reader.get_str(key)?
309 })
310 }
311}
312
/// Encoded form of the graph selections of a query dataset.
struct EncodedDatasetSpec {
    /// Graphs whose quads are exposed as the default graph of the query.
    /// When `None`, default-graph lookups query the reader without a graph restriction.
    default: Option<Vec<EncodedTerm>>,
    /// Named graphs visible to the query.
    /// When `None`, quad lookups are not restricted to a list of named graphs.
    named: Option<Vec<EncodedTerm>>,
}