spareval/
lib.rs

1#![doc = include_str!("../README.md")]
2#![doc(test(attr(deny(warnings))))]
3#![cfg_attr(docsrs, feature(doc_auto_cfg))]
4#![doc(html_favicon_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
5#![doc(html_logo_url = "https://raw.githubusercontent.com/oxigraph/oxigraph/main/logo.svg")]
6
7mod dataset;
8mod error;
9mod eval;
10mod model;
11mod service;
12#[cfg(feature = "rdf-star")]
13pub use crate::dataset::ExpressionTriple;
14pub use crate::dataset::{ExpressionTerm, InternalQuad, QueryableDataset};
15pub use crate::error::QueryEvaluationError;
16use crate::eval::{EvalNodeWithStats, SimpleEvaluator, Timer};
17pub use crate::model::{QueryResults, QuerySolution, QuerySolutionIter, QueryTripleIter};
18use crate::service::ServiceHandlerRegistry;
19pub use crate::service::{DefaultServiceHandler, ServiceHandler};
20use json_event_parser::{JsonEvent, WriterJsonSerializer};
21use oxrdf::{NamedNode, Term, Variable};
22use oxsdatatypes::{DayTimeDuration, Float};
23use spargebra::Query;
24use sparopt::algebra::GraphPattern;
25use sparopt::Optimizer;
26use std::collections::HashMap;
27use std::rc::Rc;
28use std::sync::Arc;
29use std::{fmt, io};
30
31/// Evaluates a query against a given [RDF dataset](https://www.w3.org/TR/rdf11-concepts/#dfn-rdf-dataset)
32///
33/// Note that this evaluator does not handle the `FROM` and `FROM NAMED` part of the query.
34/// You must select the proper dataset before using this struct.
35///
36/// To adapt this software to work on your own RDF dataset, you need to implement the [`QueryableDataset`] trait.
37///
38/// ```
39/// use oxrdf::{Dataset, GraphName, NamedNode, Quad};
40/// use spareval::{QueryEvaluator, QueryResults};
41/// use spargebra::Query;
42///
43/// let ex = NamedNode::new("http://example.com")?;
44/// let dataset = Dataset::from_iter([Quad::new(
45///     ex.clone(),
46///     ex.clone(),
47///     ex.clone(),
48///     GraphName::DefaultGraph,
49/// )]);
50/// let query = Query::parse("SELECT * WHERE { ?s ?p ?o }", None)?;
51/// let results = QueryEvaluator::new().execute(dataset, &query);
52/// if let QueryResults::Solutions(solutions) = results? {
53///     let solutions = solutions.collect::<Result<Vec<_>, _>>()?;
54///     assert_eq!(solutions.len(), 1);
55///     assert_eq!(solutions[0]["s"], ex.into());
56/// }
57/// # Result::<_, Box<dyn std::error::Error>>::Ok(())
58/// ```
59#[derive(Clone, Default)]
60pub struct QueryEvaluator {
61    service_handler: ServiceHandlerRegistry,
62    custom_functions: CustomFunctionRegistry,
63    without_optimizations: bool,
64    run_stats: bool,
65}
66
67impl QueryEvaluator {
68    #[must_use]
69    #[inline]
70    pub fn new() -> Self {
71        Self::default()
72    }
73
74    pub fn execute(
75        &self,
76        dataset: impl QueryableDataset,
77        query: &Query,
78    ) -> Result<QueryResults, QueryEvaluationError> {
79        self.explain(dataset, query).0
80    }
81
82    /// Executes a SPARQL query while substituting some variables with the given values.
83    ///
84    /// Substitution follows [RDF-dev SEP-0007](https://github.com/w3c/sparql-dev/blob/main/SEP/SEP-0007/sep-0007.md).
85    ///
86    /// ```
87    /// use oxrdf::{Dataset, GraphName, NamedNode, Quad, Variable};
88    /// use spareval::{QueryEvaluator, QueryResults};
89    /// use spargebra::Query;
90    ///
91    /// let ex = NamedNode::new("http://example.com")?;
92    /// let dataset = Dataset::from_iter([Quad::new(
93    ///     ex.clone(),
94    ///     ex.clone(),
95    ///     ex.clone(),
96    ///     GraphName::DefaultGraph,
97    /// )]);
98    /// let query = Query::parse("SELECT * WHERE { ?s ?p ?o }", None)?;
99    /// let results = QueryEvaluator::new().execute_with_substituted_variables(
100    ///     dataset,
101    ///     &query,
102    ///     [(Variable::new("s")?, ex.clone().into())],
103    /// );
104    /// if let QueryResults::Solutions(solutions) = results? {
105    ///     let solutions = solutions.collect::<Result<Vec<_>, _>>()?;
106    ///     assert_eq!(solutions.len(), 1);
107    ///     assert_eq!(solutions[0]["s"], ex.into());
108    /// }
109    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
110    /// ```
111    pub fn execute_with_substituted_variables(
112        &self,
113        dataset: impl QueryableDataset,
114        query: &Query,
115        substitutions: impl IntoIterator<Item = (Variable, Term)>,
116    ) -> Result<QueryResults, QueryEvaluationError> {
117        self.explain_with_substituted_variables(dataset, query, substitutions)
118            .0
119    }
120
121    pub fn explain(
122        &self,
123        dataset: impl QueryableDataset,
124        query: &Query,
125    ) -> (Result<QueryResults, QueryEvaluationError>, QueryExplanation) {
126        self.explain_with_substituted_variables(dataset, query, [])
127    }
128
129    pub fn explain_with_substituted_variables(
130        &self,
131        dataset: impl QueryableDataset,
132        query: &Query,
133        substitutions: impl IntoIterator<Item = (Variable, Term)>,
134    ) -> (Result<QueryResults, QueryEvaluationError>, QueryExplanation) {
135        let start_planning = Timer::now();
136        let (results, plan_node_with_stats, planning_duration) = match query {
137            Query::Select {
138                pattern, base_iri, ..
139            } => {
140                let mut pattern = GraphPattern::from(pattern);
141                if !self.without_optimizations {
142                    pattern = Optimizer::optimize_graph_pattern(pattern);
143                }
144                let planning_duration = start_planning.elapsed();
145                let (results, explanation) = SimpleEvaluator::new(
146                    dataset,
147                    base_iri.clone().map(Rc::new),
148                    Rc::new(self.service_handler.clone()),
149                    Rc::new(self.custom_functions.clone()),
150                    self.run_stats,
151                )
152                .evaluate_select(&pattern, substitutions);
153                (
154                    results.map(QueryResults::Solutions),
155                    explanation,
156                    planning_duration,
157                )
158            }
159            Query::Ask {
160                pattern, base_iri, ..
161            } => {
162                let mut pattern = GraphPattern::from(pattern);
163                if !self.without_optimizations {
164                    pattern = Optimizer::optimize_graph_pattern(pattern);
165                }
166                let planning_duration = start_planning.elapsed();
167                let (results, explanation) = SimpleEvaluator::new(
168                    dataset,
169                    base_iri.clone().map(Rc::new),
170                    Rc::new(self.service_handler.clone()),
171                    Rc::new(self.custom_functions.clone()),
172                    self.run_stats,
173                )
174                .evaluate_ask(&pattern, substitutions);
175                (
176                    results.map(QueryResults::Boolean),
177                    explanation,
178                    planning_duration,
179                )
180            }
181            Query::Construct {
182                template,
183                pattern,
184                base_iri,
185                ..
186            } => {
187                let mut pattern = GraphPattern::from(pattern);
188                if !self.without_optimizations {
189                    pattern = Optimizer::optimize_graph_pattern(pattern);
190                }
191                let planning_duration = start_planning.elapsed();
192                let (results, explanation) = SimpleEvaluator::new(
193                    dataset,
194                    base_iri.clone().map(Rc::new),
195                    Rc::new(self.service_handler.clone()),
196                    Rc::new(self.custom_functions.clone()),
197                    self.run_stats,
198                )
199                .evaluate_construct(&pattern, template, substitutions);
200                (
201                    results.map(QueryResults::Graph),
202                    explanation,
203                    planning_duration,
204                )
205            }
206            Query::Describe {
207                pattern, base_iri, ..
208            } => {
209                let mut pattern = GraphPattern::from(pattern);
210                if !self.without_optimizations {
211                    pattern = Optimizer::optimize_graph_pattern(pattern);
212                }
213                let planning_duration = start_planning.elapsed();
214                let (results, explanation) = SimpleEvaluator::new(
215                    dataset,
216                    base_iri.clone().map(Rc::new),
217                    Rc::new(self.service_handler.clone()),
218                    Rc::new(self.custom_functions.clone()),
219                    self.run_stats,
220                )
221                .evaluate_describe(&pattern, substitutions);
222                (
223                    results.map(QueryResults::Graph),
224                    explanation,
225                    planning_duration,
226                )
227            }
228        };
229        let explanation = QueryExplanation {
230            inner: plan_node_with_stats,
231            with_stats: self.run_stats,
232            planning_duration,
233        };
234        (results, explanation)
235    }
236
237    /// Use a given [`ServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls.
238    ///
239    /// See [`ServiceHandler`] for an example.
240    #[inline]
241    #[must_use]
242    pub fn with_service_handler(
243        mut self,
244        service_name: impl Into<NamedNode>,
245        handler: impl ServiceHandler + 'static,
246    ) -> Self {
247        self.service_handler = self
248            .service_handler
249            .with_handler(service_name.into(), handler);
250        self
251    }
252
253    /// Use a given [`DefaultServiceHandler`] to execute [SPARQL 1.1 Federated Query](https://www.w3.org/TR/sparql11-federated-query/) SERVICE calls if no explicit service handler is defined for the service.
254    ///
255    /// See [`DefaultServiceHandler`] for an example.
256    #[inline]
257    #[must_use]
258    pub fn with_default_service_handler(
259        mut self,
260        handler: impl DefaultServiceHandler + 'static,
261    ) -> Self {
262        self.service_handler = self.service_handler.with_default_handler(handler);
263        self
264    }
265
266    #[inline]
267    #[must_use]
268    pub fn has_default_service_handler(&self) -> bool {
269        self.service_handler.has_default_handler()
270    }
271
272    /// Adds a custom SPARQL evaluation function.
273    ///
274    /// Example with a function serializing terms to N-Triples:
275    /// ```
276    /// use oxrdf::{Dataset, Literal, NamedNode};
277    /// use spareval::{QueryEvaluator, QueryResults};
278    /// use spargebra::Query;
279    ///
280    /// let evaluator = QueryEvaluator::new().with_custom_function(
281    ///     NamedNode::new("http://www.w3.org/ns/formats/N-Triples")?,
282    ///     |args| args.get(0).map(|t| Literal::from(t.to_string()).into()),
283    /// );
284    /// let query = Query::parse(
285    ///     "SELECT (<http://www.w3.org/ns/formats/N-Triples>(1) AS ?nt) WHERE {}",
286    ///     None,
287    /// )?;
288    /// if let QueryResults::Solutions(mut solutions) = evaluator.execute(Dataset::new(), &query)? {
289    ///     assert_eq!(
290    ///         solutions.next().unwrap()?.get("nt"),
291    ///         Some(&Literal::from("\"1\"^^<http://www.w3.org/2001/XMLSchema#integer>").into())
292    ///     );
293    /// }
294    /// # Result::<_, Box<dyn std::error::Error>>::Ok(())
295    /// ```
296    #[inline]
297    #[must_use]
298    pub fn with_custom_function(
299        mut self,
300        name: NamedNode,
301        evaluator: impl Fn(&[Term]) -> Option<Term> + Send + Sync + 'static,
302    ) -> Self {
303        self.custom_functions.insert(name, Arc::new(evaluator));
304        self
305    }
306
307    /// Disables query optimizations and runs the query as it is.
308    #[inline]
309    #[must_use]
310    pub fn without_optimizations(mut self) -> Self {
311        self.without_optimizations = true;
312        self
313    }
314
315    /// Compute statistics during evaluation and fills them in the explanation tree.
316    #[inline]
317    #[must_use]
318    pub fn compute_statistics(mut self) -> Self {
319        self.run_stats = true;
320        self
321    }
322}
323
324pub(crate) type CustomFunctionRegistry =
325    HashMap<NamedNode, Arc<dyn (Fn(&[Term]) -> Option<Term>) + Send + Sync>>;
326
327/// The explanation of a query.
328#[derive(Clone)]
329pub struct QueryExplanation {
330    inner: Rc<EvalNodeWithStats>,
331    with_stats: bool,
332    planning_duration: Option<DayTimeDuration>,
333}
334
335impl QueryExplanation {
336    /// Writes the explanation as JSON.
337    pub fn write_in_json(&self, writer: impl io::Write) -> io::Result<()> {
338        let mut serializer = WriterJsonSerializer::new(writer);
339        serializer.serialize_event(JsonEvent::StartObject)?;
340        if let Some(planning_duration) = self.planning_duration {
341            serializer
342                .serialize_event(JsonEvent::ObjectKey("planning duration in seconds".into()))?;
343            serializer.serialize_event(JsonEvent::Number(
344                planning_duration.as_seconds().to_string().into(),
345            ))?;
346        }
347        serializer.serialize_event(JsonEvent::ObjectKey("plan".into()))?;
348        self.inner.json_node(&mut serializer, self.with_stats)?;
349        serializer.serialize_event(JsonEvent::EndObject)
350    }
351}
352
353impl fmt::Debug for QueryExplanation {
354    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
355        let mut obj = f.debug_struct("QueryExplanation");
356        if let Some(planning_duration) = self.planning_duration {
357            obj.field(
358                "planning duration in seconds",
359                &f32::from(Float::from(planning_duration.as_seconds())),
360            );
361        }
362        obj.field("tree", &self.inner);
363        obj.finish_non_exhaustive()
364    }
365}