shex_validation/
validator_runner.rs

1use crate::atom;
2use crate::validator_error::*;
3use crate::Reason;
4use crate::Reasons;
5use crate::ValidatorConfig;
6use either::Either;
7use indexmap::IndexSet;
8use iri_s::iri;
9use rbe::MatchTableIter;
10use shex_ast::ir::preds::Preds;
11use shex_ast::ir::schema_ir::SchemaIR;
12use shex_ast::ir::shape::Shape;
13use shex_ast::ir::shape_expr::ShapeExpr;
14use shex_ast::Node;
15use shex_ast::Pred;
16use shex_ast::ShapeLabelIdx;
17use srdf::Iri as _;
18use srdf::{Object, Query};
19use std::collections::hash_map::Entry;
20use std::collections::HashMap;
21use std::collections::HashSet;
22use tracing::debug;
23
24type Result<T> = std::result::Result<T, ValidatorError>;
25type Atom = atom::Atom<(Node, ShapeLabelIdx)>;
26type NegAtom = (Node, ShapeLabelIdx);
27type PosAtom = (Node, ShapeLabelIdx);
28// type Rule = rule::Rule<(Node, ShapeLabelIdx)>;
29type Neighs = (Vec<(Pred, Node)>, Vec<Pred>);
30
31#[derive(Debug, Clone)]
32pub struct Engine {
33    checked: IndexSet<Atom>,
34    processing: IndexSet<Atom>,
35    pending: IndexSet<Atom>,
36    //rules: Vec<Rule>,
37    alternative_match_iterators: Vec<MatchTableIter<Pred, Node, ShapeLabelIdx>>,
38    // alternatives: Vec<ResultMap<Node, ShapeLabelIdx>>,
39    config: ValidatorConfig,
40    step_counter: usize,
41    reasons: HashMap<PosAtom, Vec<Reason>>,
42    errors: HashMap<NegAtom, Vec<ValidatorError>>,
43}
44
45impl Engine {
46    pub fn new(config: &ValidatorConfig) -> Engine {
47        Engine {
48            checked: IndexSet::new(),
49            processing: IndexSet::new(),
50            pending: IndexSet::new(),
51            alternative_match_iterators: Vec::new(),
52            config: config.clone(),
53            step_counter: 0,
54            reasons: HashMap::new(),
55            errors: HashMap::new(),
56        }
57    }
58
59    pub fn reset(&mut self) {
60        let config = self.config.clone();
61        *self = Engine::new(&config);
62    }
63
64    pub(crate) fn add_processing(&mut self, atom: &Atom) {
65        self.processing.insert((*atom).clone());
66    }
67
68    pub(crate) fn remove_processing(&mut self, atom: &Atom) {
69        self.processing.swap_remove(atom);
70    }
71
72    pub(crate) fn validate_pending(&mut self, rdf: &impl Query, schema: &SchemaIR) -> Result<()> {
73        while let Some(atom) = self.pop_pending() {
74            match atom.clone() {
75                Atom::Pos((node, idx)) => {
76                    let mut hyp = Vec::new();
77                    match self.prove(&node, &idx, &mut hyp, schema, rdf)? {
78                        true => {
79                            self.add_checked_pos(atom, Vec::new());
80                        }
81                        false => {
82                            self.add_checked_neg(atom, Vec::new());
83                        }
84                    }
85                }
86                Atom::Neg((_node, _idx)) => {
87                    todo!()
88                }
89            }
90        }
91        Ok(())
92    }
93
94    pub(crate) fn add_checked_pos(&mut self, atom: Atom, reasons: Vec<Reason>) {
95        let new_atom = atom.clone();
96        match atom {
97            Atom::Pos(pa) => {
98                self.checked.insert(new_atom);
99                self.add_reasons(pa, reasons)
100            }
101            Atom::Neg(_na) => {
102                todo!()
103            }
104        }
105    }
106
107    pub(crate) fn add_checked_neg(&mut self, atom: Atom, errors: Vec<ValidatorError>) {
108        match atom.clone() {
109            Atom::Neg(na) => {
110                self.checked.insert(atom);
111                self.add_errors(na, errors)
112            }
113            Atom::Pos(na) => {
114                self.checked.insert(atom.negated());
115                self.add_errors(na, errors)
116            }
117        }
118    }
119
120    pub(crate) fn checked(&self) -> IndexSet<Atom> {
121        self.checked.clone()
122    }
123
124    fn add_reasons(&mut self, pa: PosAtom, rs: Vec<Reason>) {
125        match self.reasons.entry(pa) {
126            Entry::Occupied(mut vs) => vs.get_mut().extend(rs),
127            Entry::Vacant(vac) => {
128                vac.insert(rs);
129            }
130        }
131    }
132
133    fn add_errors(&mut self, na: NegAtom, es: Vec<ValidatorError>) {
134        match self.errors.entry(na) {
135            Entry::Occupied(mut vs) => vs.get_mut().extend(es),
136            Entry::Vacant(vac) => {
137                vac.insert(es);
138            }
139        }
140    }
141
142    pub(crate) fn pending(&self) -> IndexSet<Atom> {
143        self.pending.clone()
144    }
145
146    pub fn set_max_steps(&mut self, max_steps: usize) {
147        self.config.set_max_steps(max_steps);
148    }
149
150    pub(crate) fn is_processing(&self, atom: &Atom) -> bool {
151        self.processing.contains(atom)
152    }
153
154    /*pub(crate) fn get_result(&self, atom: &Atom) -> ResultValue {
155        if self.checked.contains(atom) {
156            ResultValue::Ok
157        } else if self.checked.contains(&atom.negated()) {
158            ResultValue::Failed
159        } else if self.pending.contains(atom) {
160            ResultValue::Pending
161        } else if self.processing.contains(atom) {
162            ResultValue::Processing
163        } else {
164            ResultValue::Unknown
165        }
166    }*/
167
168    pub fn new_step(&mut self) {
169        self.step_counter += 1;
170    }
171
172    pub fn add_ok(&mut self, n: Node, s: ShapeLabelIdx) {
173        let pa = (n, s);
174        self.checked.insert(Atom::pos(&pa));
175    }
176
177    pub fn add_failed(&mut self, n: Node, s: ShapeLabelIdx, err: ValidatorError) {
178        let atom = (n, s);
179        self.checked.insert(Atom::neg(&atom));
180        match self.errors.entry(atom) {
181            Entry::Occupied(mut es) => es.get_mut().push(err),
182            Entry::Vacant(vacant) => {
183                vacant.insert(vec![err]);
184            }
185        }
186    }
187
188    pub fn get_shape_expr(&self, _idx: ShapeLabelIdx) -> &ShapeExpr {
189        // self.config.get_shape_expr(idx)
190        todo!()
191    }
192
193    pub fn more_pending(&self) -> bool {
194        !self.pending.is_empty()
195    }
196
197    pub fn add_pending(&mut self, n: Node, s: ShapeLabelIdx) {
198        let pos_atom = (n, s);
199        self.pending.insert(Atom::pos(&pos_atom));
200    }
201
202    pub fn pop_pending(&mut self) -> Option<Atom> {
203        self.pending.pop()
204    }
205
206    pub fn steps(&self) -> usize {
207        self.step_counter
208    }
209
210    pub fn max_steps(&self) -> usize {
211        self.config.max_steps()
212    }
213
214    pub(crate) fn no_end_steps(&self) -> bool {
215        self.steps() < self.max_steps()
216    }
217
218    pub(crate) fn find_errors(&self, na: &NegAtom) -> Vec<ValidatorError> {
219        match self.errors.get(na) {
220            Some(vs) => vs.to_vec(),
221            None => Vec::new(),
222        }
223    }
224
225    pub(crate) fn find_reasons(&self, pa: &PosAtom) -> Vec<Reason> {
226        match self.reasons.get(pa) {
227            Some(vs) => vs.to_vec(),
228            None => Vec::new(),
229        }
230    }
231
232    pub(crate) fn dep(
233        &self,
234        node: &Node,
235        idx: &ShapeLabelIdx,
236        schema: &SchemaIR,
237        rdf: &impl Query,
238    ) -> Result<HashSet<(Node, ShapeLabelIdx)>> {
239        // Search all pairs (node', idx') in the shape expr referenced by idx such that there is a triple constraint (pred, ref)
240        // and the neighbours of node are (pred, node')
241        if let Some((_label, se)) = schema.find_shape_idx(idx) {
242            let references = se.references();
243            let preds = references.keys().cloned().collect::<Vec<_>>();
244            let (neighs, _remainder) = self.neighs(node, preds, rdf)?;
245            let mut dep = HashSet::new();
246            for (pred, neigh_node) in neighs {
247                if let Some(idx_list) = references.get(&pred) {
248                    for idx in idx_list {
249                        dep.insert((neigh_node.clone(), *idx));
250                    }
251                } else {
252                    debug!("No references found for predicate {pred}");
253                }
254            }
255            Ok(dep)
256        } else {
257            Err(ValidatorError::ShapeExprNotFound { idx: *idx })
258        }
259    }
260
261    pub(crate) fn prove(
262        &self,
263        node: &Node,
264        label: &ShapeLabelIdx,
265        hyp: &mut Vec<(Node, ShapeLabelIdx)>,
266        schema: &SchemaIR,
267        rdf: &impl Query,
268    ) -> Result<bool> {
269        // Implements algorithm presented in page 14 of this paper:
270        // https://labra.weso.es/publication/2017_semantics-validation-shapes-schemas/
271        hyp.push((node.clone(), *label));
272        let hyp_as_set: HashSet<(Node, ShapeLabelIdx)> = hyp
273            .iter()
274            .map(|(n, l)| (n.clone(), *l))
275            .collect::<HashSet<_>>();
276        let mut matched = HashSet::new();
277        let candidates = self.dep(node, label, schema, rdf)?;
278        let cleaned_candidates: HashSet<_> = candidates.difference(&hyp_as_set).cloned().collect();
279        for (n1, l1) in cleaned_candidates {
280            if self.prove(&n1, &l1, hyp, schema, rdf)? {
281                matched.insert((n1.clone(), l1));
282            }
283        }
284        let mut typing: HashSet<_> = matched.union(&hyp_as_set).cloned().collect();
285        let result = self.check_node_idx(node, label, schema, rdf, &mut typing)?;
286        hyp.pop();
287        Ok(result)
288    }
289
290    pub(crate) fn check_node_idx(
291        &self,
292        node: &Node,
293        idx: &ShapeLabelIdx,
294        schema: &SchemaIR,
295        rdf: &impl Query,
296        typing: &mut HashSet<(Node, ShapeLabelIdx)>,
297    ) -> Result<bool> {
298        if let Some((_maybe_label, se)) = schema.find_shape_idx(idx) {
299            self.check_node_shape_expr(node, se, schema, rdf, typing)
300        } else {
301            Err(ValidatorError::ShapeExprNotFound { idx: *idx })
302        }
303    }
304
305    pub(crate) fn check_node_shape_expr(
306        &self,
307        node: &Node,
308        se: &ShapeExpr,
309        schema: &SchemaIR,
310        rdf: &impl Query,
311        typing: &mut HashSet<(Node, ShapeLabelIdx)>,
312    ) -> Result<bool> {
313        match se {
314            ShapeExpr::ShapeAnd { exprs, .. } => {
315                // let mut reasons_collection = Vec::new();
316                for e in exprs {
317                    let result = self.check_node_shape_expr(node, e, schema, rdf, typing)?;
318                    match result {
319                        false => {
320                            return Ok(false);
321                        }
322                        true => {
323                            // reasons_collection.push(reasons);
324                        }
325                    }
326                }
327                Ok(true)
328            }
329            ShapeExpr::ShapeOr { exprs, .. } => {
330                // let mut errors_collection = Vec::new();
331                for e in exprs {
332                    let result = self.check_node_shape_expr(node, e, schema, rdf, typing)?;
333                    match result {
334                        false => {
335                            // errors_collection.push((e.clone(), ValidatorErrors::new(errors)));
336                        }
337                        true => {
338                            return Ok(true);
339                        }
340                    }
341                }
342                Ok(false)
343            }
344            ShapeExpr::ShapeNot { expr, .. } => {
345                let result = self.check_node_shape_expr(node, expr, schema, rdf, typing)?;
346                if result {
347                    Ok(false) // ShapeNot is false if the inner shape is true
348                } else {
349                    Ok(true) // ShapeNot is true if the inner shape is false
350                }
351            }
352            ShapeExpr::NodeConstraint(nc) => {
353                match nc.cond().matches(node) {
354                    Ok(_pending) => {
355                        // TODO: Add pending to pending nodes
356                        // I think this is not needed because node constraints will not generate pending nodes
357                        Ok(true)
358                    }
359                    Err(_err) => Ok(false),
360                }
361            }
362            ShapeExpr::Shape(shape) => self.check_node_shape(node, shape, schema, rdf, typing),
363            ShapeExpr::External {} => {
364                debug!("External shape expression encountered for node {node} with shape {se}");
365                Ok(true)
366            }
367            ShapeExpr::Ref { idx } => {
368                if typing.contains(&(node.clone(), *idx)) {
369                    // If the node is already in the typing, we can return true
370                    Ok(true)
371                } else {
372                    Ok(false)
373                }
374            }
375            ShapeExpr::Empty => Ok(true),
376        }
377    }
378
379    pub(crate) fn check_node_shape(
380        &self,
381        node: &Node,
382        shape: &Shape,
383        _schema: &SchemaIR,
384        rdf: &impl Query,
385        _typing: &mut HashSet<(Node, ShapeLabelIdx)>,
386    ) -> Result<bool> {
387        let (values, remainder) = self.neighs(node, shape.preds(), rdf)?;
388        if shape.is_closed() && !remainder.is_empty() {
389            debug!("Closed shape with remainder preds: {remainder:?}");
390            Ok(false)
391        } else {
392            debug!("Neighs of {node}: {values:?}");
393            let result_iter = shape.rbe_table().matches(values)?;
394            for result in result_iter {
395                if result.is_ok() {
396                    return Ok(true);
397                }
398            }
399            Ok(false)
400        }
401    }
402
403    pub(crate) fn check_node_shape_expr_old<S>(
404        &mut self,
405        node: &Node,
406        se: &ShapeExpr,
407        rdf: &S,
408        schema: &SchemaIR,
409    ) -> Result<Either<Vec<ValidatorError>, Vec<Reason>>>
410    where
411        S: Query,
412    {
413        debug!(
414            "Step {}. Checking node {node} with shape_expr: {se}",
415            self.step_counter
416        );
417        match se {
418            ShapeExpr::NodeConstraint(nc) => match nc.cond().matches(node) {
419                Ok(_pending) => {
420                    // TODO: Add pending to pending nodes
421                    // I think this is not needed because node constraints will not generate pending nodes
422                    Ok(Either::Right(vec![Reason::NodeConstraintPassed {
423                        node: node.clone(),
424                        nc: nc.clone(),
425                    }]))
426                }
427                Err(err) => Ok(Either::Left(vec![ValidatorError::RbeError(err)])),
428            },
429            ShapeExpr::Ref { idx } => {
430                // TODO: Should we remove the next
431                self.add_pending(node.clone(), *idx);
432                if let Some((_maybe_label, se)) = schema.find_shape_idx(idx) {
433                    self.check_node_shape_expr_old(node, se, rdf, schema)
434                } else {
435                    Ok(Either::Left(vec![ValidatorError::ShapeExprNotFound {
436                        idx: *idx,
437                    }]))
438                }
439            }
440            ShapeExpr::ShapeAnd { exprs, .. } => {
441                let mut reasons_collection = Vec::new();
442                for e in exprs {
443                    let result = self.check_node_shape_expr_old(node, e, rdf, schema)?;
444                    match result {
445                        Either::Left(errors) => {
446                            return Ok(Either::Left(vec![ValidatorError::ShapeAndError {
447                                shape_expr: e.clone(),
448                                node: node.clone(),
449                                errors: ValidatorErrors::new(errors),
450                            }]));
451                        }
452                        Either::Right(reasons) => {
453                            reasons_collection.push(reasons);
454                        }
455                    }
456                }
457                Ok(Either::Right(vec![Reason::ShapeAndPassed {
458                    node: node.clone(),
459                    se: se.clone(),
460                    reasons: reasons_collection,
461                }]))
462            }
463            ShapeExpr::ShapeNot { expr, .. } => {
464                let result = self.check_node_shape_expr_old(node, expr, rdf, schema)?;
465                match result {
466                    Either::Left(errors) => Ok(Either::Right(vec![Reason::ShapeNotPassed {
467                        node: node.clone(),
468                        shape_expr: se.clone(),
469                        errors_evidences: ValidatorErrors::new(errors),
470                    }])),
471                    Either::Right(reasons) => {
472                        Ok(Either::Left(vec![ValidatorError::ShapeNotError {
473                            node: node.clone(),
474                            shape_expr: se.clone(),
475                            reasons: Reasons::new(reasons),
476                        }]))
477                    }
478                }
479            }
480            ShapeExpr::ShapeOr { exprs, .. } => {
481                let mut errors_collection = Vec::new();
482                for e in exprs {
483                    let result = self.check_node_shape_expr_old(node, e, rdf, schema)?;
484                    match result {
485                        Either::Left(errors) => {
486                            errors_collection.push((e.clone(), ValidatorErrors::new(errors)));
487                        }
488                        Either::Right(reasons) => {
489                            return Ok(Either::Right(vec![Reason::ShapeOrPassed {
490                                shape_expr: e.clone(),
491                                node: node.clone(),
492                                reasons: Reasons::new(reasons),
493                            }]));
494                        }
495                    }
496                }
497                Ok(Either::Left(vec![ValidatorError::ShapeOrError {
498                    shape_expr: se.clone(),
499                    node: node.clone(),
500                    errors: errors_collection.clone(),
501                }]))
502            }
503            ShapeExpr::Shape(shape) => self.check_node_shape_old(node, shape, rdf),
504            ShapeExpr::Empty => Ok(Either::Right(Vec::new())),
505            ShapeExpr::External {} => Ok(Either::Right(Vec::new())),
506        }
507    }
508
509    fn check_node_shape_old<S>(
510        &mut self,
511        node: &Node,
512        shape: &Shape,
513        rdf: &S,
514    ) -> Result<Either<Vec<ValidatorError>, Vec<Reason>>>
515    where
516        S: Query,
517    {
518        let (values, remainder) = self.neighs(node, shape.preds(), rdf)?;
519        if shape.is_closed() && !remainder.is_empty() {
520            let errs = vec![ValidatorError::ClosedShapeWithRemainderPreds {
521                remainder: Preds::new(remainder),
522                declared: Preds::new(shape.preds().into_iter().collect()),
523            }];
524            return Ok(Either::Left(errs));
525        };
526        debug!("Neighs of {node}: {values:?}");
527        let mut result_iter = shape.rbe_table().matches(values)?;
528        let mut current_err = None;
529        let counter = self.step_counter;
530        let mut found = false;
531        let mut iter_count = 0;
532
533        // Search for the first result which is not an err
534        while let Some(next_result) = result_iter.next() {
535            iter_count += 1;
536            match next_result {
537                Ok(pending_values) => {
538                    debug!("Found result, iteration {iter_count}");
539                    for (p, v) in pending_values.iter() {
540                        debug!("Step {counter}: Value in pending: {p}/{v}");
541                        let pos_atom = ((*p).clone(), *v);
542                        let atom = Atom::pos(&pos_atom);
543                        if self.is_processing(&atom) {
544                            let pred = p.clone();
545                            debug!(
546                                "Step {counter} Adding ok: {}/{v} because it was already processed",
547                                &pred
548                            );
549                            self.add_ok(pred, *v);
550                        } else {
551                            self.insert_pending(&atom);
552                        }
553                    }
554                    // We keep alternative match iterators which will be recovered in case of failure
555                    self.alternative_match_iterators.push(result_iter);
556                    found = true;
557                    break;
558                }
559                Err(err) => {
560                    debug!("Result with error {err} at iteration {iter_count}");
561                    current_err = Some(err);
562                }
563            }
564        }
565        if !found {
566            let errs = match current_err {
567                Some(rbe_err) => vec![ValidatorError::RbeError(rbe_err)],
568                None => {
569                    debug!("No value found for node/shape where node = {node}, shape = {shape:?}. Current_err = empty");
570                    Vec::new()
571                }
572            };
573            Ok(Either::Left(errs))
574        } else {
575            Ok(Either::Right(vec![Reason::ShapePassed {
576                node: node.clone(),
577                shape: shape.clone(),
578            }]))
579        }
580    }
581
582    fn cnv_iri<S>(&self, iri: S::IRI) -> Pred
583    where
584        S: Query,
585    {
586        let iri_string = iri.as_str();
587        let iri_s = iri!(iri_string);
588        Pred::from(iri_s)
589    }
590
591    fn cnv_object<S>(&self, term: &S::Term) -> Node
592    where
593        S: Query,
594    {
595        Node::from(term.clone().into())
596    }
597
598    fn neighs<S>(&self, node: &Node, preds: Vec<Pred>, rdf: &S) -> Result<Neighs>
599    where
600        S: Query,
601    {
602        let node = self.get_rdf_node(node, rdf);
603        let list: Vec<_> = preds.iter().map(|pred| pred.iri().clone().into()).collect();
604        if let Ok(subject) = node.try_into() {
605            let (outgoing_arcs, remainder) = rdf
606                .outgoing_arcs_from_list(&subject, &list)
607                .map_err(|e| self.cnv_err::<S>(e))?;
608            let mut result = Vec::new();
609            for (pred, values) in outgoing_arcs.into_iter() {
610                for obj in values.into_iter() {
611                    let iri = self.cnv_iri::<S>(pred.clone());
612                    let object = self.cnv_object::<S>(&obj);
613                    result.push((iri.clone(), object))
614                }
615            }
616            let mut remainder_preds = Vec::new();
617            for r in remainder {
618                let iri_r = self.cnv_iri::<S>(r.clone());
619                remainder_preds.push(iri_r)
620            }
621            Ok((result, remainder_preds))
622        } else {
623            todo!()
624        }
625    }
626
627    fn cnv_err<S>(&self, _err: S::Err) -> ValidatorError
628    where
629        S: Query,
630    {
631        todo!()
632    }
633
634    fn get_rdf_node<S>(&self, node: &Node, _rdf: &S) -> S::Term
635    where
636        S: Query,
637    {
638        match node.as_object() {
639            Object::Iri(iri_s) => {
640                let iri: S::IRI = iri_s.clone().into();
641                iri.into()
642            }
643            Object::BlankNode(_id) => {
644                todo!()
645            }
646            Object::Literal(_lit) => {
647                todo!()
648            }
649        }
650    }
651
652    pub fn insert_pending(&mut self, atom: &Atom) {
653        self.pending.insert((*atom).clone());
654    }
655}