shex_ast/ir/
schema_ir.rs

1use crate::Pred;
2use crate::{
3    ast::Schema as SchemaJson, ir::ast2ir::AST2IR, CResult, SchemaIRError, ShapeExprLabel,
4    ShapeLabelIdx,
5};
6use iri_s::IriS;
7use prefixmap::{IriRef, PrefixMap};
8use std::collections::{HashMap, HashSet};
9use std::fmt::Display;
10
11use super::dependency_graph::{DependencyGraph, PosNeg};
12use super::shape_expr::ShapeExpr;
13use super::shape_label::ShapeLabel;
14
15type Result<A> = std::result::Result<A, SchemaIRError>;
16
17#[derive(Debug, Default, Clone)]
18pub struct SchemaIR {
19    shape_labels_map: HashMap<ShapeLabel, ShapeLabelIdx>,
20    shapes: HashMap<ShapeLabelIdx, (Option<ShapeLabel>, ShapeExpr)>,
21    shape_label_counter: usize,
22    prefixmap: PrefixMap,
23}
24
25impl SchemaIR {
26    pub fn new() -> SchemaIR {
27        SchemaIR {
28            shape_labels_map: HashMap::new(),
29            shape_label_counter: 0,
30            shapes: HashMap::new(),
31            prefixmap: PrefixMap::new(),
32        }
33    }
34
35    pub fn set_prefixmap(&mut self, prefixmap: Option<PrefixMap>) {
36        self.prefixmap = prefixmap.clone().unwrap_or_default();
37    }
38
39    pub fn prefixmap(&self) -> PrefixMap {
40        self.prefixmap.clone()
41    }
42
43    pub fn add_shape(&mut self, shape_label: ShapeLabel, se: ShapeExpr) {
44        let idx = ShapeLabelIdx::from(self.shape_label_counter);
45        self.shape_labels_map.insert(shape_label.clone(), idx);
46        self.shapes.insert(idx, (Some(shape_label.clone()), se));
47        self.shape_label_counter += 1;
48    }
49
50    pub fn get_shape_expr(&self, shape_label: &ShapeLabel) -> Option<&ShapeExpr> {
51        if let Some(idx) = self.find_shape_label_idx(shape_label) {
52            self.shapes.get(idx).map(|(_label, se)| se)
53        } else {
54            None
55        }
56    }
57
58    pub fn from_schema_json(&mut self, schema_json: &SchemaJson) -> Result<()> {
59        let mut schema_json_compiler = AST2IR::new();
60        schema_json_compiler.compile(schema_json, self)?;
61        Ok(())
62    }
63
64    pub fn find_ref(&self, se_ref: &ShapeExprLabel) -> CResult<ShapeLabelIdx> {
65        let shape_label = match se_ref {
66            ShapeExprLabel::IriRef { value } => match value {
67                IriRef::Iri(iri) => {
68                    let label = ShapeLabel::iri(iri.clone());
69                    Ok::<ShapeLabel, SchemaIRError>(label)
70                }
71                IriRef::Prefixed { prefix, local } => {
72                    let iri =
73                        self.prefixmap
74                            .resolve_prefix_local(prefix, local)
75                            .map_err(|err| SchemaIRError::PrefixedNotFound {
76                                prefix: prefix.clone(),
77                                local: local.clone(),
78                                err: Box::new(err),
79                            })?;
80                    Ok::<ShapeLabel, SchemaIRError>(ShapeLabel::iri(iri))
81                }
82            },
83            ShapeExprLabel::BNode { value } => {
84                let label = ShapeLabel::from_bnode((*value).clone());
85                Ok(label)
86            }
87            ShapeExprLabel::Start => Ok(ShapeLabel::Start),
88        }?;
89        match self.shape_labels_map.get(&shape_label) {
90            Some(idx) => Ok(*idx),
91            None => Err(SchemaIRError::LabelNotFound { shape_label }),
92        }
93    }
94
95    pub fn find_label(&self, label: &ShapeLabel) -> Option<(&ShapeLabelIdx, &ShapeExpr)> {
96        self.find_shape_label_idx(label)
97            .and_then(|idx| self.shapes.get(idx).map(|(_label, se)| (idx, se)))
98    }
99
100    pub fn find_shape_label_idx(&self, label: &ShapeLabel) -> Option<&ShapeLabelIdx> {
101        self.shape_labels_map.get(label)
102    }
103
104    pub fn find_shape_idx(&self, idx: &ShapeLabelIdx) -> Option<&(Option<ShapeLabel>, ShapeExpr)> {
105        self.shapes.get(idx)
106    }
107
108    pub fn shape_label_from_idx(&self, idx: &ShapeLabelIdx) -> Option<&ShapeLabel> {
109        self.shapes
110            .get(idx)
111            .and_then(|(label, _se)| label.as_ref())
112            .or(None)
113    }
114
115    pub fn new_index(&mut self) -> ShapeLabelIdx {
116        let idx = ShapeLabelIdx::from(self.shape_label_counter);
117        self.shape_label_counter += 1;
118        self.shapes.insert(idx, (None, ShapeExpr::Empty));
119        idx
120    }
121
122    pub fn existing_labels(&self) -> Vec<&ShapeLabel> {
123        self.shape_labels_map.keys().collect()
124    }
125
126    pub fn shapes(&self) -> impl Iterator<Item = &(Option<ShapeLabel>, ShapeExpr)> {
127        self.shapes.values()
128    }
129
130    // Returns a map of predicates to shape label indices that reference the given index
131    pub fn references(&self, idx: &ShapeLabelIdx) -> HashMap<Pred, Vec<ShapeLabelIdx>> {
132        let visited = HashSet::new();
133        self.references_visited(idx, visited)
134    }
135
136    //
137    pub fn references_visited(
138        &self,
139        idx: &ShapeLabelIdx,
140        mut visited: HashSet<ShapeLabelIdx>,
141    ) -> HashMap<Pred, Vec<ShapeLabelIdx>> {
142        if let Some((_label, shape_expr)) = self.find_shape_idx(idx) {
143            match shape_expr {
144                ShapeExpr::Ref { idx } => {
145                    if visited.contains(idx) {
146                        // If we have already visited this index, we return an empty map to avoid infinite recursion
147                        return HashMap::new();
148                    }
149                    visited.insert(*idx);
150                    self.references_visited(idx, visited)
151                }
152                _ => shape_expr.references(),
153            }
154        } else {
155            HashMap::new()
156        }
157    }
158
159    #[allow(dead_code)]
160    fn cnv_closed(closed: &Option<bool>) -> bool {
161        match closed {
162            None => false,
163            Some(closed) => *closed,
164        }
165    }
166
167    #[allow(dead_code)]
168    fn cnv_extra(&self, extra: &Option<Vec<IriRef>>) -> CResult<Vec<IriS>> {
169        extra
170            .as_ref()
171            .map(|extra| {
172                extra
173                    .iter()
174                    .map(|iri| self.cnv_iri_ref(iri))
175                    .collect::<CResult<Vec<_>>>()
176            })
177            .unwrap_or(Ok(vec![]))
178    }
179
180    fn cnv_iri_ref(&self, iri_ref: &IriRef) -> Result<IriS> {
181        let iri_s = (*iri_ref).clone().into();
182        Ok(iri_s)
183    }
184
185    pub fn get_shape_label_idx(&self, shape_label: &ShapeLabel) -> Result<ShapeLabelIdx> {
186        match self.shape_labels_map.get(shape_label) {
187            Some(shape_label_idx) => Ok(*shape_label_idx),
188            None => Err(SchemaIRError::ShapeLabelNotFound {
189                shape_label: shape_label.clone(),
190            }),
191        }
192    }
193
194    pub fn replace_shape(&mut self, idx: &ShapeLabelIdx, se: ShapeExpr) {
195        self.shapes.entry(*idx).and_modify(|(_label, s)| *s = se);
196    }
197
198    pub fn show_label(&self, label: &ShapeLabel) -> String {
199        match label {
200            ShapeLabel::Iri(iri) => self.prefixmap.qualify(iri),
201            ShapeLabel::BNode(bnode) => format!("{bnode}"),
202            ShapeLabel::Start => "START".to_string(),
203        }
204    }
205
206    pub fn neg_cycles(&self) -> Vec<Vec<(ShapeLabelIdx, ShapeLabelIdx, Vec<ShapeLabelIdx>)>> {
207        let dep_graph = self.dependency_graph();
208        dep_graph.neg_cycles()
209    }
210
211    /// This is used to detect cycles that involve negations in the schema
212    /// A well formed schema should not have any cyclic reference that involve a negation
213    pub fn has_neg_cycle(&self) -> bool {
214        let dep_graph = self.dependency_graph();
215        dep_graph.has_neg_cycle()
216    }
217
218    pub(crate) fn dependency_graph(&self) -> DependencyGraph {
219        let mut dep_graph = DependencyGraph::new();
220        for (idx, (_label, se)) in self.shapes.iter() {
221            se.add_edges(*idx, &mut dep_graph, PosNeg::pos());
222        }
223        dep_graph
224    }
225
226    pub fn dependencies(&self) -> Vec<(ShapeLabel, PosNeg, ShapeLabel)> {
227        let mut deps = Vec::new();
228        for (source, posneg, target) in self.dependency_graph().all_edges() {
229            match (
230                self.shape_label_from_idx(&source),
231                self.shape_label_from_idx(&target),
232            ) {
233                (Some(source_label), Some(target_label)) => {
234                    deps.push((source_label.clone(), posneg, target_label.clone()));
235                }
236                _ => {
237                    // We ignore dependencies between shapes that have no labels
238                }
239            }
240        }
241        println!("Dependencies: {deps:?}");
242        deps
243    }
244}
245
246impl Display for SchemaIR {
247    fn fmt(&self, dest: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> {
248        writeln!(dest, "SchemaIR with {} shapes", self.shape_label_counter)?;
249        writeln!(dest, "Labels to indexes:")?;
250        for (label, idx) in self.shape_labels_map.iter() {
251            let label = self.show_label(label);
252            writeln!(dest, "{label} -> {idx}")?;
253        }
254        writeln!(dest, "Indexes to Shape Expressions:")?;
255        for (idx, (maybe_label, se)) in self.shapes.iter() {
256            let label_str = match maybe_label {
257                None => "".to_string(),
258                Some(label) => format!("{}@", self.show_label(label)),
259            };
260            writeln!(dest, "{idx} -> {label_str}{se}")?;
261        }
262        writeln!(dest, "---end of schema IR")?;
263        Ok(())
264    }
265}
266
267#[cfg(test)]
268mod tests {
269    use std::collections::HashMap;
270
271    use iri_s::iri;
272
273    use super::SchemaIR;
274    use crate::{ast::Schema as SchemaJson, ir::shape_label::ShapeLabel, Pred, ShapeLabelIdx};
275
276    #[test]
277    fn test_find_component() {
278        let str = r#"{
279            "@context": "http://www.w3.org/ns/shex.jsonld",
280            "type": "Schema",
281            "shapes": [
282                {
283                    "type": "ShapeDecl",
284                    "id": "http://a.example/S1",
285                    "shapeExpr": {
286                        "type": "Shape",
287                        "expression": {
288                            "type": "TripleConstraint",
289                            "predicate": "http://a.example/p1"
290                        }
291                    }
292                }
293            ]
294        }"#;
295        let schema_json: SchemaJson = serde_json::from_str::<SchemaJson>(str).unwrap();
296        let mut ir = SchemaIR::new();
297        ir.from_schema_json(&schema_json).unwrap();
298        println!("Schema IR: {ir}");
299        let s1_label: ShapeLabel = ShapeLabel::iri(iri!("http://a.example/S1"));
300        let s1 = ir
301            .shape_label_from_idx(&ir.get_shape_label_idx(&s1_label).unwrap())
302            .unwrap();
303        assert_eq!(s1, &s1_label);
304    }
305
306    #[test]
307    fn test_ir_references() {
308        let str = r#"{ "type": "Schema",
309            "shapes": [{
310        "type": "ShapeDecl",
311        "id": "http://example.org/S",
312        "shapeExpr": {
313            "type": "Shape",
314            "expression": {
315            "type": "EachOf",
316            "expressions": [{
317              "type": "TripleConstraint",
318              "predicate": "http://example.org/p",
319              "valueExpr": "http://example.org/T"
320            },
321            {
322              "type": "TripleConstraint",
323              "predicate": "http://example.org/p",
324              "valueExpr": "http://example.org/U"
325            }
326          ]
327        }
328      }
329    },
330    {
331      "type": "ShapeDecl",
332      "id": "http://example.org/T",
333      "shapeExpr": {
334        "type": "Shape"
335      }
336    },
337    {
338      "type": "ShapeDecl",
339      "id": "http://example.org/U",
340      "shapeExpr": {
341        "type": "Shape"
342      }
343    }
344  ],
345  "@context": "http://www.w3.org/ns/shex.jsonld"
346}"#;
347        let schema: SchemaJson = serde_json::from_str(str).unwrap();
348        let mut ir = SchemaIR::new();
349        ir.from_schema_json(&schema).unwrap();
350        println!("Schema IR: {ir}");
351        let s: ShapeLabel = ShapeLabel::iri(iri!("http://example.org/S"));
352        let idx = ir.get_shape_label_idx(&s).unwrap();
353        let references = ir.references(&idx);
354        let expected: HashMap<Pred, Vec<ShapeLabelIdx>> = vec![(
355            Pred::new_unchecked("http://example.org/p"),
356            vec![
357                ShapeLabelIdx::from(1), // T
358                ShapeLabelIdx::from(2), // U
359            ],
360        )]
361        .into_iter()
362        .collect();
363        assert_eq!(references, expected);
364    }
365
366    #[test]
367    fn test_ir_references_and() {
368        let str = r#"{
369  "type": "Schema",
370  "shapes": [
371    {
372      "type": "ShapeDecl",
373      "id": "http://example.org/S",
374      "shapeExpr": {
375        "type": "ShapeAnd",
376        "shapeExprs": [
377          {
378            "type": "Shape",
379            "expression": {
380              "type": "TripleConstraint",
381              "predicate": "http://example.org/p",
382              "valueExpr": "http://example.org/T"
383            }
384          },
385          {
386            "type": "Shape",
387            "expression": {
388              "type": "TripleConstraint",
389              "predicate": "http://example.org/p",
390              "valueExpr": "http://example.org/U"
391            }
392          }
393        ]
394      }
395    },
396    {
397      "type": "ShapeDecl",
398      "id": "http://example.org/T",
399      "shapeExpr": {
400        "type": "Shape"
401      }
402    },
403    {
404      "type": "ShapeDecl",
405      "id": "http://example.org/U",
406      "shapeExpr": {
407        "type": "Shape"
408      }
409    }
410  ],
411  "@context": "http://www.w3.org/ns/shex.jsonld"
412}"#;
413        let schema: SchemaJson = serde_json::from_str(str).unwrap();
414        let mut ir = SchemaIR::new();
415        ir.from_schema_json(&schema).unwrap();
416        let s: ShapeLabel = ShapeLabel::iri(iri!("http://example.org/S"));
417        let idx = ir.get_shape_label_idx(&s).unwrap();
418        println!("Schema IR: {ir}");
419        println!("Idx: {idx}");
420        let references = ir.references(&idx);
421        let expected: HashMap<Pred, Vec<ShapeLabelIdx>> = vec![(
422            Pred::new_unchecked("http://example.org/p"),
423            vec![
424                ShapeLabelIdx::from(1), // T
425                ShapeLabelIdx::from(2), // U
426            ],
427        )]
428        .into_iter()
429        .collect();
430        assert_eq!(references, expected);
431    }
432
433    /*#[test]
434    fn validation_convert() {
435        let str = r#"{
436            "@context": "http://www.w3.org/ns/shex.jsonld",
437            "type": "Schema",
438            "shapes": [
439                {
440                    "type": "ShapeDecl",
441                    "id": "http://a.example/S1",
442                    "shapeExpr": {
443                        "type": "Shape",
444                        "expression": {
445                            "type": "TripleConstraint",
446                            "predicate": "http://a.example/p1"
447                        }
448                    }
449                }
450            ]
451        }"#;
452        let schema_json: SchemaJson = serde_json::from_str::<SchemaJson>(str).unwrap();
453        let mut compiled_schema = SchemaIR::new();
454        compiled_schema.from_schema_json(schema_json).unwrap();
455        let s1 = ShapeLabel::Iri(IriS::new("http://a.example/S1").unwrap());
456        let p1 = IriS::new("http://a.example/p1").unwrap();
457        let se1 = ShapeExpr::Shape {
458            closed: false,
459            extra: Vec::new(),
460            expression: Some(TripleExpr::TripleConstraint {
461                id: None,
462                inverse: false,
463                predicate: p1,
464                value_expr: None,
465                min: Min::from(1),
466                max: Max::from(1),
467                sem_acts: Vec::new(),
468                annotations: Vec::new(),
469            }),
470            sem_acts: Vec::new(),
471            annotations: Vec::new(),
472        };
473        assert_eq!(compiled_schema.find_label(&s1), Some(&se1));
474    }*/
475}