shex_compact/
shex_parser.rs

1use iri_s::IriS;
2// use nom::AsBytes;
3use nom::Err;
4use prefixmap::Deref;
5use shex_ast::Schema;
6use std::fs;
7use std::io;
8use std::path::Path;
9use tracing::debug;
10
11use crate::grammar_structs::ShExStatement;
12use crate::shex_statement;
13use crate::tws0;
14use crate::ParseError;
15use crate::Span;
16
// This code is inspired by:
// https://github.com/vandenoever/rome/blob/master/src/io/turtle/parser.rs
19
20type Result<A> = std::result::Result<A, ParseError>;
21
/// Parser for schemas written in ShEx compact syntax.
///
/// The associated functions (`parse`, `parse_buf`, `from_reader`) are the
/// entry points; the struct itself only holds the statement iterator that
/// `parse` drains while building the schema.
pub struct ShExParser<'a> {
    /// Iterator producing the statements read from the input, one at a time.
    shex_statement_iterator: StatementIterator<'a>,
    // state: ParserState,
    // done: bool,
}
27
28impl ShExParser<'_> {
29    /// Parse a ShEx schema that uses [ShEx compact syntax](https://shex.io/shex-semantics/index.html#shexc)
30    ///
31    /// `base` is an optional IRI that acts as the base for relative IRIs
32    pub fn parse(src: &str, base: Option<IriS>) -> Result<Schema> {
33        let mut schema = Schema::new().with_base(base);
34        let mut parser = ShExParser {
35            shex_statement_iterator: StatementIterator::new(Span::new(src))?,
36        };
37        let mut shapes_counter = 0;
38        for s in parser.shex_statement_iterator.by_ref() {
39            match s? {
40                ShExStatement::BaseDecl { iri } => {
41                    schema = schema.with_base(Some(iri));
42                }
43                ShExStatement::PrefixDecl { alias, iri } => {
44                    schema.add_prefix(alias, &iri)?;
45                }
46                ShExStatement::StartDecl { shape_expr } => {
47                    schema = schema.with_start(Some(shape_expr))
48                }
49                ShExStatement::ImportDecl { iri } => {
50                    schema = schema.with_import(iri);
51                }
52                ShExStatement::ShapeDecl {
53                    is_abstract,
54                    shape_label,
55                    shape_expr,
56                } => {
57                    let shape_label = shape_label.deref(&schema.base(), &schema.prefixmap())?;
58                    let shape_expr = shape_expr.deref(&schema.base(), &schema.prefixmap())?;
59                    shapes_counter += 1;
60                    tracing::debug!("Shape decl #{shapes_counter}: {shape_label} ");
61                    schema.add_shape(shape_label, shape_expr, is_abstract);
62                }
63                ShExStatement::StartActions { actions } => {
64                    schema = schema.with_start_actions(Some(actions));
65                }
66            }
67        }
68        Ok(schema)
69    }
70
71    pub fn parse_buf(path: &Path, base: Option<IriS>) -> Result<Schema> {
72        let data = fs::read_to_string(path)?;
73        let schema = ShExParser::parse(&data, base)?;
74        Ok(schema)
75    }
76
77    pub fn from_reader<R: io::Read>(mut reader: R, base: Option<IriS>) -> Result<Schema> {
78        let mut v = Vec::new();
79        reader.read_to_end(&mut v)?;
80        let s = String::from_utf8(v).map_err(|e| ParseError::Utf8Error {
81            error: format!("{e}"),
82        })?;
83        Self::parse(s.as_str(), base)
84    }
85}
86
/// Iterator over the statements contained in a source span.
struct StatementIterator<'a> {
    /// Part of the input that has not been consumed yet.
    src: Span<'a>,
    /// Set once iteration has to stop: the input is exhausted, the parser
    /// reported incomplete input, or an error has been returned.
    done: bool,
}
91
92impl StatementIterator<'_> {
93    pub fn new(src: Span) -> Result<StatementIterator> {
94        match tws0(src) {
95            Ok((left, _)) => Ok(StatementIterator {
96                src: left,
97                done: false,
98            }),
99            Err(Err::Incomplete(_)) => Ok(StatementIterator { src, done: false }),
100            Err(e) => Err(ParseError::Custom {
101                msg: format!("cannot start parsing. Error: {}", e),
102            }),
103        }
104    }
105}
106
107impl<'a> Iterator for StatementIterator<'a> {
108    type Item = Result<ShExStatement<'a>>;
109
110    fn next(&mut self) -> Option<Self::Item> {
111        if self.done {
112            return None;
113        }
114        let mut r;
115        if self.src.is_empty() {
116            self.done = true;
117            return None;
118        }
119        match shex_statement()(self.src) {
120            Ok((left, s)) => {
121                r = Some(Ok(s));
122                self.src = left;
123            }
124            Err(Err::Incomplete(needed)) => {
125                debug!("Incomplete! shex_statement. Needed: {needed:?}");
126                self.done = true;
127                r = None;
128            }
129            Err(Err::Error(e)) | Err(Err::Failure(e)) => {
130                r = Some(Err(ParseError::NomError { err: Box::new(e) }));
131                self.done = true;
132            }
133        }
134
135        // Skip extra whitespace
136        match tws0(self.src) {
137            Ok((left, _)) => {
138                self.src = left;
139            }
140            Err(Err::Incomplete(needed)) => {
141                debug!("Incomplete on tws: needed {needed:?}");
142                self.done = true;
143            }
144            Err(e) => {
145                r = Some(Err(ParseError::Custom {
146                    msg: format!("error parsing whitespace. Error: {}", e),
147                }));
148                self.done = true;
149            }
150        }
151
152        /*if r.is_none() && !self.src.is_empty() {
153            r = Some(Err(ParseError::Custom {
154                msg: format!("trailing bytes {}", self.src),
155            }));
156        }*/
157        r
158    }
159}
160
#[cfg(test)]
mod tests {
    use shex_ast::{Shape, ShapeExpr, ShapeExprLabel};

    use super::*;

    /// A prefix declaration followed by an empty shape must produce a schema
    /// holding that prefix and a non-abstract shape whose label is the
    /// expanded prefixed name.
    #[test]
    fn test_prefix() {
        let input = r#"
 prefix e: <http://example.org/>
 e:S {}
 "#;
        let parsed = ShExParser::parse(input, None).unwrap();

        let namespace = IriS::new_unchecked("http://example.org/");
        let label = ShapeExprLabel::iri_unchecked("http://example.org/S");
        let empty_shape = ShapeExpr::Shape(Shape::new(None, None, None));

        let mut expected = Schema::new();
        expected.add_prefix("e", &namespace).unwrap();
        expected.add_shape(label, empty_shape, false);

        assert_eq!(parsed, expected)
    }
}