lang_sparql/lang/
parsing.rs

1use chumsky::{prelude::*, Error};
2use lang_turtle::lang::{
3    context::Ctx,
4    model::TurtlePrefix,
5    parser::{named_node, not, triple},
6};
7use lsp_core::prelude::{spanned, PToken, Spanned, SparqlExpr, SparqlKeyword, Token};
8
9use crate::lang::model::{
10    Base, Bind, DatasetClause, Expression, GroupGraphPattern, GroupGraphPatternSub, Modifier,
11    Prologue, Query, QueryClause, SelectClause, Solution, SubSelect, Variable, WhereClause,
12};
13
14fn j(token: Token) -> impl Parser<PToken, Token, Error = Simple<PToken>> + Clone {
15    just(PToken(token, 0)).map(|x| x.0)
16}
17
18fn sparql_kwd(
19    kwd: SparqlKeyword,
20) -> impl Parser<PToken, Spanned<SparqlKeyword>, Error = Simple<PToken>> + Clone {
21    just(PToken(Token::SparqlKeyword(kwd.clone()), 0))
22        .to(kwd)
23        .map_with_span(spanned)
24}
25
26// DONE
27fn prologue() -> impl Parser<PToken, Prologue, Error = Simple<PToken>> + Clone {
28    let base = j(Token::SparqlBase)
29        .map_with_span(spanned)
30        .then(named_node().map_with_span(spanned))
31        .map(|(token, iri)| Prologue::Base { token, iri });
32
33    let prefix = j(Token::SparqlPrefix).map_with_span(|_, s| s)
34        .then(select! { |span| PToken(Token::PNameLN(x, _), _) => Spanned(x.unwrap_or_default(), span)})
35        .then(named_node()
36        .map_with_span(spanned))
37        .map(|((span, prefix), value)| {
38            Prologue::Prefix(TurtlePrefix {
39                span,
40                prefix,
41                value,
42            })
43        });
44
45    base.or(prefix)
46}
47
48// DONE
49fn dataset_clause() -> impl Parser<PToken, DatasetClause, Error = Simple<PToken>> + Clone {
50    sparql_kwd(SparqlKeyword::From)
51        .then(sparql_kwd(SparqlKeyword::Named).or_not())
52        .then(named_node().map_with_span(spanned))
53        .map(|((from, named), iri)| DatasetClause { from, named, iri })
54}
55
56fn expression() -> impl Parser<PToken, Expression, Error = Simple<PToken>> + Clone {
57    todo()
58}
59
60// DONE
61fn bind() -> impl Parser<PToken, Bind, Error = Simple<PToken>> + Clone {
62    sparql_kwd(SparqlKeyword::Bind)
63        .ignore_then(expression())
64        .map_with_span(spanned)
65        .then(sparql_kwd(SparqlKeyword::As))
66        .then(variable().map_with_span(spanned))
67        .map(|((expr, kwd), var)| Bind { var, kwd, expr })
68}
69
70fn variable() -> impl Parser<PToken, Variable, Error = Simple<PToken>> + Clone {
71    select! {
72        PToken(Token::Variable(s), _) => Variable(s),
73    }
74}
75
76// DONE
77fn select_clause() -> impl Parser<PToken, SelectClause, Error = Simple<PToken>> + Clone {
78    let star = j(Token::SparqlExpr(SparqlExpr::Times))
79        .to(Solution::All)
80        .map_with_span(spanned)
81        .map(|x| vec![x]);
82
83    let others = bind()
84        .delimited_by(j(Token::CurlOpen), j(Token::CurlClose))
85        .map(Solution::VarAs)
86        .or(variable().map(Solution::Var))
87        .map_with_span(spanned)
88        .repeated();
89
90    // This might be different to support ask and describe
91
92    sparql_kwd(SparqlKeyword::Select)
93        .then(
94            sparql_kwd(SparqlKeyword::Distinct)
95                .or(sparql_kwd(SparqlKeyword::Reduced))
96                .or_not(),
97        )
98        .then(star.or(others))
99        .map(|((kwd, modifier), solutions)| SelectClause {
100            kwd,
101            modifier,
102            solutions,
103        })
104}
105
106// DONE
107fn sub_select<'a>(
108    ctx: Ctx<'a>,
109) -> impl Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + use<'a> {
110    recursive(|sub_select| {
111        let modi = modifier().map_with_span(spanned).repeated();
112        select_clause()
113            .then(where_clause(sub_select, ctx))
114            .then(modi)
115            .map(|((select, where_clause), modifier)| SubSelect {
116                modifier,
117                where_clause,
118                select,
119            })
120    })
121}
122
123// DONE
124fn group_graph_pattern_sub<
125    'a,
126    T: Parser<PToken, GroupGraphPattern, Error = Simple<PToken>> + Clone,
127>(
128    ggp: T,
129    ctx: Ctx<'a>,
130) -> impl Parser<PToken, GroupGraphPatternSub, Error = Simple<PToken>> + Clone + use<'a, T> {
131    let next_check = not(Token::CurlClose).rewind();
132
133    let trip = triple(ctx)
134        .map_with_span(spanned)
135        .map(GroupGraphPatternSub::Triple)
136        .labelled("triple");
137
138    let kwd = sparql_kwd(SparqlKeyword::Minus)
139        .or(sparql_kwd(SparqlKeyword::Optional))
140        .clone()
141        .then_ignore(j(Token::CurlOpen).rewind())
142        .then(ggp.clone().map_with_span(spanned))
143        .map(|(kwd, ggp)| GroupGraphPatternSub::Kwd(kwd, ggp));
144
145    let union = j(Token::CurlOpen)
146        .rewind()
147        .ignore_then(ggp.clone())
148        .map_with_span(spanned)
149        .then(
150            sparql_kwd(SparqlKeyword::Union)
151                .then(ggp.map_with_span(spanned))
152                .repeated(),
153        )
154        .map(|(start, rest)| GroupGraphPatternSub::Union(start, rest));
155
156    // TODO add the others
157    //
158
159    next_check.ignore_then(trip.or(kwd).or(union).labelled("group_graph_pattern_sub"))
160}
161
162fn expect_it(
163    token: Token,
164    st: &'static str,
165) -> impl Parser<PToken, Token, Error = Simple<PToken>> + Clone {
166    j(token.clone()).or(not(token.clone())
167        .map(|x| x.0)
168        .try_map(move |x: Token, span| {
169            println!("{} didn't expect {}", st, x);
170            Err(Simple::expected_input_found(
171                span,
172                [Some(PToken(token.clone(), 0))],
173                Some(PToken(x.clone(), 0)),
174            ))
175        }))
176}
177
178// DONE
179fn group_graph_pattern<'a, T: Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + 'a>(
180    select: T,
181    ctx: Ctx<'a>,
182) -> impl Parser<PToken, GroupGraphPattern, Error = Simple<PToken>> + Clone + use<'a, T> {
183    let s = select.clone();
184    recursive(|ggp| {
185        let select = s
186            .clone()
187            .map(Box::from)
188            .map(GroupGraphPattern::SubSelect)
189            .labelled("sub_select");
190
191        let gg = group_graph_pattern_sub(ggp, ctx)
192            .map_with_span(spanned)
193            .repeated()
194            .map(GroupGraphPattern::GroupGraph);
195
196        let close = expect_it(Token::CurlClose, "close").labelled("CurlClose");
197        let open = expect_it(Token::CurlOpen, "open").labelled("CurlOpen");
198
199        open.ignore_then(gg.or(select)).then_ignore(close)
200    })
201}
202
203// DONE
204fn where_clause<'a, T: Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + 'a>(
205    select: T,
206    ctx: Ctx<'a>,
207) -> impl Parser<PToken, WhereClause, Error = Simple<PToken>> + Clone + use<'a, T> {
208    sparql_kwd(SparqlKeyword::Where)
209        .or_not()
210        .then(group_graph_pattern(select, ctx).map_with_span(spanned))
211        .map(|(kwd, ggp)| WhereClause { ggp, kwd })
212}
213
214// DONE
215fn modifier() -> impl Parser<PToken, Modifier, Error = Simple<PToken>> + Clone {
216    let num = select!(
217        PToken(Token::Number(x), _) => x,
218    )
219    .map_with_span(spanned);
220
221    let limit_offset = sparql_kwd(SparqlKeyword::Limit)
222        .or(sparql_kwd(SparqlKeyword::Offset))
223        .then(num)
224        .map(|(kwd, num)| Modifier::LimitOffset(kwd, num));
225    limit_offset
226}
227
228pub fn query<'a>(
229    base: lsp_types::Url,
230    ctx: Ctx<'a>,
231) -> impl Parser<PToken, Query, Error = Simple<PToken>> + Clone + use<'a> {
232    let prologues = prologue().map_with_span(spanned).repeated().map(|xs| {
233        let mut base = None;
234        let mut prefixes = vec![];
235        xs.into_iter().for_each(|Spanned(x, span)| match x {
236            Prologue::Base { token, iri } => base = Some(Spanned(Base { token, iri }, span)),
237            Prologue::Prefix(prefix) => prefixes.push(Spanned(prefix, span)),
238        });
239        (base, prefixes)
240    });
241    let kwds = select_clause().map(QueryClause::Select);
242    let datasets = dataset_clause().map_with_span(spanned).repeated();
243    let where_clause = where_clause(sub_select(ctx), ctx).map_with_span(spanned);
244    let modifiers = modifier().map_with_span(spanned).repeated();
245
246    prologues
247        .then(kwds)
248        .then(datasets)
249        .then(where_clause)
250        .then(modifiers)
251        .map(
252            move |(((((base_statement, prefixes), kwds), datasets), where_clause), modifier)| {
253                Query {
254                    base_statement,
255                    prefixes,
256                    base: base.clone(),
257                    modifier,
258                    where_clause,
259                    datasets,
260                    kwds,
261                }
262            },
263        )
264}
265
266pub fn parse(
267    source: &str,
268    base: lsp_types::Url,
269    tokens: Vec<Spanned<Token>>,
270    ctx: Ctx<'_>,
271) -> (Spanned<Query>, Vec<Simple<PToken>>) {
272    let len = source.len();
273    let stream = chumsky::Stream::from_iter(
274        0..len,
275        tokens
276            .into_iter()
277            .enumerate()
278            .filter(|(_, x)| !x.is_comment())
279            .map(|(i, t)| t.map(|x| PToken(x, i)))
280            .map(|Spanned(x, s)| (x, s)),
281    );
282
283    let parser = query(base, ctx)
284        .map_with_span(spanned)
285        .then_ignore(end().recover_with(skip_then_retry_until([])));
286    let (json, json_errors) = parser.parse_recovery(stream);
287
288    (
289        json.unwrap_or(Spanned(Query::default(), 0..source.len())),
290        json_errors,
291    )
292}
293
#[cfg(test)]
mod tests {
    use chumsky::Stream;
    use lang_turtle::lang::context::Context;

    use super::*;
    use crate::lang::tokenizer::parse_tokens_str;

    /// Tokenizes `source`, prints every token, drops comments and runs `parser` over the
    /// remaining tokens, requiring the whole input to be consumed.
    pub fn parse_it<T, P: Parser<PToken, T, Error = Simple<PToken>>>(
        source: &str,
        parser: P,
    ) -> (Option<T>, Vec<Simple<PToken>>) {
        let (tokens, _) = parse_tokens_str(source);
        for token in &tokens {
            println!("token {:?}", token);
        }

        let eoi = source.len()..source.len();
        let significant = tokens
            .into_iter()
            .enumerate()
            .filter(|(_, token)| !token.is_comment())
            .map(|(index, token)| token.map(|inner| PToken(inner, index)))
            .map(|Spanned(token, span)| (token, span));
        let stream = Stream::from_iter(eoi, significant);

        parser
            .then_ignore(chumsky::prelude::end())
            .parse_recovery(stream)
    }

    /// A single prefix declaration parses without errors.
    #[test]
    fn parse_prologue() {
        let input = r#"
PREFIX  dc:  <http://purl.org/dc/elements/1.1/>
        "#;

        let (parsed, errors) = parse_it(input, prologue());

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A select clause listing two variables parses without errors.
    #[test]
    fn parse_select_clause() {
        let input = r#"
SELECT  ?title ?price
        "#;

        let (parsed, errors) = parse_it(input, select_clause());

        println!("q {:?}", parsed);

        for error in &errors {
            println!("t {:?}", error);
        }

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// The triple parser accepts a triple made of variables and a prefixed name.
    #[test]
    fn parse_triple() {
        let context = Context::new();
        let ctx = context.ctx();
        let input = r#"
  ?x ns:discount ?discount .
        "#;

        let (parsed, errors) = parse_it(input, triple(ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A lone triple is a valid group graph pattern element.
    #[test]
    fn parse_group_graph_pattern_sub() {
        let context = Context::new();
        let ctx = context.ctx();
        let input = r#"
 ?x ns:price ?p .
        "#;

        let nested = group_graph_pattern(sub_select(ctx), ctx);
        let (parsed, errors) = parse_it(input, group_graph_pattern_sub(nested, ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A brace-delimited pattern containing one triple parses without errors.
    #[test]
    fn parse_group_graph_pattern() {
        let context = Context::new();
        let ctx = context.ctx();
        let input = r#"{
    ?x ns:price ?p .
}"#;

        let (parsed, errors) = parse_it(input, group_graph_pattern(sub_select(ctx), ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A complete query with prefixes, a select clause and three triples parses cleanly.
    #[test]
    fn simple_test() {
        let context = Context::new();
        let ctx = context.ctx();
        let input = r#"PREFIX  dc:  <http://purl.org/dc/elements/1.1/>
PREFIX  ns:  <http://example.org/ns#>
SELECT  ?title ?price
{ ?x ns:price ?p .
  ?x dc:title ?title . 
  ?x ns:discount ?discount .
}
        "#;

        let base = lsp_types::Url::parse("memory://myFile.sq").unwrap();
        let (parsed, errors) = parse_it(input, query(base, ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }
}