1use chumsky::{prelude::*, Error};
2use lang_turtle::lang::{
3 context::Ctx,
4 model::TurtlePrefix,
5 parser::{named_node, not, triple},
6};
7use lsp_core::prelude::{spanned, PToken, Spanned, SparqlExpr, SparqlKeyword, Token};
8
9use crate::lang::model::{
10 Base, Bind, DatasetClause, Expression, GroupGraphPattern, GroupGraphPatternSub, Modifier,
11 Prologue, Query, QueryClause, SelectClause, Solution, SubSelect, Variable, WhereClause,
12};
13
14fn j(token: Token) -> impl Parser<PToken, Token, Error = Simple<PToken>> + Clone {
15 just(PToken(token, 0)).map(|x| x.0)
16}
17
18fn sparql_kwd(
19 kwd: SparqlKeyword,
20) -> impl Parser<PToken, Spanned<SparqlKeyword>, Error = Simple<PToken>> + Clone {
21 just(PToken(Token::SparqlKeyword(kwd.clone()), 0))
22 .to(kwd)
23 .map_with_span(spanned)
24}
25
26fn prologue() -> impl Parser<PToken, Prologue, Error = Simple<PToken>> + Clone {
28 let base = j(Token::SparqlBase)
29 .map_with_span(spanned)
30 .then(named_node().map_with_span(spanned))
31 .map(|(token, iri)| Prologue::Base { token, iri });
32
33 let prefix = j(Token::SparqlPrefix).map_with_span(|_, s| s)
34 .then(select! { |span| PToken(Token::PNameLN(x, _), _) => Spanned(x.unwrap_or_default(), span)})
35 .then(named_node()
36 .map_with_span(spanned))
37 .map(|((span, prefix), value)| {
38 Prologue::Prefix(TurtlePrefix {
39 span,
40 prefix,
41 value,
42 })
43 });
44
45 base.or(prefix)
46}
47
48fn dataset_clause() -> impl Parser<PToken, DatasetClause, Error = Simple<PToken>> + Clone {
50 sparql_kwd(SparqlKeyword::From)
51 .then(sparql_kwd(SparqlKeyword::Named).or_not())
52 .then(named_node().map_with_span(spanned))
53 .map(|((from, named), iri)| DatasetClause { from, named, iri })
54}
55
56fn expression() -> impl Parser<PToken, Expression, Error = Simple<PToken>> + Clone {
57 todo()
58}
59
60fn bind() -> impl Parser<PToken, Bind, Error = Simple<PToken>> + Clone {
62 sparql_kwd(SparqlKeyword::Bind)
63 .ignore_then(expression())
64 .map_with_span(spanned)
65 .then(sparql_kwd(SparqlKeyword::As))
66 .then(variable().map_with_span(spanned))
67 .map(|((expr, kwd), var)| Bind { var, kwd, expr })
68}
69
70fn variable() -> impl Parser<PToken, Variable, Error = Simple<PToken>> + Clone {
71 select! {
72 PToken(Token::Variable(s), _) => Variable(s),
73 }
74}
75
76fn select_clause() -> impl Parser<PToken, SelectClause, Error = Simple<PToken>> + Clone {
78 let star = j(Token::SparqlExpr(SparqlExpr::Times))
79 .to(Solution::All)
80 .map_with_span(spanned)
81 .map(|x| vec![x]);
82
83 let others = bind()
84 .delimited_by(j(Token::CurlOpen), j(Token::CurlClose))
85 .map(Solution::VarAs)
86 .or(variable().map(Solution::Var))
87 .map_with_span(spanned)
88 .repeated();
89
90 sparql_kwd(SparqlKeyword::Select)
93 .then(
94 sparql_kwd(SparqlKeyword::Distinct)
95 .or(sparql_kwd(SparqlKeyword::Reduced))
96 .or_not(),
97 )
98 .then(star.or(others))
99 .map(|((kwd, modifier), solutions)| SelectClause {
100 kwd,
101 modifier,
102 solutions,
103 })
104}
105
106fn sub_select<'a>(
108 ctx: Ctx<'a>,
109) -> impl Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + use<'a> {
110 recursive(|sub_select| {
111 let modi = modifier().map_with_span(spanned).repeated();
112 select_clause()
113 .then(where_clause(sub_select, ctx))
114 .then(modi)
115 .map(|((select, where_clause), modifier)| SubSelect {
116 modifier,
117 where_clause,
118 select,
119 })
120 })
121}
122
123fn group_graph_pattern_sub<
125 'a,
126 T: Parser<PToken, GroupGraphPattern, Error = Simple<PToken>> + Clone,
127>(
128 ggp: T,
129 ctx: Ctx<'a>,
130) -> impl Parser<PToken, GroupGraphPatternSub, Error = Simple<PToken>> + Clone + use<'a, T> {
131 let next_check = not(Token::CurlClose).rewind();
132
133 let trip = triple(ctx)
134 .map_with_span(spanned)
135 .map(GroupGraphPatternSub::Triple)
136 .labelled("triple");
137
138 let kwd = sparql_kwd(SparqlKeyword::Minus)
139 .or(sparql_kwd(SparqlKeyword::Optional))
140 .clone()
141 .then_ignore(j(Token::CurlOpen).rewind())
142 .then(ggp.clone().map_with_span(spanned))
143 .map(|(kwd, ggp)| GroupGraphPatternSub::Kwd(kwd, ggp));
144
145 let union = j(Token::CurlOpen)
146 .rewind()
147 .ignore_then(ggp.clone())
148 .map_with_span(spanned)
149 .then(
150 sparql_kwd(SparqlKeyword::Union)
151 .then(ggp.map_with_span(spanned))
152 .repeated(),
153 )
154 .map(|(start, rest)| GroupGraphPatternSub::Union(start, rest));
155
156 next_check.ignore_then(trip.or(kwd).or(union).labelled("group_graph_pattern_sub"))
160}
161
162fn expect_it(
163 token: Token,
164 st: &'static str,
165) -> impl Parser<PToken, Token, Error = Simple<PToken>> + Clone {
166 j(token.clone()).or(not(token.clone())
167 .map(|x| x.0)
168 .try_map(move |x: Token, span| {
169 println!("{} didn't expect {}", st, x);
170 Err(Simple::expected_input_found(
171 span,
172 [Some(PToken(token.clone(), 0))],
173 Some(PToken(x.clone(), 0)),
174 ))
175 }))
176}
177
178fn group_graph_pattern<'a, T: Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + 'a>(
180 select: T,
181 ctx: Ctx<'a>,
182) -> impl Parser<PToken, GroupGraphPattern, Error = Simple<PToken>> + Clone + use<'a, T> {
183 let s = select.clone();
184 recursive(|ggp| {
185 let select = s
186 .clone()
187 .map(Box::from)
188 .map(GroupGraphPattern::SubSelect)
189 .labelled("sub_select");
190
191 let gg = group_graph_pattern_sub(ggp, ctx)
192 .map_with_span(spanned)
193 .repeated()
194 .map(GroupGraphPattern::GroupGraph);
195
196 let close = expect_it(Token::CurlClose, "close").labelled("CurlClose");
197 let open = expect_it(Token::CurlOpen, "open").labelled("CurlOpen");
198
199 open.ignore_then(gg.or(select)).then_ignore(close)
200 })
201}
202
203fn where_clause<'a, T: Parser<PToken, SubSelect, Error = Simple<PToken>> + Clone + 'a>(
205 select: T,
206 ctx: Ctx<'a>,
207) -> impl Parser<PToken, WhereClause, Error = Simple<PToken>> + Clone + use<'a, T> {
208 sparql_kwd(SparqlKeyword::Where)
209 .or_not()
210 .then(group_graph_pattern(select, ctx).map_with_span(spanned))
211 .map(|(kwd, ggp)| WhereClause { ggp, kwd })
212}
213
214fn modifier() -> impl Parser<PToken, Modifier, Error = Simple<PToken>> + Clone {
216 let num = select!(
217 PToken(Token::Number(x), _) => x,
218 )
219 .map_with_span(spanned);
220
221 let limit_offset = sparql_kwd(SparqlKeyword::Limit)
222 .or(sparql_kwd(SparqlKeyword::Offset))
223 .then(num)
224 .map(|(kwd, num)| Modifier::LimitOffset(kwd, num));
225 limit_offset
226}
227
228pub fn query<'a>(
229 base: lsp_types::Url,
230 ctx: Ctx<'a>,
231) -> impl Parser<PToken, Query, Error = Simple<PToken>> + Clone + use<'a> {
232 let prologues = prologue().map_with_span(spanned).repeated().map(|xs| {
233 let mut base = None;
234 let mut prefixes = vec![];
235 xs.into_iter().for_each(|Spanned(x, span)| match x {
236 Prologue::Base { token, iri } => base = Some(Spanned(Base { token, iri }, span)),
237 Prologue::Prefix(prefix) => prefixes.push(Spanned(prefix, span)),
238 });
239 (base, prefixes)
240 });
241 let kwds = select_clause().map(QueryClause::Select);
242 let datasets = dataset_clause().map_with_span(spanned).repeated();
243 let where_clause = where_clause(sub_select(ctx), ctx).map_with_span(spanned);
244 let modifiers = modifier().map_with_span(spanned).repeated();
245
246 prologues
247 .then(kwds)
248 .then(datasets)
249 .then(where_clause)
250 .then(modifiers)
251 .map(
252 move |(((((base_statement, prefixes), kwds), datasets), where_clause), modifier)| {
253 Query {
254 base_statement,
255 prefixes,
256 base: base.clone(),
257 modifier,
258 where_clause,
259 datasets,
260 kwds,
261 }
262 },
263 )
264}
265
266pub fn parse(
267 source: &str,
268 base: lsp_types::Url,
269 tokens: Vec<Spanned<Token>>,
270 ctx: Ctx<'_>,
271) -> (Spanned<Query>, Vec<Simple<PToken>>) {
272 let len = source.len();
273 let stream = chumsky::Stream::from_iter(
274 0..len,
275 tokens
276 .into_iter()
277 .enumerate()
278 .filter(|(_, x)| !x.is_comment())
279 .map(|(i, t)| t.map(|x| PToken(x, i)))
280 .map(|Spanned(x, s)| (x, s)),
281 );
282
283 let parser = query(base, ctx)
284 .map_with_span(spanned)
285 .then_ignore(end().recover_with(skip_then_retry_until([])));
286 let (json, json_errors) = parser.parse_recovery(stream);
287
288 (
289 json.unwrap_or(Spanned(Query::default(), 0..source.len())),
290 json_errors,
291 )
292}
293
#[cfg(test)]
mod tests {
    use chumsky::Stream;
    use lang_turtle::lang::context::Context;

    use super::*;
    use crate::lang::{parsing::select_clause, tokenizer::parse_tokens_str};

    /// Tokenizes `turtle`, prints every token, drops comment tokens and runs
    /// `parser` over the rest, requiring it to consume all remaining input.
    ///
    /// Returns the parser output (if any) and the collected errors.
    pub fn parse_it<T, P: Parser<PToken, T, Error = Simple<PToken>>>(
        turtle: &str,
        parser: P,
    ) -> (Option<T>, Vec<Simple<PToken>>) {
        let (tokens, _) = parse_tokens_str(turtle);
        tokens
            .iter()
            .for_each(|token| println!("token {:?}", token));

        let eoi = turtle.len()..turtle.len();
        let significant = tokens
            .into_iter()
            .enumerate()
            .filter(|(_, token)| !token.is_comment())
            .map(|(index, token)| token.map(|inner| PToken(inner, index)))
            .map(|Spanned(token, span)| (token, span));

        parser
            .then_ignore(chumsky::prelude::end())
            .parse_recovery(Stream::from_iter(eoi, significant))
    }

    /// A lone prefix declaration parses without errors.
    #[test]
    fn parse_prologue() {
        let source = r#"
PREFIX dc: <http://purl.org/dc/elements/1.1/>
 "#;

        let (parsed, errors) = parse_it(source, prologue());

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A select clause with two plain variables parses without errors.
    #[test]
    fn parse_select_clause() {
        let source = r#"
SELECT ?title ?price
 "#;

        let (parsed, errors) = parse_it(source, select_clause());

        println!("q {:?}", parsed);
        errors.iter().for_each(|error| println!("t {:?}", error));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// The triple parser accepts a triple with variables and a prefixed name.
    #[test]
    fn parse_triple() {
        let source = r#"
 ?x ns:discount ?discount .
 "#;
        let context = Context::new();
        let ctx = context.ctx();

        let (parsed, errors) = parse_it(source, triple(ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A single triple is accepted as a group-graph-pattern element.
    #[test]
    fn parse_group_graph_pattern_sub() {
        let source = r#"
 ?x ns:price ?p .
 "#;
        let context = Context::new();
        let ctx = context.ctx();

        let nested = group_graph_pattern(sub_select(ctx), ctx);
        let (parsed, errors) = parse_it(source, group_graph_pattern_sub(nested, ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A braced block holding one triple parses as a group graph pattern.
    #[test]
    fn parse_group_graph_pattern() {
        let source = r#"{
 ?x ns:price ?p .
}"#;
        let context = Context::new();
        let ctx = context.ctx();

        let (parsed, errors) = parse_it(source, group_graph_pattern(sub_select(ctx), ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }

    /// A full query with two prefixes, a projection and three triples parses
    /// without errors.
    #[test]
    fn simple_test() {
        let source = r#"PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX ns: <http://example.org/ns#>
SELECT ?title ?price
{ ?x ns:price ?p .
 ?x dc:title ?title .
 ?x ns:discount ?discount .
}
 "#;
        let context = Context::new();
        let ctx = context.ctx();
        let base = lsp_types::Url::parse("memory://myFile.sq").unwrap();

        let (parsed, errors) = parse_it(source, query(base, ctx));

        assert_eq!(errors, vec![]);
        assert!(parsed.is_some());
    }
}