1use chumsky::{chain::Chain, prelude::*, Parser};
2use lsp_core::util::token::{StringStyle, Token};
3
/// Shorthand for the opaque parser type returned by every lexer function here.
///
/// `t!(Out)` expands to `impl Parser<char, Out, Error = Simple<char>>`. The
/// names `Parser` and `Simple` are written unqualified in the expansion, so
/// they must be in scope wherever the macro is invoked.
#[macro_export]
macro_rules! t {
    ($output:ty) => {
        impl Parser<char, $output, Error = Simple<char>>
    };
}
10
11pub fn tok(st: &'static str, tok: Token) -> t!(Token) {
12 just::<char, &str, Simple<char>>(st).to(tok)
13}
14
15pub fn tokens_ext() -> t!(Token) {
16 choice((tok("{", Token::CurlOpen), tok("}", Token::CurlClose)))
17}
18
19pub fn keywords() -> t!(Token) {
20 choice((
21 just("PREFIX").to(Token::PrefixTag),
22 just("BASE").to(Token::BaseTag),
23 ))
24 .or(just('@').ignore_then(choice((
25 just("prefix").to(Token::PrefixTag),
26 just("base").to(Token::BaseTag),
27 ))))
28}
29
30pub fn tokens() -> t!(Token) {
31 choice((
32 tok("PREFIX", Token::SparqlPrefix),
35 tok("BASE", Token::SparqlBase),
36 tok("[", Token::SqOpen),
37 tok("]", Token::SqClose),
38 tok("(", Token::BracketOpen),
39 tok(")", Token::BracketClose),
40 tok("^^", Token::DataTypeDelim),
41 tok(".", Token::Stop),
42 tok(",", Token::Comma),
43 tok(";", Token::PredicateSplit),
44 tok("a", Token::PredType),
45 tok("true", Token::True),
46 tok("false", Token::False),
47 ))
48}
49
50pub fn comment() -> t!(Token) {
51 just('#')
52 .ignore_then(none_of("\n\r").repeated().collect())
53 .map(|x| Token::Comment(x))
54}
55
56pub fn invalid() -> t!(Token) {
57 none_of(" \n\r.,;[]")
58 .repeated()
59 .at_least(1)
60 .collect()
61 .map(Token::Invalid)
62}
63
64pub fn iri_ref() -> t!(Token) {
65 let letter = none_of("<>\"{}|^`\\").repeated().at_least(1).or(uchar());
66
67 letter
68 .repeated()
69 .flatten()
70 .collect()
71 .delimited_by(just('<'), just('>'))
72 .map(|x| Token::IRIRef(x))
73}
74
75pub fn pname_ns() -> t!(Token) {
76 pn_prefix()
77 .collect()
78 .or_not()
79 .then_ignore(just(':'))
80 .then(pn_local().collect().or_not())
81 .map(|(x, local)| {
82 if let Some(local) = local {
83 Token::PNameLN(x, local)
84 } else {
85 Token::PNameLN(x, String::new())
86 }
87 })
88}
89
90pub fn label_post() -> t!(Vec<char>) {
91 just('.')
92 .repeated()
93 .chain(pn_chars().repeated().at_least(1))
94}
95
96pub fn blank_node_label() -> t!(Token) {
97 let label = pn_chars()
98 .or(filter(|c: &char| c.is_numeric()))
99 .repeated()
100 .then(label_post().repeated().flatten())
101 .map(|(mut x, y)| {
102 x.extend(y);
103 x
104 });
105
106 just('_')
107 .then(just(':'))
108 .ignore_then(label.collect())
109 .map(|x| Token::BlankNodeLabel(x))
110}
111
112pub fn lang_tag() -> t!(Token) {
113 let rep = just('-').chain(filter(|c: &char| c.is_alphanumeric()).repeated());
114 just('@')
115 .ignore_then(filter(|c: &char| c.is_alphabetic()).repeated())
116 .then(rep.repeated().flatten())
117 .map(|(mut x, y)| {
118 y.append_to(&mut x);
119 x
120 })
121 .collect()
122 .map(|string| Token::LangTag(string))
123}
124
125pub fn integer() -> t!(Token) {
126 let before_dot = || {
127 one_of("+-")
128 .or_not()
129 .then(filter(|c: &char| c.is_numeric()).repeated().at_least(1))
130 .map(|(x, y)| {
131 let mut o: Vec<char> = Vec::with_capacity(x.is_some() as usize + y.len());
132 x.append_to(&mut o);
133 y.append_to(&mut o);
134 o
135 })
136 };
137
138 let no_dot = || {
139 filter(|c: &char| c.is_numeric())
140 .repeated()
141 .at_least(1)
142 .then(exponent().or_not())
143 .map(|(mut x, y)| {
144 if let Some(exp) = y {
145 exp.append_to(&mut x);
146 }
147 x
148 })
149 };
150
151 let with_dot = || {
152 just('.').then(no_dot()).map(|(x, y)| {
153 let mut o = Vec::with_capacity(1 + y.len());
154 o.push(x);
155 y.append_to(&mut o);
156 o
157 })
158 };
159
160 with_dot()
161 .or(before_dot().then(with_dot()).map(|(mut x, y)| {
162 y.append_to(&mut x);
163 x
164 }))
165 .or(no_dot())
166 .or(before_dot())
167 .collect()
168 .map(|x| Token::Number(x))
169}
170
171pub fn exponent() -> t!(Vec<char>) {
172 one_of("eE")
173 .then(one_of("+-").or_not())
174 .then(filter(|c: &char| c.is_numeric()).repeated().at_least(1))
175 .map(|((x, y), z)| {
176 let mut o = Vec::with_capacity(1 + y.is_some() as usize + z.len());
177 o.push(x);
178 y.append_to(&mut o);
179 z.append_to(&mut o);
180 o
181 })
182}
183
184pub fn parse_string<const C: char>() -> t!(String) {
185 let letter = e_char().or(uchar()).or(filter(|c: &char| {
186 *c != '\\' && *c != '\n' && *c != '\r' && *c != C
187 })
188 .repeated()
189 .at_least(1));
190
191 letter
192 .repeated()
193 .flatten()
194 .collect()
195 .delimited_by(just(C), just(C))
196}
197
198pub fn parse_long_string<const C: char>() -> t!(String) {
199 let si = || just::<char, char, Simple<char>>(C);
200 let delim = si().repeated().exactly(3);
201
202 let letter = delim.not();
203
204 delim
205 .ignore_then(letter.repeated().collect().map(|x| {
206 x
208 }))
209 .then_ignore(delim)
210 }
228
229pub fn strings() -> t!(Token) {
230 long_string_double()
231 .or(long_string_single())
232 .or(string_single())
233 .or(string_double())
234}
235
236pub fn string_single() -> t!(Token) {
237 parse_string::<'\''>().map(|x| Token::Str(x, StringStyle::Single))
238}
239pub fn string_double() -> t!(Token) {
240 parse_string::<'"'>().map(|x| Token::Str(x, StringStyle::Double))
241}
242
243pub fn long_string_single() -> t!(Token) {
244 parse_long_string::<'\''>().map(|x| Token::Str(x, StringStyle::SingleLong))
245}
246
247pub fn long_string_double() -> t!(Token) {
248 parse_long_string::<'"'>().map(|x| Token::Str(x, StringStyle::DoubleLong))
249}
250
251pub fn uchar() -> t!(Vec<char>) {
252 let small = just('\\')
253 .chain(just('u'))
254 .chain(hex())
255 .chain(hex())
256 .chain(hex())
257 .chain(hex());
258
259 let big = just('\\')
260 .chain(just('U'))
261 .chain(hex())
262 .chain(hex())
263 .chain(hex())
264 .chain(hex())
265 .chain(hex())
266 .chain(hex())
267 .chain(hex())
268 .chain(hex());
269
270 small.or(big)
271}
272
273pub fn e_char() -> t!(Vec<char>) {
274 just('\\')
275 .then(one_of("tbnrf\"'\\"))
276 .map(|(x, y)| vec![x, y])
277}
278
279pub fn pn_chars_base() -> t!(char) {
280 filter(|c: &char| c.is_alphabetic())
281}
282
283pub fn pn_chars_u() -> t!(char) {
284 pn_chars_base().or(just('_'))
285}
286pub fn varname() -> t!(char) {
287 pn_chars_u().or(filter(|c: &char| c.is_numeric()))
288}
289pub fn pn_chars() -> t!(char) {
290 pn_chars_u()
291 .or(just('-'))
292 .or(filter(|c: &char| c.is_numeric()))
293}
294pub fn pn_prefix() -> t!(Vec<char>) {
295 let ne = just('.')
296 .repeated()
297 .then(pn_chars().repeated().at_least(1))
298 .map(|(x, y)| {
299 let mut o: Vec<char> = Vec::with_capacity(x.len() + y.len());
300 x.append_to(&mut o);
301 y.append_to(&mut o);
302 o
303 })
304 .repeated()
305 .flatten();
306
307 pn_chars_base().then(ne.or_not()).map(|(x, y)| {
308 if let Some(y) = y {
309 let mut o = Vec::with_capacity(y.len() + 1);
310 o.push(x);
311 o.extend(y);
312 o
313 } else {
314 vec![x]
315 }
316 })
317}
318
319pub fn pn_local() -> t!(Vec<char>) {
320 let first_char = pn_chars_u()
321 .or(filter(|c: &char| *c == ':' || c.is_numeric()))
322 .repeated()
323 .at_least(1)
324 .or(plx());
325
326 let other = || pn_chars().or(just(':')).or(just('%'));
327
328 let rest = just('.')
329 .repeated()
330 .then(other().repeated().at_least(1))
331 .map(|(x, y)| {
332 let mut o: Vec<char> = Vec::with_capacity(x.len() + y.len());
333 x.append_to(&mut o);
334 y.append_to(&mut o);
335 o
336 })
337 .repeated()
338 .flatten();
339
340 first_char.then(rest.or_not()).map(|(mut x, y)| {
341 if let Some(y) = y {
342 y.append_to(&mut x);
343 }
344 x
345 })
346}
347
348pub fn plx() -> t!(Vec<char>) {
349 percent().or(pn_local_esc())
350}
351
352pub fn percent() -> t!(Vec<char>) {
353 just('%')
354 .ignore_then(hex().then(hex()))
355 .map(|(x, y)| vec![x, y])
356}
357
358pub fn hex() -> t!(char) {
359 filter(|c: &char| c.is_ascii_hexdigit())
360}
361
362pub fn pn_local_esc() -> t!(Vec<char>) {
363 just('\\')
364 .then(one_of("_~.-!$&'()*+,;=/?#@%"))
365 .map(|(x, y)| vec![x, y])
366}
367
#[cfg(test)]
mod tests {
    use super::*;

    /// Each token parser accepts one representative input.
    #[test]
    fn parse_keywords() {
        assert!(keywords().parse("@prefix").is_ok());
        assert!(tokens().parse(".").is_ok());
        assert!(iri_ref().parse("<testing>").is_ok());
        assert!(pname_ns().parse(":").is_ok());
        assert!(pname_ns().parse("testing:").is_ok());
        assert!(pname_ns().parse("testing:test").is_ok());
        assert!(blank_node_label().parse("_:test").is_ok());
        assert!(lang_tag().parse("@en").is_ok());
        assert!(integer().parse("14").is_ok());
        assert!(integer().parse("14.0").is_ok());
        assert!(strings().parse("'testing'").is_ok());
        assert!(strings().parse("\"testing\"").is_ok());
        assert!(strings().parse("\"\"\"testing\"\"\"").is_ok());
        assert!(comment().parse("# This is a nice comment").is_ok());
    }

    /// Whitespace-separated sequences of the same token kind are consumed in
    /// full.
    ///
    /// `repeated()` succeeds on zero matches, so each parser is followed by
    /// `end()`; without it these assertions would pass even if nothing was
    /// recognised.
    #[test]
    fn parse_multiple_kws() {
        // `@prefix` / `@base` are recognised by `keywords()`, not `tokens()`.
        assert!(keywords()
            .or(tokens())
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("@prefix @base . .")
            .is_ok());
        assert!(iri_ref()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("<testing> <testing>")
            .is_ok());
        assert!(pname_ns()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse(": testing: testing:test")
            .is_ok());
        assert!(blank_node_label()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("_:b1 _:b0")
            .is_ok());
        assert!(lang_tag()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("@en @en-nl")
            .is_ok());
        assert!(integer()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("14 14")
            .is_ok());
        assert!(strings()
            .padded()
            .repeated()
            .then_ignore(end())
            .parse("\"testing\" 'testing'")
            .is_ok());
    }
}