lang_jsonld/lang/
parser.rs

1use std::{
2    io::{self, Write},
3    ops::Range,
4};
5
6use chumsky::{prelude::*, Error, Parser, Stream};
7use enum_methods::{EnumIntoGetters, EnumIsA, EnumToGetters};
8use lsp_core::prelude::{spanned, MyTerm, Spanned, Token};
9use Token::*;
10
/// Incremental builder that turns the flat sequence of values and `:` / `,`
/// tokens found inside an object body into object members, reporting problems
/// through `emit` rather than failing.
struct ObjectMemberManager<'a> {
    /// Members completed so far.
    out: Vec<Spanned<ObjectMember>>,
    /// Start offset used for the span of the member currently being built.
    full_start: usize,
    /// Cursor offset updated after each consumed part; used to position
    /// diagnostics and synthesized (invalid) keys.
    start: usize,

    /// Set when a comma is consumed; cleared by `flush`.
    seen_comma: bool,
    /// Set when a colon is consumed; cleared by `flush`.
    seen_colon: bool,

    /// Key of the member being built, if one has been seen.
    current_key: Option<Spanned<Token>>,
    /// Value of the member being built, if one has been seen.
    current_value: Option<Spanned<Json>>,
    /// Sink that receives every diagnostic produced while building members.
    emit: &'a mut dyn FnMut(Simple<Token>),
}
23
24impl<'a> ObjectMemberManager<'a> {
25    fn new(span: &Range<usize>, emit: &'a mut dyn FnMut(Simple<Token>)) -> Self {
26        Self {
27            out: vec![],
28            full_start: span.start,
29            start: span.start,
30            seen_comma: false,
31            seen_colon: false,
32            current_key: None,
33            current_value: None,
34            emit,
35        }
36    }
37
38    #[allow(unused)]
39    fn print(&self) {
40        println!(
41            "key {:?} value {:?} (out {} len) (start {} full start {})",
42            self.current_key.as_ref().map(|x| x.value()),
43            self.current_value.as_ref().map(|x| x.value()),
44            self.out.len(),
45            self.start,
46            self.full_start
47        )
48    }
49
50    fn invalid(&mut self, span: Range<usize>) -> Spanned<Token> {
51        (self.emit)(Simple::custom(span.clone(), "Expected valid token"));
52        Spanned(Token::Invalid("".to_string()), span)
53    }
54
55    fn invalid_json(&mut self, span: Range<usize>) -> Spanned<Json> {
56        (self.emit)(Simple::custom(span.clone(), "Expected valid json"));
57        Spanned(Json::Invalid, span)
58    }
59
60    fn eat_json(&mut self, part: Spanned<Json>) {
61        if self.current_key.is_none() {
62            let span = part.span().clone();
63            match part {
64                Spanned(Json::Token(t), span) => {
65                    self.current_key = Some(Spanned(t, span));
66                }
67                x => {
68                    self.current_key = Some(self.invalid(self.start..span.start));
69                    self.current_value = Some(x);
70                }
71            }
72
73            self.full_start = span.start;
74            self.start = span.end + 1;
75            return;
76        }
77
78        if self.current_value.is_none() {
79            if !self.seen_colon {
80                (self.emit)(Simple::custom(
81                    self.start - 1..self.start,
82                    "expected colon, didn't find one",
83                ));
84            }
85
86            self.start = part.span().end + 1;
87            self.current_value = Some(part);
88            return;
89        }
90
91        // We didn't expect to flush a thing, but we did
92        self.flush(self.full_start..part.span().end, false);
93        self.eat_json(part);
94    }
95
96    fn eat_token(&mut self, token: Spanned<Token>) {
97        match token {
98            Spanned(Token::Colon, span) => {
99                (self.current_key, self.current_value) =
100                    match (self.current_key.take(), self.current_value.take()) {
101                        (Some(k), Some(Spanned(Json::Token(k2), r))) => {
102                            self.current_key = Some(k);
103                            // self.current_value = Some(self.invalid_json(r.clone()));
104                            self.flush(span.clone(), false);
105                            (Some(Spanned(k2, r)), None)
106                        }
107                        (k, v) => (k, v),
108                    };
109                if self.seen_colon {
110                    (self.emit)(Simple::custom(
111                        span.clone(),
112                        "Unexepected colon, already seen one",
113                    ));
114                }
115                self.seen_colon = true;
116                // we expect to set the second part
117                if self.current_key.is_none() {
118                    self.current_key = Some(self.invalid(self.start..span.start));
119                }
120                self.start = span.end;
121            }
122            Spanned(Token::Comma, span) => {
123                if self.seen_comma {
124                    (self.emit)(Simple::custom(
125                        span.clone(),
126                        "Unexepected comma, already seen one",
127                    ));
128                }
129                self.seen_comma = true;
130                self.flush(span, false);
131            }
132            Spanned(x, s) => {
133                (self.emit)(Simple::expected_input_found(
134                    s,
135                    [Some(Token::Colon), Some(Token::Comma)],
136                    Some(x),
137                ));
138            }
139        }
140    }
141    fn flush(&mut self, span: Range<usize>, end: bool) {
142        if !end && !self.seen_comma {
143            (self.emit)(Simple::custom(
144                span.end - 1..span.end,
145                "Expected comma, but didn't find one",
146            ))
147        }
148        let k = match self.current_key.take() {
149            Some(k) => k,
150            None => self.invalid(span.clone()),
151        };
152        let v = match self.current_value.take() {
153            Some(v) => v,
154            None => self.invalid_json(span.clone()),
155        };
156        self.out
157            .push(Spanned(ObjectMember::Full(k, v), self.full_start..span.end));
158        self.start = span.end + 1;
159        self.full_start = span.end + 1;
160        self.seen_colon = false;
161        self.seen_comma = false;
162    }
163}
164
/// One entry of a JSON object.
#[derive(Clone, PartialEq, Debug, EnumIntoGetters, EnumIsA, EnumToGetters)]
pub enum ObjectMember {
    /// A key together with its value.
    Full(Spanned<Token>, Spanned<Json>),
    /// A key with an optional value.
    // NOTE(review): the middle `Spanned<()>` presumably marks the colon — confirm.
    Partial(Spanned<Token>, Option<Spanned<()>>, Option<Spanned<Json>>),
}
170impl ObjectMember {
171    pub fn field(&self) -> &Spanned<Token> {
172        match self {
173            ObjectMember::Full(spanned, _) => spanned,
174            ObjectMember::Partial(spanned, _, _) => spanned,
175        }
176    }
177
178    pub fn json_value(&self) -> Option<&Spanned<Json>> {
179        match self {
180            ObjectMember::Full(_, spanned) => Some(spanned),
181            ObjectMember::Partial(_, _, spanned) => spanned.as_ref(),
182        }
183    }
184}
185
/// A parsed JSON value.
#[derive(Clone, PartialEq, Debug, EnumIntoGetters, EnumIsA, EnumToGetters)]
pub enum Json {
    /// Placeholder for a value that is missing or could not be parsed.
    Invalid,
    /// A leaf value, kept as the token it was read from.
    Token(Token),
    /// An array of values.
    Array(Vec<Spanned<Json>>),
    /// An object, as a list of members.
    Object(Vec<Spanned<ObjectMember>>),
}
193
194impl Json {
195    pub fn extract_triples(&self) -> Vec<MyTerm<'static>> {
196        Vec::new()
197    }
198    pub fn token(&self) -> Option<&Token> {
199        match self {
200            Json::Token(t) => Some(t),
201            _ => None,
202        }
203    }
204}
205
/// State for pretty-printing a `Json` tree.
pub struct JsonFormatter {
    /// Text written once per indentation level at the start of each line.
    pub indent: String,
    /// Current indentation level.
    pub inc: usize,
}
210impl JsonFormatter {
211    pub fn inc(&mut self) {
212        self.inc += 1;
213    }
214
215    pub fn decr(&mut self) {
216        self.inc -= 1;
217    }
218
219    pub fn line(&mut self, writer: &mut impl Write) -> io::Result<()> {
220        write!(writer, "\n")?;
221        for _ in 0..self.inc {
222            write!(writer, "{}", &self.indent)?;
223        }
224        Ok(())
225    }
226
227    pub fn format(&mut self, json: &Json, writer: &mut impl Write) -> io::Result<()> {
228        use std::io::{Error, ErrorKind};
229        match json {
230            Json::Invalid => {
231                return Result::Err(Error::new(ErrorKind::Other, "cannot format invalid json"))
232            }
233            Json::Token(t) => write!(writer, "{}", t)?,
234            Json::Array(xs) => {
235                write!(writer, "[")?;
236                self.inc();
237                self.line(writer)?;
238                let mut first = true;
239                for t in xs {
240                    if !first {
241                        write!(writer, ",")?;
242                        self.line(writer)?;
243                    }
244                    self.format(&t.0, writer)?;
245                    first = false;
246                }
247                self.decr();
248                self.line(writer)?;
249                write!(writer, "]")?;
250            }
251            Json::Object(xs) => {
252                write!(writer, "{{")?;
253                self.inc();
254                self.line(writer)?;
255                let mut first = true;
256                for t in xs {
257                    if !first {
258                        write!(writer, ",")?;
259                        self.line(writer)?;
260                    }
261                    match &t.0 {
262                        ObjectMember::Full(x, y) => {
263                            write!(writer, "{}: ", x.0)?;
264                            self.format(y, writer)?;
265                        }
266                        ObjectMember::Partial(_, _, _) => {
267                            return Result::Err(Error::new(
268                                ErrorKind::Other,
269                                "cannot format invalid json",
270                            ))
271                        }
272                    }
273                    first = false;
274                }
275                self.decr();
276                self.line(writer)?;
277                write!(writer, "}}")?;
278            }
279        }
280        Ok(())
281    }
282}
283
284impl Default for Json {
285    fn default() -> Self {
286        Self::Invalid
287    }
288}
289
290pub fn parse(source: &str, tokens: Vec<Spanned<Token>>) -> (Spanned<Json>, Vec<Simple<Token>>) {
291    let stream = Stream::from_iter(
292        0..source.len() + 1,
293        tokens.into_iter().map(|Spanned(x, s)| (x, s)),
294    );
295
296    let parser = parser().then_ignore(end().recover_with(skip_then_retry_until([])));
297    let (json, json_errors) = parser.parse_recovery(stream);
298
299    (
300        json.unwrap_or(Spanned(Json::Invalid, 0..source.len())),
301        json_errors,
302    )
303}
304
305type S = std::ops::Range<usize>;
306fn expect_token(
307    token: Token,
308    not_allowed: Token,
309) -> impl Parser<Token, Token, Error = Simple<Token, S>> + Clone {
310    just(token.clone()).or(none_of([token.clone(), not_allowed]).rewind().validate(
311        move |x, span: S, emit| {
312            emit(Simple::expected_input_found(
313                span,
314                [Some(token.clone())],
315                Some(x),
316            ));
317            token.clone()
318        },
319    ))
320}
321
322fn parser() -> impl Parser<Token, Spanned<Json>, Error = Simple<Token>> {
323    recursive(|value| {
324        let array = value
325            .clone()
326            .separated_by(expect_token(Token::Comma, Token::SqClose))
327            .delimited_by(just(SqOpen), just(SqClose))
328            .map(Json::Array)
329            .labelled("array");
330
331        // let array = just(SqOpen).ignore_then(value.clone().separated_by(just(Comma))).then_ignore(just(SqClose)).map(Json::Array);
332
333        let member_part = value
334            .map(Result::Ok)
335            .or(one_of([Token::Comma, Token::Colon])
336                .map_with_span(spanned)
337                .map(Result::Err));
338        // let member_value = just(Token::Colon).ignore_then(value.clone());
339        // let member = filter(Token::is_str)
340        //     .map_with_span(spanned)
341        //     .then(member_value.or())
342        //     .validate(|(s, o), span, emit| match o {
343        //         Some(o) => ObjectMember::Full(s, o),
344        //         None => {
345        //             emit(Simple::custom(span, "Erroneous object member"));
346        //             ObjectMember::Partial(s, None, None)
347        //         }
348        //     })
349        //     .labelled("object member");
350
351        let obj = just(CurlOpen)
352            .ignore_then(member_part.repeated().validate(|parts, span, emit| {
353                let mut manager = ObjectMemberManager::new(&span, emit);
354
355                for part in parts {
356                    // manager.print();
357                    match part {
358                        Ok(e) => manager.eat_json(e),
359                        Err(e) => manager.eat_token(e),
360                    }
361                }
362                // manager.print();
363                manager.flush(span, true);
364                manager.out
365            }))
366            .then_ignore(just(CurlClose))
367            .map(Json::Object)
368            .labelled("object");
369
370        // let obj = member
371        //     .map_with_span(spanned)
372        //     .separated_by(just(Comma).recover_with(skip_then_retry_until([])))
373        //     .delimited_by(just(CuOpen), just(CuClose))
374        //     .map(Json::Object);
375
376        let leaves = chumsky::prelude::select! {
377            Null => Json::Token(Null),
378            True => Json::Token(True),
379            False => Json::Token(False),
380            Token::Str(x, st) => Json::Token(Token::Str(x, st)),
381            Token::Number(n) => Json::Token(Token::Number(n)),
382        }
383        .labelled("leaf");
384
385        choice((array, obj, leaves))
386            // .map(std::result::Result::Ok)
387            // .or(any().map(std::result::Result::Err))
388            // .validate(|t, span, emit| match t {
389            //     Ok(x) => x,
390            //     Err(v) => {
391            //         emit(Simple::custom(span, format!("Expected JSON found {:?}", v)));
392            //         Json::Invalid
393            //     }
394            // })
395            .map_with_span(spanned)
396    })
397}
398
#[cfg(test)]
mod tests {
    use lsp_core::prelude::StringStyle;

    use super::*;
    use crate::lang::tokenizer::tokenize;

    /// A lone string parses to a single string token without errors.
    #[test]
    fn parse_json_simple() {
        let source = "\"test\"";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert!(json_errors.is_empty());

        assert_eq!(
            json.into_value(),
            Json::Token(Token::Str("test".into(), StringStyle::Double))
        );
    }

    /// A well-formed array parses to its elements without errors.
    #[test]
    fn parse_json_array() {
        let source = "[\"test\", 42]";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert!(json_errors.is_empty());

        let arr: Vec<_> = match json.into_value() {
            Json::Array(x) => x.into_iter().map(|x| x.into_value()).collect(),
            _ => panic!("Expected json array"),
        };

        assert_eq!(
            arr,
            vec![
                Json::Token(Token::Str("test".into(), StringStyle::Double)),
                Json::Token(Token::Number("42".into()))
            ]
        );
    }

    /// A missing comma between two members yields one error and both members.
    #[test]
    fn parse_json_object_no_comma() {
        let source = r#"{
  "@type": "foaf:Document"
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        println!("json errors {:?}", json_errors);
        assert_eq!(json_errors.len(), 1, "One json error");

        let obj = match json.into_value() {
            Json::Object(xs) => xs,
            x => panic!("Expected json object, found {:?}", x),
        };
        assert_eq!(obj.len(), 2);
    }

    /// A key with a colon but no value is recovered as its own member.
    #[test]
    fn parse_json_object_no_value() {
        let source = r#"{
  "something":
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        for e in &json_errors {
            println!("json errors {:?}", e);
        }

        let obj = match json.into_value() {
            Json::Object(xs) => xs,
            x => panic!("Expected json object, found {:?}", x),
        };
        assert_eq!(obj.len(), 2);

        assert_eq!(
            json_errors.len(),
            2,
            "Erroneous object member and expected comma"
        );
    }

    /// A bare key (no colon, no value, no comma) is recovered as its own
    /// member and reported with three errors.
    #[test]
    fn parse_json_object_no_colon_value() {
        let source = r#"{
  "something"
  "foaf:topic": "foaf:Document"
}"#;

        let (tokens, token_errors) = tokenize(source);
        assert_eq!(token_errors, vec![]);

        let (json, json_errors) = parse(source, tokens);

        for e in &json_errors {
            println!("json errors {:?}", e);
        }

        let obj = match json.into_value() {
            Json::Object(xs) => xs,
            x => panic!("Expected json object, found {:?}", x),
        };
        assert_eq!(obj.len(), 2);

        assert_eq!(
            json_errors.len(),
            3,
            "Expected colon, expected comma and expected value"
        );
    }

    /// Stray `:` and `,` inside an array should be skipped.
    // NOTE(review): ignored, presumably because array recovery from stray
    // tokens is not implemented yet — confirm before enabling.
    #[ignore]
    #[test]
    fn parse_json_array_invalid() {
        let source = "[\"test\" :  , 42 ]";
        let (tokens, token_errors) = tokenize(source);
        let (json, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());

        println!("Error: {:?}", json_errors);
        let arr: Vec<_> = match json.into_value() {
            Json::Array(x) => x.into_iter().map(|x| x.into_value()).collect(),
            x => panic!("Expected json array, got {:?}", x),
        };

        assert_eq!(
            arr,
            vec![
                Json::Token(Token::Str("test".into(), StringStyle::Double)),
                Json::Token(Token::Number("42".to_string())),
            ]
        );
    }

    /// A valid document with a nested object inside an array must parse
    /// without any error.
    #[test]
    fn parse_failed() {
        let source = r#"
{
  "@context": [
    "https://data.vlaanderen.be/doc/applicatieprofiel/sensoren-en-bemonstering/kandidaatstandaard/2022-04-28/context/ap-sensoren-en-bemonstering.jsonld",
    {
      "foaf": "foaf_exp"
    } 
  ], "test": "test_exp"
}
"#;

        let (tokens, token_errors) = tokenize(source);
        let (_, json_errors) = parse(source, tokens);

        assert!(token_errors.is_empty());
        assert_eq!(json_errors.len(), 0);
    }
}