lang_turtle/lang/
formatter.rs

1use std::{
2    io::{self, Cursor, Write},
3    ops::Range,
4};
5
6use lsp_core::prelude::*;
7use lsp_types::FormattingOptions;
8use ropey::Rope;
9use tracing::info;
10
11use crate::lang::model::{Base, BlankNode, Term, Triple, Turtle, TurtlePrefix, PO};
12
13#[allow(unused)]
14pub fn format(tokens: &[&Token], options: FormattingOptions) -> String {
15    let mut indent_str = String::new();
16    for _ in 0..options.tab_size {
17        indent_str += " ";
18    }
19
20    let mut indent = 0;
21    let mut document = String::new();
22    let mut line = String::new();
23    let mut wants_newline = 0;
24    let mut needs_new_line = false;
25
26    let mut listings = vec![false];
27    let mut first = true;
28
29    let mut last_open_bnode = false;
30
31    for token in tokens {
32        if last_open_bnode && token.is_sq_close() {
33            wants_newline = 0;
34        }
35
36        let space = match token {
37            Token::Stop | Token::Comma | Token::PredicateSplit => false,
38            Token::DataTypeDelim | Token::LangTag(_) => false,
39            _ => true,
40        };
41
42        if wants_newline == 0 && space && !first {
43            line += " ";
44        }
45
46        if needs_new_line || (wants_newline > 0 && !token.is_comment()) {
47            document += "\n";
48            document += &line;
49            line = String::new();
50
51            for _ in 1..wants_newline {
52                line += "\n";
53            }
54
55            for _ in 0..indent {
56                line += &indent_str;
57            }
58
59            if listings.last().copied().unwrap_or_default() {
60                line += &indent_str;
61            }
62
63            wants_newline = 0;
64            needs_new_line = false;
65        }
66
67        last_open_bnode = token.is_sq_open();
68
69        match token {
70            Token::PrefixTag => line += "@prefix",
71            Token::BaseTag => line += "@base",
72            Token::SparqlPrefix => line += "PREFIX",
73            Token::SparqlBase => line += "BASE",
74            Token::PredType => line += "a",
75            Token::SqOpen => {
76                line += "[";
77                indent += 1;
78                wants_newline = 1;
79                listings.push(false);
80            }
81            Token::SqClose => {
82                line += "]";
83                indent -= 1;
84                listings.pop();
85            }
86            Token::BracketOpen => line += "(",
87            Token::BracketClose => line += ")",
88            Token::DataTypeDelim => {
89                line += "^^";
90            }
91            Token::Stop => {
92                line += ".";
93                wants_newline = 1;
94                if let Some(l) = listings.last_mut() {
95                    if *l {
96                        wants_newline = 2;
97                    }
98                    *l = false;
99                }
100            }
101            Token::PredicateSplit => {
102                line += ";";
103                wants_newline = 1;
104                if let Some(l) = listings.last_mut() {
105                    *l = true;
106                }
107            }
108            Token::Comma => line += ",",
109            Token::True => line += "true",
110            Token::False => line += "false",
111            Token::IRIRef(x) => {
112                line += "<";
113                line += x.as_str();
114                line += ">";
115            }
116            Token::PNameLN(x, y) => {
117                if let Some(x) = x {
118                    line += x.as_str();
119                }
120                line += ":";
121                line += y.as_str();
122            }
123            Token::BlankNodeLabel(x) => {
124                line += "_:";
125                line += x.as_str();
126            }
127            Token::LangTag(x) => {
128                line += "@";
129                line += x.as_str();
130            }
131            Token::Number(x) => line += x,
132            Token::Str(x, y) => {
133                line += y.quote();
134                line += x;
135                line += y.quote();
136            }
137            Token::ANON => line += "[]",
138            Token::Comment(x) => {
139                line += x;
140                needs_new_line = true;
141            }
142            Token::Invalid(x) => line += x,
143            _ => todo!(),
144        }
145
146        first = false;
147    }
148
149    document += "\n";
150    document += &line;
151    document + "\n"
152}
153
154type Buf = Cursor<Vec<u8>>;
155struct FormatState<'a> {
156    indent_level: usize,
157    indent: String,
158    buf: Buf,
159    line_start: u64,
160    comments: &'a [Spanned<String>],
161    comments_idx: usize,
162    tail: Spanned<String>,
163    line_count: usize,
164}
165
166impl<'a> FormatState<'a> {
167    fn new(
168        options: FormattingOptions,
169        buf: Buf,
170        comments: &'a [Spanned<String>],
171        source: &'a Rope,
172    ) -> Self {
173        let mut indent = String::new();
174        for _ in 0..options.tab_size {
175            indent.push(' ');
176        }
177
178        let tail = spanned(
179            String::new(),
180            source.len_chars() + 1..source.len_chars() + 1,
181        );
182        Self {
183            tail,
184            line_start: 0,
185            indent_level: 0,
186            indent,
187            buf,
188            comments,
189            comments_idx: 0,
190            line_count: 0,
191        }
192    }
193
194    fn check_comments(&mut self, span: &Range<usize>) -> io::Result<bool> {
195        println!("Checking comments with span {:?}", span);
196        let mut first = true;
197        loop {
198            let current = self.comments.get(self.comments_idx).unwrap_or(&self.tail);
199
200            if current.1.start > span.start {
201                break;
202            }
203
204            first = false;
205            write!(self.buf, "{}", current.0)?;
206            self.new_line()?;
207            self.comments_idx += 1;
208        }
209        Ok(!first)
210    }
211    fn current_line_length(&self) -> u64 {
212        self.buf.position() - self.line_start
213    }
214    fn new_line(&mut self) -> io::Result<()> {
215        self.line_count += 1;
216        write!(self.buf, "\n")?;
217        self.line_start = self.buf.position();
218        for _ in 0..self.indent_level {
219            write!(self.buf, "{}", &self.indent)?;
220        }
221        Ok(())
222    }
223    fn inc(&mut self) {
224        self.indent_level += 1;
225    }
226    fn decr(&mut self) {
227        self.indent_level -= 1;
228    }
229}
230
231impl FormatState<'_> {
232    fn write_turtle(&mut self, turtle: &Turtle) -> io::Result<()> {
233        if let Some(ref b) = turtle.base {
234            self.check_comments(&b.1)?;
235            self.write_base(b)?;
236            self.new_line()?;
237        }
238        for p in &turtle.prefixes {
239            self.check_comments(&p.1)?;
240            self.write_prefix(p)?;
241            self.new_line()?;
242        }
243
244        let mut prev_line = 0;
245
246        for t in &turtle.triples {
247            if prev_line + 1 < self.line_count {
248                self.new_line()?;
249            }
250            prev_line = self.line_count;
251            self.check_comments(&t.1)?;
252            self.write_triple(&t)?;
253            self.new_line()?;
254            // request_newline = t.0.po.len() > 1 || t.0.po[0].0.object.len() > 1;
255        }
256        self.new_line()?;
257
258        for i in self.comments_idx..self.comments.len() {
259            write!(self.buf, "{}", self.comments[i].0)?;
260            self.new_line()?;
261        }
262
263        Ok(())
264    }
265
266    fn write_prefix(&mut self, prefix: &TurtlePrefix) -> io::Result<()> {
267        write!(self.buf, "@prefix {}: {}.", prefix.prefix.0, prefix.value.0)
268    }
269
270    fn write_base(&mut self, base: &Base) -> io::Result<()> {
271        write!(self.buf, "@base {}.", base.1 .0)
272    }
273
274    fn write_bnode(&mut self, bnode: &BlankNode) -> io::Result<()> {
275        match bnode {
276            BlankNode::Named(x, _) => write!(self.buf, "_:{}", x)?,
277            BlankNode::Unnamed(pos, _, _) => {
278                if pos.len() == 0 {
279                    return write!(self.buf, "[ ]");
280                }
281                if pos.len() == 1 {
282                    write!(self.buf, "[ ")?;
283                    self.write_po(&pos[0])?;
284                    return write!(self.buf, " ]");
285                }
286                let is_first_of_line = self.current_line_length() == 0;
287                self.inc();
288                write!(self.buf, "[")?;
289                let should_skip = if is_first_of_line {
290                    write!(self.buf, " ")?;
291                    self.write_po(&pos[0])?;
292                    write!(self.buf, ";")?;
293                    1
294                } else {
295                    0
296                };
297                for po in pos.iter().skip(should_skip) {
298                    self.new_line()?;
299                    self.check_comments(&po.1)?;
300                    self.write_po(&po)?;
301                    write!(self.buf, ";")?;
302                }
303                self.decr();
304                self.new_line()?;
305                write!(self.buf, "]")?;
306            }
307            BlankNode::Invalid => return Err(io::Error::new(io::ErrorKind::Other, "")),
308        }
309        Ok(())
310    }
311
312    fn write_collection(&mut self, coll: &Vec<Spanned<Term>>) -> io::Result<()> {
313        if coll.is_empty() {
314            return write!(self.buf, "( )");
315        }
316
317        let mut should_indent = false;
318        let start = self.buf.position();
319        let current_line = self.line_count;
320
321        write!(self.buf, "( ")?;
322
323        self.check_comments(&coll[0].1)?;
324        self.write_term(&coll[0])?;
325
326        for po in coll.iter().skip(1) {
327            self.check_comments(&po.1)?;
328            write!(self.buf, " ")?;
329            self.write_term(&po)?;
330            if self.current_line_length() > 80 {
331                should_indent = true;
332                break;
333            }
334        }
335        write!(self.buf, " )")?;
336
337        if should_indent {
338            self.buf.set_position(start);
339            self.line_count = current_line;
340            write!(self.buf, "(")?;
341            self.inc();
342            for po in coll.iter() {
343                self.new_line()?;
344                self.check_comments(&po.1)?;
345                self.write_term(&po)?;
346            }
347            self.decr();
348            self.new_line()?;
349            write!(self.buf, ")")?;
350        }
351
352        Ok(())
353    }
354
355    fn write_term(&mut self, term: &Term) -> io::Result<()> {
356        match term {
357            Term::Literal(s) => write!(self.buf, "{}", s)?,
358            Term::BlankNode(b) => self.write_bnode(b)?,
359            Term::NamedNode(n) => write!(self.buf, "{}", n)?,
360            Term::Collection(ts) => self.write_collection(ts)?,
361            Term::Invalid => {
362                return Err(io::Error::new(
363                    io::ErrorKind::Other,
364                    "cannot format turtle with invalid terms",
365                ))
366            }
367            Term::Variable(_) => {
368                return Err(io::Error::new(
369                    io::ErrorKind::Other,
370                    "cannot format turtle with variables",
371                ))
372            }
373        }
374        Ok(())
375    }
376
377    fn write_po(&mut self, po: &PO) -> io::Result<()> {
378        write!(self.buf, "{} ", po.predicate.0)?;
379        self.write_term(&po.object[0])?;
380        let mut should_indent = false;
381
382        let start = self.buf.position();
383        let current_line = self.line_count;
384        for i in 1..po.object.len() {
385            write!(self.buf, ", ")?;
386            self.write_term(&po.object[i])?;
387
388            if self.current_line_length() > 80 {
389                should_indent = true;
390                break;
391            }
392        }
393
394        if should_indent {
395            self.buf.set_position(start);
396            self.line_count = current_line;
397            self.inc();
398            for i in 1..po.object.len() {
399                write!(self.buf, ",")?;
400                self.new_line()?;
401                self.check_comments(&po.object[i].1)?;
402                self.write_term(&po.object[i])?;
403            }
404            self.decr();
405        }
406
407        Ok(())
408    }
409
410    fn write_triple(&mut self, triple: &Triple) -> io::Result<()> {
411        match &triple.subject.0 {
412            Term::BlankNode(bn) => self.write_bnode(bn)?,
413            Term::NamedNode(n) => write!(self.buf, "{}", n)?,
414            _ => write!(self.buf, "invalid")?,
415        }
416        write!(self.buf, " ")?;
417        self.write_po(&triple.po[0])?;
418        if triple.po.len() == 1 {
419            write!(self.buf, ".")?;
420            return Ok(());
421        }
422        write!(self.buf, ";")?;
423        self.inc();
424
425        self.new_line()?;
426        self.check_comments(&triple.po[1].1)?;
427        self.write_po(&triple.po[1])?;
428
429        if triple.po.len() == 2 {
430            self.decr();
431            write!(self.buf, ".")?;
432            return Ok(());
433        }
434
435        for i in 2..triple.po.len() {
436            write!(self.buf, ";")?;
437            self.new_line()?;
438            self.check_comments(&triple.po[i].1)?;
439            self.write_po(&triple.po[i])?;
440        }
441
442        write!(self.buf, ".")?;
443        self.decr();
444        Ok(())
445    }
446}
447
448pub fn format_turtle(
449    turtle: &Turtle,
450    config: FormattingOptions,
451    comments: &[Spanned<String>],
452    source: &Rope,
453) -> Option<String> {
454    let buf: Buf = Cursor::new(Vec::new());
455    let mut state = FormatState::new(config, buf, comments, source);
456    match state.write_turtle(turtle) {
457        Ok(_) => info!("Format succesful"),
458        Err(e) => {
459            info!("Format unsuccesful {:?}", e);
460            return None;
461        }
462    }
463    String::from_utf8(state.buf.into_inner()).ok()
464}
465
#[cfg(test)]
mod tests {

    use std::str::FromStr;

    use lsp_core::prelude::{spanned, Spanned};
    use ropey::Rope;

    use crate::lang::{
        context::Context, formatter::format_turtle, model::Turtle, parser as parser2,
        tokenizer::parse_tokens_str_safe,
    };

    /// Stage at which preparing a test document failed.
    #[derive(Debug)]
    pub enum Err {
        Tokenizing,
        Parsing,
    }

    /// Tokenizes and parses `inp`, returning the document model together
    /// with the comments of the input ordered by span start. Parse errors
    /// are printed but do not fail the call.
    fn parse_turtle(
        inp: &str,
        url: &lsp_types::Url,
    ) -> Result<(Turtle, Vec<Spanned<String>>), Err> {
        let context = Context::new();
        let ctx = context.ctx();
        let tokens = parse_tokens_str_safe(inp).map_err(|e| {
            println!("Error {:?}", e);
            Err::Tokenizing
        })?;

        let mut comments: Vec<Spanned<String>> = tokens
            .iter()
            .filter(|tok| tok.0.is_comment())
            .map(|tok| {
                let tok = tok.clone();
                spanned(tok.0.to_comment(), tok.1)
            })
            .collect();
        comments.sort_by_key(|c| c.1.start);

        let (turtle, errs) = parser2::parse_turtle(&url, tokens, inp.len(), ctx);
        for e in errs {
            println!("Error {:?}", e);
        }

        Ok((turtle.into_value(), comments))
    }

    /// Parses `txt` against the fixed test base URL.
    fn parse(txt: &str) -> (Turtle, Vec<Spanned<String>>) {
        let url = lsp_types::Url::from_str("http://example.com/ns#").unwrap();
        parse_turtle(txt, &url).expect("Simple")
    }

    /// Formats an already parsed document with a tab size of two.
    fn render(turtle: &Turtle, comments: &[Spanned<String>], txt: &str) -> String {
        format_turtle(
            turtle,
            lsp_types::FormattingOptions {
                tab_size: 2,
                ..Default::default()
            },
            comments,
            &Rope::from_str(txt),
        )
        .expect("formatting")
    }

    /// Parses and formats `txt` and checks the result against `expected`.
    fn assert_formats_to(txt: &str, expected: &str) {
        let (output, comments) = parse(txt);
        let formatted = render(&output, &comments, txt);
        assert_eq!(formatted, expected);
    }

    #[test]
    fn easy_format() {
        let txt = r#"
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

[] a foaf:Name;
   foaf:knows <abc>;.
"#;

        let expected = r#"@base <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>.

[ ] a foaf:Name;
  foaf:knows <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn harder_format_pos() {
        let txt = r#"
[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn format_blanknodes() {
        let txt = r#"
        [ <a> foaf:Person; foaf:knows <abc>; foaf:knows <def> ] foaf:knows [
        a foaf:Person;
        foaf:knows <abc>;
        foaf:knows <def>;
        ] .

"#;

        let expected = r#"[ <a> foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
] foaf:knows [
  a foaf:Person;
  foaf:knows <abc>;
  foaf:knows <def>;
].

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_objectlist() {
        let txt = r#"
        <abc> a <something-long>, <something-longer-still>, <something-longer>, <something-tes>, <soemthing-eeeellssee>.
"#;

        let expected = r#"<abc> a <something-long>,
  <something-longer-still>,
  <something-longer>,
  <something-tes>,
  <soemthing-eeeellssee>.

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn short_collection() {
        let txt = r#"
        <abc> a (), (<abc> <def>).
"#;

        let expected = r#"<abc> a ( ), ( <abc> <def> ).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn long_collection() {
        let txt = r#"
        <abc> a (), (<somevery-very-very-long-item> <and-othersss> <and-ottteeehs> <wheeeeeeeeeeeee>).
"#;

        let expected = r#"<abc> a ( ), (
  <somevery-very-very-long-item>
  <and-othersss>
  <and-ottteeehs>
  <wheeeeeeeeeeeee>
).

"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn easy_comments() {
        let txt = r#"
# Test this is a cool test or something!
            # Another comment!

[] a foaf:Name;
   foaf:knows <abc>; foaf:knows2 <abc>.

"#;

        let expected = r#"# Test this is a cool test or something!
# Another comment!
[ ] a foaf:Name;
  foaf:knows <abc>;
  foaf:knows2 <abc>.

"#;

        // Written out step by step so the parsed model can be dumped
        // between parsing and formatting.
        let (output, comments) = parse(txt);
        println!("OUtput {:?}", output);
        let formatted = render(&output, &comments, txt);
        assert_eq!(formatted, expected);
    }

    #[test]
    fn hard_comments() {
        let txt = r#"

[] a foaf:Name; # Nested comment
   foaf:knows <abc>;     # Another comment!
   foaf:knows2 <abc>.

   #trailing comments
"#;

        let expected = r#"[ ] a foaf:Name;
  # Nested comment
  foaf:knows <abc>;
  # Another comment!
  foaf:knows2 <abc>.

#trailing comments
"#;

        assert_formats_to(txt, expected);
    }

    #[test]
    fn bug_1() {
        let txt = r#"
[] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel"
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel"
  ].

"#;

        let expected = r#"[ ] a sh:NodeShape;
  sh:targetClass js:Echo;
  sh:property [
    sh:class :ReaderChannel;
    sh:path js:input;
    sh:name "Input Channel";
  ], [
    sh:class :WriterChannel;
    sh:path js:output;
    sh:name "Output Channel";
  ].

"#;

        assert_formats_to(txt, expected);
    }
}
809        assert_eq!(formatted, expected);
810    }
811}