sophia_turtle/serializer/
_pretty.rs

1//! Utility code for pretty-printing Turtle and TriG.
2//!
3//! Possible improvements:
4//! 1. PrettifiableDataset should encapsulate some of the "indexes" built by Prettifier
5//! (labelled, subject_types, named_graphs)
6//! and build directly in CollectibleDataset::from_quad_source().
7//!
8//! 2. Instead of writing directly to the output,
9//! generate a hierarchical structure,
10//! and decide on line breaks and indentation based on the overall structure,
11//! rather than a priori.
12
13use super::turtle::TurtleConfig;
14use regex::Regex;
15use sophia_api::dataset::Dataset;
16use sophia_api::ns::{rdf, xsd};
17use sophia_api::prefix::PrefixMap;
18use sophia_api::quad::{iter_spog, Gspo, Quad, Spog};
19use sophia_api::term::matcher::Any;
20use sophia_api::term::{GraphName, SimpleTerm, Term, TermKind};
21use sophia_api::triple::Triple;
22use sophia_api::MownStr;
23use sophia_iri::{Iri, IriRef};
24use std::cmp::Ordering;
25use std::collections::btree_map::Entry::{Occupied, Vacant};
26use std::collections::{BTreeMap, BTreeSet};
27use std::io::{self, Write};
28use std::ops::Range;
29
/// The dataset type accepted by [`prettify`]:
/// an ordered set of quads stored in (graph name, subject, predicate, object) order.
///
/// The serializer relies on that sort order:
/// quads of the default graph come first,
/// and the quads of a given graph are grouped by subject, then by predicate.
pub type PrettifiableDataset<'a> = BTreeSet<Gspo<SimpleTerm<'a>>>;
31
32/// Serialize `dataset` in pretty TriG on `write`, using the given `config`.
33///
34/// NB: if dataset only contains a default graph,
35/// the resulting TriG will be valid Turtle.
36pub fn prettify<W>(
37    dataset: PrettifiableDataset<'_>,
38    mut write: W,
39    config: &TurtleConfig,
40    base_indent: &str,
41) -> io::Result<()>
42where
43    W: io::Write,
44{
45    assert!(base_indent.chars().all(char::is_whitespace));
46    write_prefixes(&mut write, &config.prefix_map[..])?;
47
48    let mut p = Prettifier::new(&dataset, &mut write, base_indent.into(), config);
49    p.write_all()?;
50    write.flush()?;
51    Ok(())
52}
53
54/// write the prefix declarations of the given prefix_map, using SPARQL style.
55fn write_prefixes<W, P>(mut write: W, prefix_map: &P) -> io::Result<()>
56where
57    W: io::Write,
58    P: PrefixMap + ?Sized,
59{
60    for (pre, iri) in prefix_map.iter() {
61        writeln!(&mut write, "PREFIX {}: <{}>", pre.as_str(), iri.as_str())?;
62    }
63    Ok(())
64}
65
/// The state of a pretty-serialization in progress.
struct Prettifier<'a, W> {
    /// The dataset being serialized.
    dataset: &'a PrettifiableDataset<'a>,
    /// Where the serialization is written.
    write: W,
    /// The whitespace written after each line break produced by `write_newline`;
    /// it grows and shrinks with `indent()` and `unindent()`.
    indent: String,
    /// The serializer configuration (prefix map, indentation unit).
    config: &'a TurtleConfig,
    /// The blank nodes that must be written with a label (see `build_labelled`).
    labelled: BTreeSet<&'a SimpleTerm<'a>>,
    /// The (graph name, subject, subject type) triples of the dataset,
    /// sorted by graph name then subject (the default graph comes first).
    /// The subject type is switched to `SubjectType::Done` once the subject is written.
    subject_types: Vec<(
        GraphName<&'a SimpleTerm<'a>>,
        &'a SimpleTerm<'a>,
        SubjectType,
    )>,
    /// The well-formed lists found by `build_lists`: head blank node -> items.
    /// An entry is removed when the corresponding list is written.
    lists: BTreeMap<&'a SimpleTerm<'a>, Vec<&'a SimpleTerm<'a>>>,
    /// The index range, in `subject_types`, of the graph currently being written.
    graph_range: Range<usize>,
}
80
/// A slice of (graph name, subject, subject type) triples,
/// as stored in `Prettifier::subject_types`.
///
/// `find_subject` runs a binary search on the subject,
/// so the slice is expected to be sorted by subject.
type SubjectsWithType<'a> = [(
    GraphName<&'a SimpleTerm<'a>>,
    &'a SimpleTerm<'a>,
    SubjectType,
)];
86
87impl<'a, W: Write> Prettifier<'a, W> {
88    fn new(
89        dataset: &'a PrettifiableDataset<'a>,
90        write: W,
91        indent: String,
92        config: &'a TurtleConfig,
93    ) -> Self {
94        let labelled = build_labelled(dataset);
95        let mut subject_types = build_subject_types(dataset, &labelled);
96        let lists = build_lists(dataset, &mut subject_types);
97        let subject_types: Vec<_> = subject_types
98            .into_iter()
99            .map(|((g, s), st)| (g, s, st))
100            .collect();
101        // see how many subjects are in the default graph,
102        // using the fact that the default graph (None) comes first in the sort order
103        let upper = subject_types
104            .iter()
105            .take_while(|(g, _, _)| g.is_none())
106            .count();
107        let graph_range = 0..upper;
108
109        Self {
110            dataset,
111            write,
112            indent,
113            config,
114            labelled,
115            subject_types,
116            lists,
117            graph_range,
118        }
119    }
120
121    fn write_all(&mut self) -> io::Result<()> {
122        if self.subject_types.is_empty() {
123            return Ok(());
124        }
125        if self.graph_range.end > 0 {
126            // default graph is not empty
127            self.write_graph()?;
128        }
129        // then process named graphs
130        while let Some(g) = self.next_graph() {
131            self.write_newline()?;
132            self.write_bytes(b"GRAPH ")?;
133            self.write_term(g)?;
134            self.write_bytes(b" {")?;
135            self.indent();
136            self.write_graph()?;
137            self.unindent();
138            self.write_bytes(b"}\n")?;
139        }
140        Ok(())
141    }
142
143    /// Pre-condition:
144    /// swt is not empty;
145    /// all its elements have the same graph name,
146    /// and all subjects of that graph are contained in it.
147    fn write_graph(&mut self) -> io::Result<()> {
148        for i in self.graph_range.clone() {
149            let (_, s, st) = &self.subject_types[i];
150            if *st != SubjectType::Root {
151                continue;
152            }
153            self.write_tree(s)?;
154            self.subject_types[i].2 = SubjectType::Done;
155        }
156        /*
157        // some blank node cycles can cause all of them to be SubTree;
158        // here we detect and break these cycles
159        for i in self.graph_range.clone() {
160            let (_, s, st) = &self.subject_types[i];
161                if *st == SubjectType::Done {
162                continue
163            }
164            assert!(*st == SubjectType::SubTree);
165            self.write_tree(*s)?;
166            self.subject_types[i].2 = SubjectType::Done;
167        }
168        */
169        Ok(())
170    }
171
172    fn write_tree(&mut self, root: &'a SimpleTerm<'a>) -> io::Result<()> {
173        self.write_newline()?;
174        self.write_term(root)?;
175        self.write_properties(root)?;
176        self.write_bytes(b".\n")?;
177        Ok(())
178    }
179
    /// Write the predicate-object list of `subject` in the current graph,
    /// i.e. everything that follows the subject, without any closing delimiter.
    ///
    /// `rdf:type` statements, if any, are written first, using the `a` keyword,
    /// without a preceding line break.
    /// Every other predicate starts on a new line, one indentation level deeper,
    /// and the objects of a same predicate are separated by `,` and a line break.
    /// The indentation level is restored before returning.
    fn write_properties(&mut self, subject: &'a SimpleTerm<'a>) -> io::Result<()> {
        // the predicate whose objects are being written (None if nothing was written yet)
        let mut predicate = None;
        self.indent(); // to predicate-level
        let g = self.current_graph_name();
        // collect the types of subject, and remember the rdf:type term as the current predicate
        let types: Vec<_> = self
            .dataset
            .quads_matching([subject], [rdf::type_], Any, [g])
            .map(Result::unwrap)
            .inspect(|q| {
                if predicate.is_none() {
                    predicate = Some(q.p());
                }
            })
            .map(|q| q.o())
            .collect();
        if !types.is_empty() {
            self.write_bytes(b" a ")?;
            self.indent(); // to object-level
            self.write_objects(subject, predicate.unwrap(), &types)?;
        }
        // NB: we know that PrettifiableDataset<'_> iterates triples grouped by predicate
        // (it uses a GSPO index)
        for t in self
            .dataset
            .quads_matching([subject], Any, Any, [g])
            .map(Result::unwrap)
        {
            let p = t.p();
            if rdf::type_ == p {
                // already written above
                continue;
            }
            if Some(p) != predicate {
                // new predicate: close the previous one (if any) and start a new line
                if predicate.is_some() {
                    self.write_bytes(b";")?;
                    self.unindent(); // back to predicate-level
                }
                predicate = Some(p);
                self.write_newline()?;
                self.write_term(p)?;
                self.write_bytes(b" ")?;
                self.indent(); // to object-level
            } else {
                // same predicate as the previous quad: only a new object
                self.write_bytes(b",")?;
                self.write_newline()?;
            }
            self.write_object(subject, predicate.unwrap(), t.o())?;
        }
        if predicate.is_some() {
            self.unindent(); // back to predicate-level
        }
        self.unindent(); // back to original level
        Ok(())
    }
233
234    fn write_objects(
235        &mut self,
236        subject: &'a SimpleTerm<'a>,
237        predicate: &'a SimpleTerm<'a>,
238        objects: &[&'a SimpleTerm<'a>],
239    ) -> io::Result<()> {
240        self.write_object(subject, predicate, objects[0])?;
241        for obj in &objects[1..] {
242            self.write_bytes(b",")?;
243            self.write_newline()?;
244            self.write_object(subject, predicate, obj)?;
245        }
246        Ok(())
247    }
248
249    fn write_object(
250        &mut self,
251        subject: &'a SimpleTerm<'a>,
252        predicate: &'a SimpleTerm<'a>,
253        object: &'a SimpleTerm<'a>,
254    ) -> io::Result<()> {
255        self.write_term(object)?;
256        let tr = SimpleTerm::Triple(Box::new([
257            subject.clone(),
258            predicate.clone(),
259            object.clone(),
260        ]));
261        if let Some(i) = self.find_st_index(tr) {
262            let (_, s, st) = self.subject_types[i];
263            if st == SubjectType::Annotation {
264                self.write_bytes(b" {|")?;
265                self.write_properties(s)?;
266                self.write_bytes(b" |}")?;
267                self.subject_types[i].2 = SubjectType::Done;
268            }
269        }
270        Ok(())
271    }
272
273    fn write_term(&mut self, term: &'a SimpleTerm<'a>) -> io::Result<()> {
274        use TermKind::*;
275        match term.kind() {
276            Iri => self.write_iri(&term.iri().unwrap()),
277            BlankNode => self.write_bnode(term),
278            Literal => self.write_literal(term),
279            Variable => {
280                write!(&mut self.write, "?{}", term.variable().unwrap().as_str())
281            }
282            Triple => {
283                self.write_bytes(b"<< ")?;
284                for t in term.triple().unwrap() {
285                    self.write_term(t)?;
286                    self.write_bytes(b" ")?;
287                }
288                self.write_bytes(b">>")
289            }
290        }
291    }
292
293    fn write_iri(&mut self, iri: &IriRef<MownStr>) -> io::Result<()> {
294        if rdf::nil == iri {
295            return self.write_bytes(b"()");
296        }
297        let Some(iri) = Iri::new(iri.as_str()).ok() else {
298            return write!(self.write, "<{}>", iri.as_str());
299        };
300        match self
301            .config
302            .prefix_map
303            .get_checked_prefixed_pair(iri, |txt| PN_LOCAL.is_match(txt))
304        {
305            Some((pre, suf)) => {
306                write!(self.write, "{}:{}", pre.as_str(), suf)
307            }
308            None => {
309                write!(self.write, "<{}>", iri.as_str())
310            }
311        }
312    }
313
314    fn write_bnode(&mut self, bn: &'a SimpleTerm<'a>) -> io::Result<()> {
315        if let Some(items) = self.lists.remove(&bn) {
316            self.write_bytes(b"(")?;
317            self.indent();
318            for item in items {
319                self.write_newline()?;
320                self.write_term(item)?;
321            }
322            self.unindent();
323            self.write_newline()?;
324            self.write_bytes(b")")?;
325        } else if self.labelled.contains(&bn) {
326            write!(self.write, "_:{}", bn.bnode_id().unwrap().as_str())?;
327        } else if let Some(i) = self.find_st_index(bn) {
328            let (_, s, st) = self.subject_types[i];
329            match st {
330                SubjectType::SubTree => {
331                    self.write_bytes(b"[")?;
332                    self.write_properties(s)?;
333                    self.write_bytes(b"]")?;
334                    self.subject_types[i].2 = SubjectType::Done;
335                }
336                SubjectType::Root => {
337                    self.write_bytes(b"[]")?;
338                }
339                _ => {}
340            }
341        } else {
342            self.write_bytes(b"[]")?;
343        }
344        Ok(())
345    }
346
347    fn write_literal(&mut self, lit: &'a SimpleTerm<'a>) -> io::Result<()> {
348        debug_assert!(lit.kind() == TermKind::Literal);
349        let datatype = lit.datatype().unwrap();
350        let value = lit.lexical_form().unwrap();
351        if xsd::integer == datatype && INTEGER.is_match(&value)
352            || xsd::decimal == datatype && DECIMAL.is_match(&value)
353            || xsd::double == datatype && DOUBLE.is_match(&value)
354            || xsd::boolean == datatype && BOOLEAN.is_match(&value)
355        {
356            self.write_bytes(value.as_bytes())?;
357        } else {
358            self.write_bytes(b"\"")?;
359            super::nt::quoted_string(&mut self.write, value.as_bytes())?;
360            self.write_bytes(b"\"")?;
361            if let Some(tag) = lit.language_tag() {
362                write!(self.write, "@{}", tag.as_str())?;
363            } else if xsd::string != datatype {
364                self.write_bytes(b"^^")?;
365                self.write_iri(&datatype)?;
366            }
367        }
368        Ok(())
369    }
370
371    fn write_newline(&mut self) -> io::Result<()> {
372        self.write_bytes(b"\n")?;
373        self.write.write_all(self.indent.as_bytes())
374    }
375
376    fn write_bytes(&mut self, bytes: &[u8]) -> io::Result<()> {
377        self.write.write_all(bytes)
378    }
379
380    fn indent(&mut self) {
381        self.indent.push_str(self.config.indentation());
382    }
383
384    fn unindent(&mut self) {
385        let ilen = self.config.indentation().len();
386        self.indent.truncate(self.indent.len() - ilen);
387    }
388
389    fn next_graph(&mut self) -> Option<&'a SimpleTerm<'a>> {
390        if self.graph_range.end >= self.subject_types.len() {
391            None
392        } else {
393            let start = self.graph_range.end;
394            let g1 = self.subject_types[start].0;
395            let c = self.subject_types[start..]
396                .iter()
397                .take_while(|(g2, _, _)| g1 == *g2)
398                .count();
399            self.graph_range = start..(start + c);
400            Some(g1.unwrap())
401        }
402    }
403
404    fn current_graph_name(&self) -> GraphName<&'a SimpleTerm<'a>> {
405        self.subject_types[self.graph_range.start].0
406    }
407
408    fn find_st_index<T: Term>(&self, term: T) -> Option<usize> {
409        find_subject(term, &self.subject_types[self.graph_range.clone()])
410            .map(|i| i + self.graph_range.start)
411    }
412}
413
414/// blank nodes MUST be labelled (as opposed to described with square brackets) if
415/// - they are used in several named graphs, or
416/// - they are used several times as object, or
417/// - they are used as predicate or graph_name, or
418/// - they are used in a quoted triple, or
419/// - they are involved in a blank node cycle.
420///
421/// NB1: actually, blank nodes that are the subject of a quoted triple that is also
422/// asserted (in the same graph) are not forced to be labelled,
423/// because the quoted triple can be "hidden" by an annotating the asserted one.
424///
425/// NB2: there would be other cases where a bnode in a quoted triples could,
426/// theory, be written using the square brackets, but the added value is not worth
427/// the trouble of identifying those cases.
428fn build_labelled<'a>(d: &'a PrettifiableDataset) -> BTreeSet<&'a SimpleTerm<'a>> {
429    let mut profiles = BTreeMap::new();
430    for q in d.quads() {
431        let q = q.unwrap();
432        for (i, t) in iter_spog(q).enumerate() {
433            match t.kind() {
434                TermKind::BlankNode => {
435                    profiles
436                        .entry(t)
437                        .and_modify(|profile: &mut BnodeProfile| {
438                            if !profile.bad {
439                                profile.add_named_graph(q.g());
440                                profile.update_positions(i, &q);
441                            }
442                        })
443                        .or_insert_with(|| BnodeProfile {
444                            bad: (i == 1 || i == 3),
445                            named_graphs: [q.g()].into_iter().collect(),
446                            out_degree: usize::from(i == 0),
447                            predecessor: if i == 2 { Some(q.s()) } else { None },
448                            visited: false,
449                        });
450                }
451                TermKind::Triple => {
452                    let mut atoms = t.atoms();
453                    let [s, p, o] = t.triple().unwrap().spo();
454                    if s.is_blank_node() && Dataset::contains(d, s, p, o, q.g()).unwrap() {
455                        atoms.next(); // skip the subject blank nodes in atoms
456                                      // and leave it to the "asserted" blank node to determine
457                                      // if it must be labelled or not
458                    }
459                    for a in t.atoms().filter(Term::is_blank_node) {
460                        profiles
461                            .entry(a)
462                            .and_modify(|profile| profile.bad = true)
463                            .or_insert_with(|| BnodeProfile {
464                                bad: true,
465                                named_graphs: Default::default(),
466                                out_degree: 0,
467                                predecessor: None,
468                                visited: false,
469                            });
470                    }
471                }
472                _ => (),
473            }
474        }
475    }
476    // detect blank node cycles
477    let keys: Vec<_> = profiles.keys().cloned().collect();
478    for key in keys {
479        let profile = profiles.get_mut(&key).unwrap();
480        if profile.bad || profile.visited {
481            continue;
482        }
483        profile.visited = true;
484        let mut current = profile.predecessor;
485        while let Some(t) = current {
486            if let Some(p) = profiles.get_mut(&t) {
487                if t == key {
488                    p.bad = true;
489                    break;
490                } else if p.bad || p.visited {
491                    break;
492                } else {
493                    p.visited = true;
494                    current = p.predecessor;
495                }
496            } else {
497                break;
498            }
499        }
500    }
501    profiles
502        .into_iter()
503        .filter_map(|(key, profile)| profile.bad.then_some(key))
504        .collect()
505}
506
/// What `build_labelled` knows about the usage of a given blank node.
struct BnodeProfile<'a> {
    /// Whether this blank node must be labelled.
    bad: bool,
    /// The names of the graphs in which this blank node was found.
    named_graphs: BTreeSet<GraphName<&'a SimpleTerm<'a>>>,
    /// The number of quads found so far with this blank node as subject.
    out_degree: usize,
    /// The subject of the quad having this blank node as object, if any
    /// (a second such quad causes the blank node to be marked as bad).
    predecessor: Option<&'a SimpleTerm<'a>>,
    /// Whether this blank node was already traversed by the cycle detection.
    visited: bool,
}
514
515impl<'a> BnodeProfile<'a> {
516    fn add_named_graph(&mut self, g: GraphName<&'a SimpleTerm<'a>>) {
517        self.named_graphs.insert(g);
518        if self.named_graphs.len() > 1 {
519            self.bad = true;
520        }
521    }
522    fn update_positions(&mut self, pos: usize, quad: &Spog<&'a SimpleTerm>) {
523        if pos == 0 {
524            self.out_degree += 1;
525        } else if pos == 2 {
526            if self.predecessor.is_none() {
527                self.predecessor = Some(quad.s());
528            } else {
529                self.bad = true;
530            }
531        } else {
532            debug_assert!(pos == 1 || pos == 3);
533            self.bad = true;
534        }
535    }
536}
537
538/// For each pair (graph-name, subject), determine the subject type
539fn build_subject_types<'a>(
540    d: &'a PrettifiableDataset,
541    labelled: &BTreeSet<&'a SimpleTerm<'a>>,
542) -> BTreeMap<(GraphName<&'a SimpleTerm<'a>>, &'a SimpleTerm<'a>), SubjectType> {
543    d.iter()
544        .map(|q| (q.g(), q.s()))
545        .dedup()
546        .map(|(g, s)| {
547            use TermKind::*;
548            let st = match s.kind() {
549                BlankNode => {
550                    if !labelled.contains(&s)
551                        && d.quads_matching(Any, Any, [s], [g]).take(2).count() == 1
552                    {
553                        SubjectType::SubTree
554                    } else {
555                        SubjectType::Root
556                    }
557                }
558                Triple => {
559                    let tr = s.triple().unwrap();
560                    if rdf::first != tr.p()
561                        && rdf::rest != tr.p()
562                        && Dataset::contains(d, tr.s(), tr.p(), tr.o(), g).unwrap()
563                    {
564                        SubjectType::Annotation
565                    } else {
566                        SubjectType::Root
567                    }
568                }
569                _ => SubjectType::Root,
570            };
571            ((g, s), st)
572        })
573        .collect()
574}
575
/// Categorization of triple subjects,
/// determining where and how the statements about a subject are written.
#[derive(Copy, Clone, Debug, PartialEq)]
enum SubjectType {
    /// A node that must be the root of a "tree"
    /// (its statements are written as a top-level block, ending with a `.`)
    Root,
    /// A node that can be used as a subtree (square brackets with property list)
    SubTree,
    /// A quoted triple that is also asserted
    /// (its statements are written as an annotation `{| ... |}` of the asserted triple)
    Annotation,
    /// A dummy subject type, to indicate that this subject has been serialized already
    Done,
}
588
/// Find all well-formed lists in this dataset
///
/// The result maps the head node of each list to the items of that list, in order.
///
/// NB: the nodes of the lists are removed from `subject_types`,
/// as they are not meant to be written as regular subjects.
fn build_lists<'a>(
    d: &'a PrettifiableDataset,
    subject_types: &mut BTreeMap<(GraphName<&'a SimpleTerm<'a>>, &'a SimpleTerm<'a>), SubjectType>,
) -> BTreeMap<&'a SimpleTerm<'a>, Vec<&'a SimpleTerm<'a>>> {
    // for each blank node, the (unique) blank node pointing to it with rdf:rest
    let mut preds = BTreeMap::new();
    // the last node of each list (the one pointing to rdf:nil), with its item
    let mut seeds = vec![];
    use TermKind::BlankNode;
    for q in d.quads_matching(BlankNode, [rdf::rest], Any, Any) {
        let ([s, _, o], g) = q.unwrap().spog();
        // only the nodes that could be written as a subtree are considered
        if subject_types.get(&(g, s)) != Some(&SubjectType::SubTree) {
            continue;
        }
        if rdf::nil == o {
            if let Some(val) = list_item(s, d) {
                seeds.push(((g, s), vec![val]));
                subject_types.remove(&(g, s));
            }
        } else if o.is_blank_node() {
            match preds.entry(o) {
                Vacant(e) => {
                    e.insert(s);
                }
                Occupied(e) => {
                    // o already had a predecessor: forget it
                    e.remove();
                }
            }
        }
    }
    seeds
        .into_iter()
        .map(|((g, mut bn), mut items)| {
            // walk the list backward, from its last node,
            // as long as the predecessor is a well-formed list node
            loop {
                if let Some(pred) = preds.get(&bn).copied() {
                    if let Some(val) = list_item(pred, d) {
                        bn = pred;
                        items.push(val);
                        subject_types.remove(&(g, pred));
                        continue;
                    }
                }
                break;
            }
            // the items were collected from last to first
            items.reverse();
            (bn, items)
        })
        .collect()
}
637
638fn list_item<'a>(s: &'a SimpleTerm<'a>, d: &'a PrettifiableDataset) -> Option<&'a SimpleTerm<'a>> {
639    let mut ret = None;
640    for q in d.quads_matching([s], Any, Any, Any) {
641        let q = q.unwrap();
642        if rdf::rest == q.p() {
643            continue;
644        } else if rdf::first == q.p() && ret.is_none() {
645            ret = Some(q.o());
646        } else {
647            return None;
648        }
649    }
650    ret
651}
652
653fn find_subject<T: Term>(s: T, swt: &SubjectsWithType) -> Option<usize> {
654    if swt.is_empty() {
655        None
656    } else {
657        let m = swt.len() / 2;
658        match Term::cmp(&swt[m].1, s.borrow_term()) {
659            Ordering::Less => find_subject(s, &swt[m + 1..]).map(|i| i + m + 1),
660            Ordering::Equal => Some(m),
661            Ordering::Greater => find_subject(s, &swt[..m]),
662        }
663    }
664}
665
666// ---------------------------------------------------------------------------------
667//                                      inners
668// ---------------------------------------------------------------------------------
669
670lazy_static::lazy_static! {
    /// Match a valid local part of a prefixed name
    /// (modelled after the `PN_LOCAL` production named in the comments of the pattern).
    ///
    /// It is used to check that the suffix of an IRI can be written after a prefix.
    pub(crate) static ref PN_LOCAL: Regex = Regex::new(r"(?x)^
        #(PN_CHARS_U | ':' | [0-9] | PLX)
        (
            [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_:0-9]
            # | PLX
            | \\ [_~.!$&'()*+,;=/?\#@%-]
            | % [0-9A-Fa-f]{2}
        )
        # ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
        (
            (
                [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}.:-]
                | \\ [_~.!$&'()*+,;=/?\#@%-]
                | % [0-9A-Fa-f]{2}
            )*
            (
                [A-Za-z\u{00C0}-\u{00D6}\u{00D8}-\u{00F6}\u{00F8}-\u{02FF}\u{0370}-\u{037D}\u{037F}-\u{1FFF}\u{200C}-\u{200D}\u{2070}-\u{218F}\u{2C00}-\u{2FEF}\u{3001}-\u{D7FF}\u{F900}-\u{FDCF}\u{FDF0}-\u{FFFD}\u{10000}-\u{EFFFF}_0-9\u{00B7}\u{0300}-\u{036F}\u{203F}-\u{2040}:-]
                | \\ [_~.!$&'()*+,;=/?\#@%-]
                | % [0-9A-Fa-f]{2}
            )
        )?
    $").unwrap();
694    pub(crate) static ref INTEGER: Regex = Regex::new(r"^[+-]?[0-9]+$").unwrap();
695    pub(crate) static ref DECIMAL: Regex = Regex::new(r"^[+-]?[0-9]*.[0-9]+$").unwrap();
696    pub(crate) static ref DOUBLE: Regex = Regex::new(r"(?x)^
697      [+-]? ( [0-9]+ ( . [0-9]* )? | . [0-9]+ ) [eE] [+-]? [0-9]+
698    $").unwrap();
699    pub(crate) static ref BOOLEAN: Regex = Regex::new(r"^(true|false)$").unwrap();
700}
701
/// Extension trait providing [`Dedup::dedup`] on every iterator.
trait Dedup: Iterator + Sized {
    /// Return an iterator skipping every item that is equal to the item
    /// immediately preceding it (consecutive duplicates only).
    fn dedup(self) -> DedupIterator<Self> {
        DedupIterator {
            previous: None,
            inner: self,
        }
    }
}

impl<I: Iterator> Dedup for I {}

/// The iterator returned by [`Dedup::dedup`].
struct DedupIterator<I: Iterator> {
    /// The last item yielded (`None` until the first item is yielded).
    previous: Option<I::Item>,
    /// The underlying iterator.
    inner: I,
}

impl<I: Iterator> Iterator for DedupIterator<I>
where
    I::Item: Clone + Eq,
{
    type Item = I::Item;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let item = self.inner.next()?;
            if self.previous.as_ref() != Some(&item) {
                self.previous = Some(item.clone());
                return Some(item);
            }
        }
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, upper) = self.inner.size_hint();
        // Before anything was yielded, the first item of `inner` (if any) is always yielded;
        // every other item may be a duplicate of its predecessor, hence skipped.
        let lower = if self.previous.is_none() {
            lower.min(1)
        } else {
            0
        };
        (lower, upper)
    }
}
743
744// ---------------------------------------------------------------------------------
745//                                      tests
746// ---------------------------------------------------------------------------------
747
#[cfg(test)]
pub(crate) mod test {
    use super::*;

    /// Only consecutive duplicates are removed by `dedup`.
    #[test]
    fn dedup() {
        let v1 = [1, 1, 1, 2, 2, 1, 3, 3];
        let v2: Vec<_> = v1.into_iter().dedup().collect();
        assert_eq!(&v2, &[1, 2, 1, 3]);
    }

    /// `PN_LOCAL` accepts valid local names, and rejects invalid ones
    /// (whitespace, leading or trailing dot).
    #[test]
    fn pn_local() {
        for positive in [
            "a",
            "aBc",
            "éàïsophia_api::graph::",
            ":::",
            "123",
            "%20%21%22",
            "\\%\\?\\&",
        ] {
            assert!(PN_LOCAL.is_match(positive), "{}", positive);
        }
        for negative in [" ", ".a", "a."] {
            assert!(!PN_LOCAL.is_match(negative), "{}", negative);
        }
    }

    /// `DOUBLE` accepts all the variants of sign, mantissa and exponent.
    #[test]
    fn double() {
        for positive in [
            "3.14e0",
            "+3.14e0",
            "-3.14e0",
            "3.14e+0",
            "3.14e-0",
            "0000e0000",
            ".1E0",
            "1.e+3",
            "1E-3",
        ] {
            assert!(DOUBLE.is_match(positive), "{}", positive);
        }
    }

    /// Relative IRI references (here, the empty one) can be pretty-printed.
    #[test]
    fn relative_iri() -> Result<(), Box<dyn std::error::Error>> {
        let iri = IriRef::new_unchecked("");
        let graph = vec![[iri, iri, iri]];
        let config = TurtleConfig::new().with_pretty(true);
        use sophia_api::prelude::*;
        let pretty =
            crate::serializer::turtle::TurtleSerializer::new_stringifier_with_config(config)
                .serialize_triples(graph.triples())?
                .to_string();
        assert!(pretty.contains("<>"));
        // the goal is not to check the exact serialization,
        // but only that relative IRIs are supported even in debug mode
        Ok(())
    }
}