sophia_turtle/serializer/
nt.rs

1//! Serializer for the [N-Triples] concrete syntax of RDF.
2//!
3//! **Important**:
4//! the methods in this module accepting a [`Write`]
5//! make no effort to minimize the number of write operations.
//! Hence, in most cases, they should be passed a [`BufWriter`].
7//!
8//! [N-Triples]: https://www.w3.org/TR/n-triples/
9//! [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
10//! [`BufWriter`]: https://doc.rust-lang.org/std/io/struct.BufWriter.html
11
12use sophia_api::ns::xsd;
13use sophia_api::serializer::*;
14use sophia_api::source::{StreamResult, TripleSource};
15use sophia_api::term::{Term, TermKind};
16use sophia_api::triple::Triple;
17use std::io;
18
19/// N-Triples serializer configuration.
20#[derive(Clone, Debug, Default)]
21pub struct NtConfig {
22    pub(super) ascii: bool,
23}
24
25impl NtConfig {
26    /// Set the ascii configuration.
27    pub fn set_ascii(&mut self, ascii: bool) -> &mut Self {
28        self.ascii = ascii;
29        self
30    }
31}
32
33/// N-Triples serializer.
34pub struct NtSerializer<W> {
35    config: NtConfig,
36    write: W,
37}
38
39impl<W> NtSerializer<W>
40where
41    W: io::Write,
42{
43    /// Build a new N-Triples serializer writing to `write`, with the default config.
44    #[inline]
45    pub fn new(write: W) -> NtSerializer<W> {
46        Self::new_with_config(write, NtConfig::default())
47    }
48
49    /// Build a new N-Triples serializer writing to `write`, with the given config.
50    pub fn new_with_config(write: W, config: NtConfig) -> NtSerializer<W> {
51        NtSerializer { config, write }
52    }
53
54    /// Borrow this serializer's configuration.
55    pub fn config(&self) -> &NtConfig {
56        &self.config
57    }
58}
59
60impl<W> TripleSerializer for NtSerializer<W>
61where
62    W: io::Write,
63{
64    type Error = io::Error;
65
66    fn serialize_triples<TS>(
67        &mut self,
68        mut source: TS,
69    ) -> StreamResult<&mut Self, TS::Error, Self::Error>
70    where
71        TS: TripleSource,
72    {
73        if self.config.ascii {
74            todo!("Pure-ASCII N-Triples is not implemented yet")
75        }
76        source
77            .try_for_each_triple(|t| {
78                {
79                    let w = &mut self.write;
80                    write_triple(w, t)?;
81                    w.write_all(b".\n")
82                }
83                .map_err(|e| io::Error::new(io::ErrorKind::Other, e))
84            })
85            .map(|_| self)
86    }
87}
88
89impl NtSerializer<Vec<u8>> {
90    /// Create a new serializer which targets a `String`.
91    #[inline]
92    pub fn new_stringifier() -> Self {
93        NtSerializer::new(Vec::new())
94    }
95    /// Create a new serializer which targets a `String` with a custom config.
96    #[inline]
97    pub fn new_stringifier_with_config(config: NtConfig) -> Self {
98        NtSerializer::new_with_config(Vec::new(), config)
99    }
100}
101
102impl Stringifier for NtSerializer<Vec<u8>> {
103    fn as_utf8(&self) -> &[u8] {
104        &self.write[..]
105    }
106}
107
108/// Write the given term into the given write in the N-Triples format.
109pub fn write_triple<W, T>(w: &mut W, t: T) -> io::Result<()>
110where
111    W: io::Write,
112    T: Triple,
113{
114    write_term(w, t.s())?;
115    w.write_all(b" ")?;
116    write_term(w, t.p())?;
117    w.write_all(b" ")?;
118    write_term(w, t.o())?;
119    Ok(())
120}
121
122/// Write the given term into the given write in the N-Triples format.
123pub fn write_term<W, T>(w: &mut W, t: T) -> io::Result<()>
124where
125    W: io::Write,
126    T: Term,
127{
128    use TermKind::*;
129    match t.kind() {
130        Iri => {
131            w.write_all(b"<")?;
132            w.write_all(t.iri().unwrap().as_bytes())?;
133            w.write_all(b">")?;
134        }
135        BlankNode => {
136            w.write_all(b"_:")?;
137            w.write_all(t.bnode_id().unwrap().as_bytes())?;
138        }
139        Literal => {
140            w.write_all(b"\"")?;
141            quoted_string(w, t.lexical_form().unwrap().as_bytes())?;
142            match t.language_tag() {
143                Some(tag) => {
144                    w.write_all(b"\"@")?;
145                    w.write_all(tag.as_bytes())?;
146                }
147                None => {
148                    let dt = t.datatype().unwrap();
149                    if xsd::string != dt {
150                        w.write_all(b"\"^^<")?;
151                        w.write_all(dt.as_bytes())?;
152                        w.write_all(b">")?;
153                    } else {
154                        w.write_all(b"\"")?;
155                    }
156                }
157            }
158        }
159        Triple => {
160            w.write_all(b"<<")?;
161            write_triple(w, t.to_triple().unwrap())?;
162            w.write_all(b">>")?;
163        }
164        Variable => {
165            w.write_all(b"?")?;
166            w.write_all(t.variable().unwrap().as_bytes())?;
167        }
168    }
169    Ok(())
170}
171
/// Write `txt` into `w`, escaped as the content of an N-Triples string literal.
///
/// The bytes for line feed, carriage return, double quote and backslash are
/// replaced by `\n`, `\r`, `\"` and `\\` respectively; every other byte is
/// written unchanged. The surrounding quotes are *not* written.
///
/// This function is iterative, so its stack usage does not depend on the
/// number of bytes that need escaping.
///
/// # Errors
///
/// Returns the first error raised by `w`, if any.
pub(crate) fn quoted_string<W: io::Write>(w: &mut W, txt: &[u8]) -> io::Result<()> {
    let mut rest = txt;
    while let Some(pos) = rest
        .iter()
        .position(|&b| matches!(b, b'\n' | b'\r' | b'"' | b'\\'))
    {
        // Write the run of plain bytes preceding the byte to escape.
        w.write_all(&rest[..pos])?;
        let escaped: &[u8] = match rest[pos] {
            b'\n' => b"\\n",
            b'\r' => b"\\r",
            b'"' => b"\\\"",
            b'\\' => b"\\\\",
            _ => unreachable!("`position` only matches bytes that need escaping"),
        };
        w.write_all(escaped)?;
        rest = &rest[pos + 1..];
    }
    // Whatever remains contains no byte that needs escaping.
    w.write_all(rest)
}
207
208// ---------------------------------------------------------------------------------
209//                                      tests
210// ---------------------------------------------------------------------------------
211
#[cfg(test)]
pub(crate) mod test {
    use super::*;
    use sophia_api::graph::MutableGraph;
    use sophia_api::ns::*;
    use sophia_api::term::{BnodeId, LanguageTag, SimpleTerm, VarName};
    use sophia_iri::Iri;

    /// The serialization expected from the graph built in [`graph`].
    const EXPECTED: &str = r#"_:me <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person>.
_:me <http://schema.org/name> "Pierre-Antoine".
_:me <http://example.org/value> "42"^^<http://www.w3.org/2001/XMLSchema#integer>.
_:me <http://example.org/message> "hello\nworld"@en.
<<_:me <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Person>>> <http://schema.org/creator> ?x.
"#;

    /// Serialize a small graph covering every kind of term
    /// (IRI, blank node, plain, typed and language-tagged literals,
    /// quoted triple, variable) and compare it to the expected output.
    #[test]
    fn graph() -> Result<(), Box<dyn std::error::Error>> {
        let me = BnodeId::new_unchecked("me");
        let mut graph: Vec<[SimpleTerm<'static>; 3]> = vec![];

        // `MutableGraph::insert` is called with its full path because
        // `Vec` has an inherent `insert` method of its own.
        MutableGraph::insert(
            &mut graph,
            me,
            rdf::type_,
            Iri::new_unchecked("http://schema.org/Person"),
        )?;
        MutableGraph::insert(
            &mut graph,
            me,
            Iri::new_unchecked("http://schema.org/name"),
            "Pierre-Antoine",
        )?;
        MutableGraph::insert(
            &mut graph,
            me,
            Iri::new_unchecked("http://example.org/value"),
            42,
        )?;
        MutableGraph::insert(
            &mut graph,
            me,
            Iri::new_unchecked("http://example.org/message"),
            SimpleTerm::LiteralLanguage(
                "hello\nworld".into(),
                LanguageTag::new_unchecked("en".into()),
            ),
        )?;

        // Use the first triple as the subject of a further triple.
        let first = graph[0].clone();
        MutableGraph::insert(
            &mut graph,
            SimpleTerm::Triple(Box::new(first)),
            Iri::new_unchecked("http://schema.org/creator"),
            VarName::new_unchecked("x"),
        )?;

        let output = NtSerializer::new_stringifier()
            .serialize_graph(&graph)
            .unwrap()
            .to_string();
        assert_eq!(output, EXPECTED);
        Ok(())
    }
}