rio_turtle/
gtriple_allocator.rs

1//! I define [`GeneralizedTripleAllocator`]
2
3#![allow(unsafe_code)]
4use crate::utils::StringBufferStack;
5use rio_api::model::*;
6use std::mem::transmute;
7
8/// A stack allocator for storing generalized RDF and RDF-star triples.
9///
10/// # Implementation
11/// This type uses `&'static str` internally to reference text that it allocates.
12/// It therefore contains unsafe code to cheat the borrow checker,
13/// but ensures that the referenced data lives as long as the referencing struct.
14pub struct GeneralizedTripleAllocator {
15    incomplete_stack: Vec<[GeneralizedTerm<'static>; 3]>,
16    incomplete_len: usize,
17    #[allow(clippy::vec_box)]
18    complete_stack: Vec<Box<[GeneralizedTerm<'static>; 3]>>,
19    complete_len: usize,
20    string_stack: StringBufferStack,
21}
22
23impl GeneralizedTripleAllocator {
24    pub fn new() -> GeneralizedTripleAllocator {
25        GeneralizedTripleAllocator {
26            incomplete_stack: Vec::with_capacity(1),
27            incomplete_len: 0,
28            complete_stack: Vec::with_capacity(1),
29            complete_len: 0,
30            string_stack: StringBufferStack::with_capacity(4),
31        }
32    }
33
34    pub fn complete_len(&self) -> usize {
35        self.complete_len
36    }
37
38    pub fn incomplete_len(&self) -> usize {
39        self.incomplete_len
40    }
41
42    /// Return the last completed triple
43    pub fn top(&self) -> &[GeneralizedTerm<'_>; 3] {
44        debug_assert!(self.complete_len > 0);
45        &self.complete_stack[self.complete_len - 1]
46    }
47
48    /// Return the subject of the current (in-progress) triple, if any.
49    pub fn current_subject(&self) -> Option<GeneralizedTerm> {
50        if self.incomplete_len == 0 {
51            None
52        } else {
53            debug_assert!(!dummy(self.incomplete_stack[self.incomplete_len - 1][0]));
54            Some(self.incomplete_stack[self.incomplete_len - 1][0])
55        }
56    }
57
58    /// Generate a quad from the top completed triple,
59    /// with the given graph name.
60    pub fn top_quad<'s>(&'s self, graph_name: Option<GeneralizedTerm<'s>>) -> GeneralizedQuad<'s> {
61        debug_assert!(self.complete_len > 0);
62        let [subject, predicate, object] = *self.top();
63        GeneralizedQuad {
64            subject,
65            predicate,
66            object,
67            graph_name,
68        }
69    }
70
71    pub fn push_triple_start(&mut self) {
72        let dummy: GeneralizedTerm = DUMMY_IRI.into();
73        if self.incomplete_len == self.incomplete_stack.len() {
74            self.incomplete_stack.push([dummy, dummy, dummy])
75        }
76        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
77        {
78            self.incomplete_stack[self.incomplete_len] = [dummy, dummy, dummy];
79        }
80        self.incomplete_len += 1;
81    }
82
83    /// Push an atomic term,
84    /// produced by `subject_factory`,
85    /// at the position `pos` of the current triple.
86    ///
87    /// # Pre-condition
88    /// In standard RDF, any term is acceptable.
89    /// In RDF-star embedded triples [`GeneralizedTerm::Triple`] are *not allowed*.
90    /// For adding an embedded triple, use [`TripleAllocator::push_quoted_triple`] instead.
91    pub fn try_push_atom<E, F>(&mut self, pos: usize, term_factory: F) -> Result<(), E>
92    where
93        F: for<'x> FnOnce(&'x mut String, &'x mut String) -> Result<GeneralizedTerm<'x>, E>,
94    {
95        debug_assert!(pos < 3);
96        debug_assert!(pos == 0 || !dummy(self.current()[pos - 1]));
97        debug_assert!(dummy(self.current()[pos]));
98        let buffers = self.string_stack.push2();
99        let atom = term_factory(buffers.0, buffers.1)?;
100        debug_assert!(!matches!(atom, GeneralizedTerm::Triple(_)));
101        let atom: GeneralizedTerm<'static> = unsafe { transmute(atom) };
102        // The unsafe code above changes the lifetime parameter of atom to `'static`.
103        // This is ok because:
104        // * we will only expose it with a shorter lifetime, and
105        // * this implementation guarantees that the pointed `str` lives as long as the atom
106        if pos < 2 {
107            self.current()[pos] = atom;
108        } else {
109            self.complete_triple(atom);
110        }
111        Ok(())
112    }
113
114    /// Use the [top](TripleAllocator::top) triple of this stash at the position `pos` of the current triple.
115    pub fn push_quoted_triple(&mut self, pos: usize) {
116        debug_assert!(pos < 3);
117        debug_assert!(dummy(self.current()[pos]));
118        debug_assert!(self.complete_len > 0);
119        let triple = &*self.complete_stack[self.complete_len - 1];
120
121        debug_assert!((0..pos).all(
122            |i| match self.incomplete_stack[self.incomplete_len - 1][i] {
123                GeneralizedTerm::Triple(s) => {
124                    let ptr_s: *const _ = s;
125                    ptr_s != triple
126                }
127                _ => true,
128            }
129        ));
130
131        let triple: &'static [GeneralizedTerm<'static>; 3] = unsafe { transmute(triple) };
132        // The unsafe code above changes the lifetime of the ref to `'static`.
133        // This is ok because:
134        // * we will only expose it with a shorter lifetime, and
135        // * this implementation guarantees that the pointed `Triple` lives as long as the `Term` embedding it
136        let quoted = GeneralizedTerm::Triple(triple);
137        if pos < 2 {
138            self.current()[pos] = quoted;
139        } else {
140            self.complete_triple(quoted);
141        }
142    }
143
144    pub fn pop_term(&mut self, pos: usize) {
145        debug_assert!(pos < 3);
146        if pos == 2 {
147            debug_assert!(self.complete_len > 0);
148            self.complete_len -= 1;
149            let inc_triple = *self.complete_stack[self.complete_len];
150            if self.incomplete_len == self.incomplete_stack.len() {
151                self.incomplete_stack.push(inc_triple)
152            } else {
153                self.incomplete_stack[self.incomplete_len] = inc_triple;
154            }
155            self.incomplete_len += 1;
156        } else {
157            debug_assert!(dummy(self.current()[pos + 1]));
158        };
159
160        match self.current()[pos] {
161            GeneralizedTerm::Triple(_) => self.pop_top_triple(),
162            _ => {
163                // we allocate two buffers for any atomic object, not just literals
164                self.string_stack.pop();
165                self.string_stack.pop()
166            }
167        }
168        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
169        {
170            self.current()[pos] = DUMMY_IRI.into();
171        }
172    }
173
174    #[inline(always)]
175    /// Pops the latest complete triple, and recursively pops all its constituent triples.
176    /// Equivalent to pop_object, pop_predicate, pop_subject, pop_empty_triple
177    pub fn pop_top_triple(&mut self) {
178        self.pop_term(2);
179        self.pop_term(1);
180        self.pop_term(0);
181        self.incomplete_len -= 1;
182    }
183
184    /// Pops the top-most empty triple, created with push_triple_start,
185    /// but with all its components having been popped (or never pushed)
186    #[inline(always)]
187    pub fn pop_top_empty_triple(&mut self) {
188        debug_assert!(self.incomplete_len > 0);
189        debug_assert!(dummy(self.current()[1]));
190        debug_assert!(dummy(self.current()[0]));
191        self.incomplete_len -= 1;
192    }
193
194    /// Pops the top-most annotation triple, i.e.
195    /// a triple on the incomplete stack with only its subject pushed,
196    /// and that subject is an embedded triple.
197    ///
198    /// The goal is to remove this triple *without* freeing the subject triple.
199    #[inline(always)]
200    pub fn pop_annotation_triple(&mut self) {
201        debug_assert!(self.incomplete_len > 0);
202        debug_assert!(dummy(self.current()[1]));
203        debug_assert!(!dummy(self.current()[0]));
204        debug_assert!(matches!(self.current()[0], GeneralizedTerm::Triple(_)));
205        self.incomplete_len -= 1;
206    }
207
208    pub fn clear(&mut self) {
209        self.incomplete_len = 0;
210        self.incomplete_stack.clear();
211        self.complete_len = 0;
212        self.complete_stack.clear();
213        self.string_stack.clear();
214    }
215
216    fn complete_triple(&mut self, object: GeneralizedTerm<'static>) {
217        self.incomplete_len -= 1;
218        let mut triple = self.incomplete_stack[self.incomplete_len];
219        triple[2] = object;
220        if self.complete_len == self.complete_stack.len() {
221            self.complete_stack.push(Box::new(triple))
222        } else {
223            *self.complete_stack[self.complete_len] = triple;
224        }
225        self.complete_len += 1;
226    }
227
228    fn current(&mut self) -> &mut [GeneralizedTerm<'static>; 3] {
229        debug_assert!(self.incomplete_len > 0);
230        &mut self.incomplete_stack[self.incomplete_len - 1]
231    }
232}
233
234#[cfg(debug_assertions)] // debug assertions need DUMMY to have a static address
235static DUMMY_IRI: NamedNode<'static> = NamedNode { iri: "" };
236#[cfg(not(debug_assertions))] // otherwise, a const is sufficient
237const DUMMY_IRI: NamedNode<'static> = NamedNode { iri: "" };
238
239fn dummy<'a, T: std::fmt::Debug + Into<GeneralizedTerm<'a>>>(t: T) -> bool {
240    match t.into() {
241        GeneralizedTerm::NamedNode(n) => {
242            let ptr_iri: *const str = n.iri;
243            ptr_iri == DUMMY_IRI.iri
244        }
245        _ => false,
246    }
247}
248
249#[cfg(test)]
250mod test {
251    use super::*;
252    use std::convert::Infallible;
253
254    #[cfg(debug_assertions)]
255    #[test]
256    fn dummy_works() {
257        assert!(dummy(DUMMY_IRI));
258        let b = "foo".to_string();
259        let n = NamedNode { iri: &b[..0] };
260        assert!(DUMMY_IRI.iri == n.iri);
261        // and yet:
262        assert!(!dummy(n));
263    }
264
265    fn iri<'a, T: From<NamedNode<'a>>>(
266        buffer: &'a mut String,
267        value: &str,
268    ) -> Result<T, Infallible> {
269        buffer.push_str(value);
270        Ok(NamedNode { iri: &*buffer }.into())
271    }
272
273    fn bn<'a, T: From<BlankNode<'a>>>(
274        buffer: &'a mut String,
275        value: &str,
276    ) -> Result<T, Infallible> {
277        buffer.push_str(value);
278        Ok(BlankNode { id: &*buffer }.into())
279    }
280
281    fn sl<'a, T: From<Literal<'a>>>(buffer: &'a mut String, value: &str) -> Result<T, Infallible> {
282        buffer.push_str(value);
283        Ok(Literal::Simple { value: &*buffer }.into())
284    }
285
286    fn lt<'a, T: From<Literal<'a>>>(
287        buffer1: &'a mut String,
288        buffer2: &'a mut String,
289        value: &str,
290        tag: &str,
291    ) -> Result<T, Infallible> {
292        buffer1.push_str(value);
293        buffer2.push_str(tag);
294        Ok(Literal::LanguageTaggedString {
295            value: &*buffer1,
296            language: &*buffer2,
297        }
298        .into())
299    }
300
301    fn dt<'a, T: From<Literal<'a>>>(
302        buffer1: &'a mut String,
303        buffer2: &'a mut String,
304        value: &str,
305        dt: &str,
306    ) -> Result<T, Infallible> {
307        buffer1.push_str(value);
308        buffer2.push_str(dt);
309        Ok(Literal::Typed {
310            value: &*buffer1,
311            datatype: NamedNode { iri: &*buffer2 },
312        }
313        .into())
314    }
315
316    #[test]
317    fn simple_triple_w_named() -> Result<(), Infallible> {
318        let mut ta = GeneralizedTripleAllocator::new();
319        ta.push_triple_start();
320        ta.try_push_atom(0, |b, _| iri(b, "a"))?;
321        ta.try_push_atom(1, |b, _| iri(b, "b"))?;
322        ta.try_push_atom(2, |b, _| iri(b, "c"))?;
323        assert_eq!(display(ta.top()), r#"<a> <b> <c>"#);
324        Ok(())
325    }
326
327    #[test]
328    fn simple_triple_w_blank() -> Result<(), Infallible> {
329        let mut ta = GeneralizedTripleAllocator::new();
330        ta.push_triple_start();
331        ta.try_push_atom(0, |b, _| bn(b, "a"))?;
332        ta.try_push_atom(1, |b, _| bn(b, "b"))?;
333        ta.try_push_atom(2, |b, _| bn(b, "c"))?;
334        assert_eq!(display(ta.top()), r#"_:a _:b _:c"#);
335        Ok(())
336    }
337
338    #[test]
339    fn simple_triple_w_simple_lit() -> Result<(), Infallible> {
340        let mut ta = GeneralizedTripleAllocator::new();
341        ta.push_triple_start();
342        ta.try_push_atom(0, |b, _| sl(b, "a"))?;
343        ta.try_push_atom(1, |b, _| sl(b, "b"))?;
344        ta.try_push_atom(2, |b, _| sl(b, "c"))?;
345        assert_eq!(display(ta.top()), r#""a" "b" "c""#);
346        Ok(())
347    }
348
349    #[test]
350    fn simple_triple_w_lang_lit() -> Result<(), Infallible> {
351        let mut ta = GeneralizedTripleAllocator::new();
352        ta.push_triple_start();
353        ta.try_push_atom(0, |b1, b2| lt(b1, b2, "a", "en"))?;
354        ta.try_push_atom(1, |b1, b2| lt(b1, b2, "b", "fr"))?;
355        ta.try_push_atom(2, |b1, b2| lt(b1, b2, "c", "ge"))?;
356        assert_eq!(display(ta.top()), r#""a"@en "b"@fr "c"@ge"#);
357        Ok(())
358    }
359
360    #[test]
361    fn simple_triple_w_typed_lit() -> Result<(), Infallible> {
362        let mut ta = GeneralizedTripleAllocator::new();
363        ta.push_triple_start();
364        ta.try_push_atom(0, |b1, b2| dt(b1, b2, "a", "b"))?;
365        ta.try_push_atom(1, |b1, b2| dt(b1, b2, "c", "d"))?;
366        ta.try_push_atom(2, |b1, b2| dt(b1, b2, "e", "f"))?;
367        assert_eq!(display(ta.top()), r#""a"^^<b> "c"^^<d> "e"^^<f>"#);
368        Ok(())
369    }
370
371    #[test]
372    fn simple_triples_pop() -> Result<(), Infallible> {
373        let mut ta = GeneralizedTripleAllocator::new();
374        ta.push_triple_start();
375        ta.try_push_atom(0, |b, _| iri(b, "a"))?;
376        ta.try_push_atom(1, |b, _| iri(b, "b"))?;
377        ta.try_push_atom(2, |b, _| iri(b, "c"))?;
378        assert_eq!(display(ta.top()), r#"<a> <b> <c>"#);
379        ta.pop_term(2);
380        ta.try_push_atom(2, |b, _| iri(b, "d"))?;
381        assert_eq!(display(ta.top()), r#"<a> <b> <d>"#);
382        ta.pop_term(2);
383        ta.pop_term(1);
384        ta.try_push_atom(1, |b, _| iri(b, "e"))?;
385        ta.try_push_atom(2, |b, _| iri(b, "f"))?;
386        assert_eq!(display(ta.top()), r#"<a> <e> <f>"#);
387        ta.pop_term(2);
388        ta.try_push_atom(2, |b, _| iri(b, "g"))?;
389        assert_eq!(display(ta.top()), r#"<a> <e> <g>"#);
390        ta.pop_term(2);
391        ta.pop_term(1);
392        ta.pop_term(0);
393        ta.try_push_atom(0, |b, _| iri(b, "h"))?;
394        ta.try_push_atom(1, |b, _| iri(b, "i"))?;
395        ta.try_push_atom(2, |b, _| iri(b, "j"))?;
396        assert_eq!(display(ta.top()), r#"<h> <i> <j>"#);
397        Ok(())
398    }
399
400    #[test]
401    fn simple_triples_stacked() -> Result<(), Infallible> {
402        let mut ta = GeneralizedTripleAllocator::new();
403        ta.push_triple_start();
404        ta.try_push_atom(0, |b, _| iri(b, "a"))?;
405        ta.try_push_atom(1, |b, _| iri(b, "b"))?;
406        ta.try_push_atom(2, |b, _| iri(b, "c"))?;
407        assert_eq!(display(ta.top()), r#"<a> <b> <c>"#);
408        ta.push_triple_start();
409        ta.try_push_atom(0, |b, _| iri(b, "d"))?;
410        ta.try_push_atom(1, |b, _| iri(b, "e"))?;
411        ta.try_push_atom(2, |b, _| iri(b, "f"))?;
412        assert_eq!(display(ta.top()), r#"<d> <e> <f>"#);
413        ta.pop_top_triple();
414        assert_eq!(display(ta.top()), r#"<a> <b> <c>"#);
415        ta.pop_top_triple();
416        assert_eq!(ta.complete_len, 0);
417        assert_eq!(ta.incomplete_len, 0);
418        Ok(())
419    }
420
421    #[test]
422    fn quoted_triple_as_subject() -> Result<(), Infallible> {
423        let mut ta = GeneralizedTripleAllocator::new();
424        ta.push_triple_start();
425        {
426            ta.push_triple_start();
427            ta.try_push_atom(0, |b, _| bn(b, "a"))?;
428            ta.try_push_atom(1, |b, _| iri(b, "b"))?;
429            ta.try_push_atom(2, |b, _| sl(b, "c"))?;
430            assert_eq!(display(ta.top()), r#"_:a <b> "c""#);
431        }
432        ta.push_quoted_triple(0);
433        ta.try_push_atom(1, |b, _| iri(b, "d"))?;
434        ta.try_push_atom(2, |b, _| sl(b, "e"))?;
435        assert_eq!(display(ta.top()), r#"<< _:a <b> "c" >> <d> "e""#);
436        Ok(())
437    }
438
439    #[test]
440    fn quoted_triple_as_predicate() -> Result<(), Infallible> {
441        let mut ta = GeneralizedTripleAllocator::new();
442        ta.push_triple_start();
443        ta.try_push_atom(0, |b, _| bn(b, "a"))?;
444        {
445            ta.push_triple_start();
446            ta.try_push_atom(0, |b, _| bn(b, "b"))?;
447            ta.try_push_atom(1, |b, _| iri(b, "c"))?;
448            ta.try_push_atom(2, |b, _| sl(b, "d"))?;
449            assert_eq!(display(ta.top()), r#"_:b <c> "d""#);
450        }
451        ta.push_quoted_triple(1);
452        ta.try_push_atom(2, |b, _| bn(b, "e"))?;
453        assert_eq!(display(ta.top()), r#"_:a << _:b <c> "d" >> _:e"#);
454        Ok(())
455    }
456
457    #[test]
458    fn quoted_triple_as_object() -> Result<(), Infallible> {
459        let mut ta = GeneralizedTripleAllocator::new();
460        ta.push_triple_start();
461        ta.try_push_atom(0, |b, _| bn(b, "a"))?;
462        ta.try_push_atom(1, |b, _| iri(b, "b"))?;
463        {
464            ta.push_triple_start();
465            ta.try_push_atom(0, |b, _| bn(b, "c"))?;
466            ta.try_push_atom(1, |b, _| iri(b, "d"))?;
467            ta.try_push_atom(2, |b, _| sl(b, "e"))?;
468            assert_eq!(display(ta.top()), r#"_:c <d> "e""#);
469        }
470        ta.push_quoted_triple(2);
471        assert_eq!(display(ta.top()), r#"_:a <b> << _:c <d> "e" >>"#);
472        Ok(())
473    }
474
475    #[test]
476    fn quoted_triple_as_both() -> Result<(), Infallible> {
477        let mut ta = GeneralizedTripleAllocator::new();
478        ta.push_triple_start();
479        {
480            ta.push_triple_start();
481            ta.try_push_atom(0, |b, _| bn(b, "a"))?;
482            ta.try_push_atom(1, |b, _| iri(b, "b"))?;
483            ta.try_push_atom(2, |b, _| sl(b, "c"))?;
484            assert_eq!(display(ta.top()), r#"_:a <b> "c""#);
485        }
486        ta.push_quoted_triple(0);
487        ta.try_push_atom(1, |b, _| iri(b, "d"))?;
488        {
489            ta.push_triple_start();
490            ta.try_push_atom(0, |b, _| bn(b, "e"))?;
491            ta.try_push_atom(1, |b, _| iri(b, "f"))?;
492            ta.try_push_atom(2, |b, _| sl(b, "g"))?;
493            assert_eq!(display(ta.top()), r#"_:e <f> "g""#);
494        }
495        ta.push_quoted_triple(2);
496        assert_eq!(
497            display(ta.top()),
498            r#"<< _:a <b> "c" >> <d> << _:e <f> "g" >>"#
499        );
500        Ok(())
501    }
502
503    #[test]
504    fn quoted_triple_deep() -> Result<(), Infallible> {
505        let mut ta = GeneralizedTripleAllocator::new();
506        ta.push_triple_start();
507        {
508            ta.push_triple_start();
509            ta.try_push_atom(0, |b, _| bn(b, "a"))?;
510            ta.try_push_atom(1, |b, _| iri(b, "b"))?;
511            {
512                ta.push_triple_start();
513                ta.try_push_atom(0, |b, _| bn(b, "c"))?;
514                ta.try_push_atom(1, |b, _| iri(b, "d"))?;
515                ta.try_push_atom(2, |b, _| sl(b, "e"))?;
516                assert_eq!(display(ta.top()), r#"_:c <d> "e""#);
517            }
518            ta.push_quoted_triple(2);
519            assert_eq!(display(ta.top()), r#"_:a <b> << _:c <d> "e" >>"#);
520        }
521        ta.push_quoted_triple(0);
522        ta.try_push_atom(1, |b, _| iri(b, "f"))?;
523        {
524            ta.push_triple_start();
525            {
526                ta.push_triple_start();
527                ta.try_push_atom(0, |b, _| bn(b, "g"))?;
528                ta.try_push_atom(1, |b, _| iri(b, "h"))?;
529                ta.try_push_atom(2, |b, _| sl(b, "i"))?;
530                assert_eq!(display(ta.top()), r#"_:g <h> "i""#);
531            }
532            ta.push_quoted_triple(0);
533            ta.try_push_atom(1, |b, _| iri(b, "j"))?;
534            ta.try_push_atom(2, |b, _| sl(b, "k"))?;
535            assert_eq!(display(ta.top()), r#"<< _:g <h> "i" >> <j> "k""#);
536        }
537        ta.push_quoted_triple(2);
538        assert_eq!(
539            display(ta.top()),
540            r#"<< _:a <b> << _:c <d> "e" >> >> <f> << << _:g <h> "i" >> <j> "k" >>"#
541        );
542
543        ta.pop_top_triple();
544        assert_eq!(ta.complete_len, 0);
545        assert_eq!(ta.incomplete_len, 0);
546        Ok(())
547    }
548
549    fn display(triple: &[GeneralizedTerm<'_>; 3]) -> String {
550        format!("{} {} {}", triple[0], triple[1], triple[2])
551    }
552}