rio_turtle/
triple_allocator.rs

1//! I define [`TripleAllocator`]
2
3#![allow(unsafe_code)]
4use crate::utils::StringBufferStack;
5use rio_api::model::*;
6use std::mem::transmute;
7
8/// A stack allocator for storing RDF and RDF-star triples.
9///
10/// # Implementation
11/// This type uses `&'static str` internally to reference text that it allocates.
12/// It therefore contains unsafe code to cheat the borrow checker,
13/// but ensures that the referenced data lives as long as the referencing struct.
14pub struct TripleAllocator {
15    incomplete_stack: Vec<Triple<'static>>,
16    incomplete_len: usize,
17    #[allow(clippy::vec_box)]
18    complete_stack: Vec<Box<Triple<'static>>>,
19    complete_len: usize,
20    string_stack: StringBufferStack,
21}
22
23impl TripleAllocator {
24    pub fn new() -> TripleAllocator {
25        TripleAllocator {
26            incomplete_stack: Vec::with_capacity(1),
27            incomplete_len: 0,
28            complete_stack: Vec::with_capacity(1),
29            complete_len: 0,
30            string_stack: StringBufferStack::with_capacity(4),
31        }
32    }
33
34    pub fn complete_len(&self) -> usize {
35        self.complete_len
36    }
37
38    pub fn incomplete_len(&self) -> usize {
39        self.incomplete_len
40    }
41
42    pub fn top(&self) -> &Triple<'_> {
43        debug_assert!(self.complete_len > 0);
44        &self.complete_stack[self.complete_len - 1]
45    }
46
47    pub fn top_quad<'s>(&'s self, graph_name: Option<GraphName<'s>>) -> Quad<'s> {
48        debug_assert!(self.complete_len > 0);
49        let Triple {
50            subject,
51            predicate,
52            object,
53        } = *self.top();
54        Quad {
55            subject,
56            predicate,
57            object,
58            graph_name,
59        }
60    }
61
62    pub fn push_triple_start(&mut self) {
63        if self.incomplete_len == self.incomplete_stack.len() {
64            self.incomplete_stack.push(Triple {
65                subject: DUMMY_IRI.into(),
66                predicate: DUMMY_IRI,
67                object: DUMMY_IRI.into(),
68            })
69        }
70        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
71        {
72            self.incomplete_stack[self.incomplete_len] = Triple {
73                subject: DUMMY_IRI.into(),
74                predicate: DUMMY_IRI,
75                object: DUMMY_IRI.into(),
76            }
77        }
78        self.incomplete_len += 1;
79    }
80
81    /// Push an atomic term, produced by `subject_factory`, as the subject of the current triple.
82    ///
83    /// # Pre-condition
84    /// In standard RDF, any subject is acceptable.
85    /// In RDF-star quoted triples [`Subject::Triple`] are *not allowed*.
86    /// For adding an quoted triple, use [`TripleAllocator::push_subject_triple`] instead.
87    pub fn try_push_subject<E, F>(&mut self, subject_factory: F) -> Result<(), E>
88    where
89        F: FnOnce(&mut String) -> Result<Subject<'_>, E>,
90    {
91        debug_assert!(dummy(self.current().subject));
92        let buffer = self.string_stack.push();
93        let subject = subject_factory(buffer)?;
94        debug_assert!(matches!(
95            subject,
96            Subject::NamedNode(_) | Subject::BlankNode(_)
97        ));
98        let subject: Subject<'static> = unsafe { transmute(subject) };
99        // The unsafe code above changes the lifetime parameter of subject to `'static`.
100        // This is ok because:
101        // * we will only expose it with a shorter lifetime, and
102        // * this implementation guarantees that the pointed `str` lives as long as the subject
103        self.current().subject = subject;
104        Ok(())
105    }
106
107    /// Push an atomic term, produced by `predicate_factory`, as the predicate of the current triple.
108    pub fn try_push_predicate<E, F>(&mut self, predicate_factory: F) -> Result<(), E>
109    where
110        F: FnOnce(&mut String) -> Result<NamedNode<'_>, E>,
111    {
112        debug_assert!(!dummy(self.current().subject));
113        debug_assert!(dummy(self.current().predicate));
114        let buffer = self.string_stack.push();
115        let predicate = predicate_factory(buffer)?;
116        let predicate: NamedNode<'static> = unsafe { transmute(predicate) };
117        // The unsafe code above changes the lifetime parameter of predicate to `'static`.
118        // This is ok because:
119        // * we will only expose it with a shorter lifetime, and
120        // * this implementation guarantees that the pointed `str` lives as long as the predicate
121        self.current().predicate = predicate;
122        Ok(())
123    }
124
125    /// Push an atomic term, produced by `object_factory`, as the object of the current triple.
126    ///
127    /// # Pre-condition
128    /// In standard RDF, any object is acceptable.
129    /// In RDF-star quoted triples [`Subject::Triple`] are *not allowed*.
130    /// For adding an quoted triple, use [`TripleAllocator::push_object_triple`] instead.
131    pub fn try_push_object<E, F>(&mut self, object_factory: F) -> Result<(), E>
132    where
133        F: for<'x> FnOnce(&'x mut String, &'x mut String) -> Result<Term<'x>, E>,
134    {
135        debug_assert!(!dummy(self.current().predicate));
136        debug_assert!(dummy(self.current().object));
137        let buffers = self.string_stack.push2();
138        let object = object_factory(buffers.0, buffers.1)?;
139        debug_assert!(matches!(
140            object,
141            Term::NamedNode(_) | Term::BlankNode(_) | Term::Literal(_)
142        ));
143        let object: Term<'static> = unsafe { transmute(object) };
144        // The unsafe code above changes the lifetime parameter of object to `'static`.
145        // This is ok because:
146        // * we will only expose it with a shorter lifetime, and
147        // * this implementation guarantees that the pointed `str` lives as long as the object
148        self.complete_triple(object);
149        Ok(())
150    }
151
152    /// Use the [top](TripleAllocator::top) triple of this stash as the subject of the current triple.
153    pub fn push_subject_triple(&mut self) {
154        debug_assert!(dummy(self.current().subject));
155        debug_assert!(self.complete_len > 0);
156        let triple = &*self.complete_stack[self.complete_len - 1];
157        let triple: &'static Triple<'static> = unsafe { transmute(triple) };
158        // The unsafe code above changes the lifetime of the ref to `'static`.
159        // This is ok because:
160        // * we will only expose it with a shorter lifetime, and
161        // * this implementation guarantees that the pointed `Triple` lives as long as the `Term` embedding it
162        self.current().subject = Subject::Triple(triple);
163    }
164
165    /// Use the [top](TripleAllocator::top) triple of this stash as the object of the current triple.
166    ///
167    /// # Pre-condition
168    /// The top triple must not have been pushed already as the subject.
169    pub fn push_object_triple(&mut self) {
170        debug_assert!(!dummy(self.current().predicate));
171        debug_assert!(dummy(self.current().object));
172        debug_assert!(self.complete_len > 0);
173        let triple = &*self.complete_stack[self.complete_len - 1];
174
175        #[cfg(debug_assertions)] // the subject triple, if any, must not be top()
176        debug_assert!(
177            match self.incomplete_stack[self.incomplete_len - 1].subject {
178                Subject::Triple(s) => {
179                    let ptr_s: *const _ = s;
180                    ptr_s != triple
181                }
182                _ => true,
183            }
184        );
185
186        let triple: &'static Triple<'static> = unsafe { transmute(triple) };
187        // The unsafe code above changes the lifetime of the ref to `'static`.
188        // This is ok because:
189        // * we will only expose it with a shorter lifetime, and
190        // * this implementation guarantees that the pointed `Triple` lives as long as the `Term` embedding it
191        self.complete_triple(Term::Triple(triple));
192    }
193
194    pub fn pop_object(&mut self) {
195        debug_assert!(self.complete_len > 0);
196        self.complete_len -= 1;
197        let inc_triple = *self.complete_stack[self.complete_len];
198        if self.incomplete_len == self.incomplete_stack.len() {
199            self.incomplete_stack.push(inc_triple)
200        } else {
201            self.incomplete_stack[self.incomplete_len] = inc_triple;
202        }
203        self.incomplete_len += 1;
204
205        match inc_triple.object {
206            Term::NamedNode(_) | Term::BlankNode(_) | Term::Literal(_) => {
207                // we allocate two buffers for any atomic object, even named or blank node
208                self.string_stack.pop();
209                self.string_stack.pop()
210            }
211            Term::Triple(_) => self.pop_top_triple(),
212        }
213        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
214        {
215            self.current().object = DUMMY_IRI.into();
216        }
217    }
218
219    pub fn pop_predicate(&mut self) {
220        debug_assert!(dummy(self.current().object));
221        debug_assert!(!dummy(self.current().predicate));
222        self.string_stack.pop();
223        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
224        {
225            self.current().predicate = DUMMY_IRI;
226        }
227    }
228
229    pub fn pop_subject(&mut self) {
230        debug_assert!(dummy(self.current().predicate));
231        debug_assert!(!dummy(self.current().subject));
232        match self.current().subject {
233            Subject::NamedNode(_) | Subject::BlankNode(_) => self.string_stack.pop(),
234            Subject::Triple(_) => self.pop_top_triple(),
235        }
236        #[cfg(debug_assertions)] // to ensure that dummy() assertions work
237        {
238            self.current().subject = DUMMY_IRI.into();
239        }
240    }
241
242    #[inline(always)]
243    /// Pops the latest complete triple, and recursively pops all its constituent triples.
244    /// Equivalent to pop_object, pop_predicate, pop_subject, pop_empty_triple
245    pub fn pop_top_triple(&mut self) {
246        self.pop_object();
247        self.pop_predicate();
248        self.pop_subject();
249        self.incomplete_len -= 1;
250    }
251
252    /// Pops the top-most empty triple, created with push_triple_start,
253    /// but with all its components having been popped (or never pushed)
254    #[inline(always)]
255    pub fn pop_top_empty_triple(&mut self) {
256        debug_assert!(self.incomplete_len > 0);
257        debug_assert!(dummy(self.current().predicate));
258        debug_assert!(dummy(self.current().subject));
259        self.incomplete_len -= 1;
260    }
261
262    /// Pops the top-most annotation triple, i.e.
263    /// a triple on the incomplete stack with only its subject pushed,
264    /// and that subject is an quoted triple.
265    ///
266    /// The goal is to remove this triple *without* freeing the subject triple.
267    #[inline(always)]
268    pub fn pop_annotation_triple(&mut self) {
269        debug_assert!(self.incomplete_len > 0);
270        debug_assert!(dummy(self.current().predicate));
271        debug_assert!(!dummy(self.current().subject));
272        debug_assert!(matches!(self.current().subject, Subject::Triple(_)));
273        self.incomplete_len -= 1;
274    }
275
276    pub fn clear(&mut self) {
277        self.incomplete_len = 0;
278        self.incomplete_stack.clear();
279        self.complete_len = 0;
280        self.complete_stack.clear();
281        self.string_stack.clear();
282    }
283
284    fn complete_triple(&mut self, object: Term<'static>) {
285        self.incomplete_len -= 1;
286        let mut triple = self.incomplete_stack[self.incomplete_len];
287        triple.object = object;
288        if self.complete_len == self.complete_stack.len() {
289            self.complete_stack.push(Box::new(triple))
290        } else {
291            *self.complete_stack[self.complete_len] = triple;
292        }
293        self.complete_len += 1;
294    }
295
296    fn current(&mut self) -> &mut Triple<'static> {
297        debug_assert!(self.incomplete_len > 0);
298        &mut self.incomplete_stack[self.incomplete_len - 1]
299    }
300}
301
302#[cfg(debug_assertions)] // debug assertions need DUMMY to have a static address
303static DUMMY_IRI: NamedNode<'static> = NamedNode { iri: "" };
304#[cfg(not(debug_assertions))] // otherwise, a const is sufficient
305const DUMMY_IRI: NamedNode<'static> = NamedNode { iri: "" };
306
307fn dummy<'a, T: std::fmt::Debug + Into<Term<'a>>>(t: T) -> bool {
308    match t.into() {
309        Term::NamedNode(n) => {
310            let ptr_iri: *const str = n.iri;
311            ptr_iri == DUMMY_IRI.iri
312        }
313        _ => false,
314    }
315}
316
317#[cfg(test)]
318mod test {
319    use super::*;
320    use std::convert::Infallible;
321
322    #[cfg(debug_assertions)]
323    #[test]
324    fn dummy_works() {
325        assert!(dummy(DUMMY_IRI));
326        let b = "foo".to_string();
327        let n = NamedNode { iri: &b[..0] };
328        assert!(DUMMY_IRI.iri == n.iri);
329        // and yet:
330        assert!(!dummy(n));
331    }
332
333    fn iri<'a, T: From<NamedNode<'a>>>(
334        buffer: &'a mut String,
335        value: &str,
336    ) -> Result<T, Infallible> {
337        buffer.push_str(value);
338        Ok(NamedNode { iri: &*buffer }.into())
339    }
340
341    fn bn<'a, T: From<BlankNode<'a>>>(
342        buffer: &'a mut String,
343        value: &str,
344    ) -> Result<T, Infallible> {
345        buffer.push_str(value);
346        Ok(BlankNode { id: &*buffer }.into())
347    }
348
349    fn sl<'a, T: From<Literal<'a>>>(buffer: &'a mut String, value: &str) -> Result<T, Infallible> {
350        buffer.push_str(value);
351        Ok(Literal::Simple { value: &*buffer }.into())
352    }
353
354    fn lt<'a, T: From<Literal<'a>>>(
355        buffer1: &'a mut String,
356        buffer2: &'a mut String,
357        value: &str,
358        tag: &str,
359    ) -> Result<T, Infallible> {
360        buffer1.push_str(value);
361        buffer2.push_str(tag);
362        Ok(Literal::LanguageTaggedString {
363            value: &*buffer1,
364            language: &*buffer2,
365        }
366        .into())
367    }
368
369    fn dt<'a, T: From<Literal<'a>>>(
370        buffer1: &'a mut String,
371        buffer2: &'a mut String,
372        value: &str,
373        dt: &str,
374    ) -> Result<T, Infallible> {
375        buffer1.push_str(value);
376        buffer2.push_str(dt);
377        Ok(Literal::Typed {
378            value: &*buffer1,
379            datatype: NamedNode { iri: &*buffer2 },
380        }
381        .into())
382    }
383
384    #[test]
385    fn simple_triple_w_named() -> Result<(), Infallible> {
386        let mut ta = TripleAllocator::new();
387        ta.push_triple_start();
388        ta.try_push_subject(|b| iri(b, "a"))?;
389        ta.try_push_predicate(|b| iri(b, "b"))?;
390        ta.try_push_object(|b, _| iri(b, "c"))?;
391        assert_eq!(format!("{}", ta.top()), r#"<a> <b> <c>"#);
392        Ok(())
393    }
394
395    #[test]
396    fn simple_triple_w_blank() -> Result<(), Infallible> {
397        let mut ta = TripleAllocator::new();
398        ta.push_triple_start();
399        ta.try_push_subject(|b| bn(b, "a"))?;
400        ta.try_push_predicate(|b| iri(b, "b"))?;
401        ta.try_push_object(|b, _| bn(b, "c"))?;
402        assert_eq!(format!("{}", ta.top()), r#"_:a <b> _:c"#);
403        Ok(())
404    }
405
406    #[test]
407    fn simple_triple_w_simple_lit() -> Result<(), Infallible> {
408        let mut ta = TripleAllocator::new();
409        ta.push_triple_start();
410        ta.try_push_subject(|b| bn(b, "a"))?;
411        ta.try_push_predicate(|b| iri(b, "b"))?;
412        ta.try_push_object(|b, _| sl(b, "c"))?;
413        assert_eq!(format!("{}", ta.top()), r#"_:a <b> "c""#);
414        Ok(())
415    }
416
417    #[test]
418    fn simple_triple_w_lang_lit() -> Result<(), Infallible> {
419        let mut ta = TripleAllocator::new();
420        ta.push_triple_start();
421        ta.try_push_subject(|b| bn(b, "a"))?;
422        ta.try_push_predicate(|b| iri(b, "b"))?;
423        ta.try_push_object(|b1, b2| lt(b1, b2, "c", "en"))?;
424        assert_eq!(format!("{}", ta.top()), r#"_:a <b> "c"@en"#);
425        Ok(())
426    }
427
428    #[test]
429    fn simple_triple_w_typed_lit() -> Result<(), Infallible> {
430        let mut ta = TripleAllocator::new();
431        ta.push_triple_start();
432        ta.try_push_subject(|b| bn(b, "a"))?;
433        ta.try_push_predicate(|b| iri(b, "b"))?;
434        ta.try_push_object(|b1, b2| dt(b1, b2, "c", "d"))?;
435        assert_eq!(format!("{}", ta.top()), r#"_:a <b> "c"^^<d>"#);
436        Ok(())
437    }
438
439    #[test]
440    fn simple_triples_pop() -> Result<(), Infallible> {
441        let mut ta = TripleAllocator::new();
442        ta.push_triple_start();
443        ta.try_push_subject(|b| iri(b, "a"))?;
444        ta.try_push_predicate(|b| iri(b, "b"))?;
445        ta.try_push_object(|b, _| iri(b, "c"))?;
446        assert_eq!(format!("{}", ta.top()), r#"<a> <b> <c>"#);
447        ta.pop_object();
448        ta.try_push_object(|b, _| iri(b, "d"))?;
449        assert_eq!(format!("{}", ta.top()), r#"<a> <b> <d>"#);
450        ta.pop_object();
451        ta.pop_predicate();
452        ta.try_push_predicate(|b| iri(b, "e"))?;
453        ta.try_push_object(|b, _| iri(b, "f"))?;
454        assert_eq!(format!("{}", ta.top()), r#"<a> <e> <f>"#);
455        ta.pop_object();
456        ta.try_push_object(|b, _| iri(b, "g"))?;
457        assert_eq!(format!("{}", ta.top()), r#"<a> <e> <g>"#);
458        ta.pop_object();
459        ta.pop_predicate();
460        ta.pop_subject();
461        ta.try_push_subject(|b| iri(b, "h"))?;
462        ta.try_push_predicate(|b| iri(b, "i"))?;
463        ta.try_push_object(|b, _| iri(b, "j"))?;
464        assert_eq!(format!("{}", ta.top()), r#"<h> <i> <j>"#);
465        Ok(())
466    }
467
468    #[test]
469    fn simple_triples_stacked() -> Result<(), Infallible> {
470        let mut ta = TripleAllocator::new();
471        ta.push_triple_start();
472        ta.try_push_subject(|b| iri(b, "a"))?;
473        ta.try_push_predicate(|b| iri(b, "b"))?;
474        ta.try_push_object(|b, _| iri(b, "c"))?;
475        assert_eq!(format!("{}", ta.top()), r#"<a> <b> <c>"#);
476        ta.push_triple_start();
477        ta.try_push_subject(|b| iri(b, "d"))?;
478        ta.try_push_predicate(|b| iri(b, "e"))?;
479        ta.try_push_object(|b, _| iri(b, "f"))?;
480        assert_eq!(format!("{}", ta.top()), r#"<d> <e> <f>"#);
481        ta.pop_top_triple();
482        assert_eq!(format!("{}", ta.top()), r#"<a> <b> <c>"#);
483        ta.pop_top_triple();
484        assert_eq!(ta.complete_len, 0);
485        assert_eq!(ta.incomplete_len, 0);
486        Ok(())
487    }
488
489    #[test]
490    fn quoted_triple_as_subject() -> Result<(), Infallible> {
491        let mut ta = TripleAllocator::new();
492        ta.push_triple_start();
493        {
494            ta.push_triple_start();
495            ta.try_push_subject(|b| bn(b, "a"))?;
496            ta.try_push_predicate(|b| iri(b, "b"))?;
497            ta.try_push_object(|b, _| sl(b, "c"))?;
498            assert_eq!(format!("{}", ta.top()), r#"_:a <b> "c""#);
499        }
500        ta.push_subject_triple();
501        ta.try_push_predicate(|b| iri(b, "d"))?;
502        ta.try_push_object(|b, _| sl(b, "e"))?;
503        assert_eq!(format!("{}", ta.top()), r#"<< _:a <b> "c" >> <d> "e""#);
504        Ok(())
505    }
506
507    #[test]
508    fn quoted_triple_as_object() -> Result<(), Infallible> {
509        let mut ta = TripleAllocator::new();
510        ta.push_triple_start();
511        ta.try_push_subject(|b| bn(b, "a"))?;
512        ta.try_push_predicate(|b| iri(b, "b"))?;
513        {
514            ta.push_triple_start();
515            ta.try_push_subject(|b| bn(b, "c"))?;
516            ta.try_push_predicate(|b| iri(b, "d"))?;
517            ta.try_push_object(|b, _| sl(b, "e"))?;
518            assert_eq!(format!("{}", ta.top()), r#"_:c <d> "e""#);
519        }
520        ta.push_object_triple();
521        assert_eq!(format!("{}", ta.top()), r#"_:a <b> << _:c <d> "e" >>"#);
522        Ok(())
523    }
524
525    #[test]
526    fn quoted_triple_as_both() -> Result<(), Infallible> {
527        let mut ta = TripleAllocator::new();
528        ta.push_triple_start();
529        {
530            ta.push_triple_start();
531            ta.try_push_subject(|b| bn(b, "a"))?;
532            ta.try_push_predicate(|b| iri(b, "b"))?;
533            ta.try_push_object(|b, _| sl(b, "c"))?;
534            assert_eq!(format!("{}", ta.top()), r#"_:a <b> "c""#);
535        }
536        ta.push_subject_triple();
537        ta.try_push_predicate(|b| iri(b, "d"))?;
538        {
539            ta.push_triple_start();
540            ta.try_push_subject(|b| bn(b, "e"))?;
541            ta.try_push_predicate(|b| iri(b, "f"))?;
542            ta.try_push_object(|b, _| sl(b, "g"))?;
543            assert_eq!(format!("{}", ta.top()), r#"_:e <f> "g""#);
544        }
545        ta.push_object_triple();
546        assert_eq!(
547            format!("{}", ta.top()),
548            r#"<< _:a <b> "c" >> <d> << _:e <f> "g" >>"#
549        );
550        Ok(())
551    }
552
553    #[test]
554    fn quoted_triple_deep() -> Result<(), Infallible> {
555        let mut ta = TripleAllocator::new();
556        ta.push_triple_start();
557        {
558            ta.push_triple_start();
559            ta.try_push_subject(|b| bn(b, "a"))?;
560            ta.try_push_predicate(|b| iri(b, "b"))?;
561            {
562                ta.push_triple_start();
563                ta.try_push_subject(|b| bn(b, "c"))?;
564                ta.try_push_predicate(|b| iri(b, "d"))?;
565                ta.try_push_object(|b, _| sl(b, "e"))?;
566                assert_eq!(format!("{}", ta.top()), r#"_:c <d> "e""#);
567            }
568            ta.push_object_triple();
569            assert_eq!(format!("{}", ta.top()), r#"_:a <b> << _:c <d> "e" >>"#);
570        }
571        ta.push_subject_triple();
572        ta.try_push_predicate(|b| iri(b, "f"))?;
573        {
574            ta.push_triple_start();
575            {
576                ta.push_triple_start();
577                ta.try_push_subject(|b| bn(b, "g"))?;
578                ta.try_push_predicate(|b| iri(b, "h"))?;
579                ta.try_push_object(|b, _| sl(b, "i"))?;
580                assert_eq!(format!("{}", ta.top()), r#"_:g <h> "i""#);
581            }
582            ta.push_subject_triple();
583            ta.try_push_predicate(|b| iri(b, "j"))?;
584            ta.try_push_object(|b, _| sl(b, "k"))?;
585            assert_eq!(format!("{}", ta.top()), r#"<< _:g <h> "i" >> <j> "k""#);
586        }
587        ta.push_object_triple();
588        assert_eq!(
589            format!("{}", ta.top()),
590            r#"<< _:a <b> << _:c <d> "e" >> >> <f> << << _:g <h> "i" >> <j> "k" >>"#
591        );
592
593        ta.pop_top_triple();
594        assert_eq!(ta.complete_len, 0);
595        assert_eq!(ta.incomplete_len, 0);
596        Ok(())
597    }
598}