1use crate::interning::*;
33use crate::*;
34use std::collections::hash_map::DefaultHasher;
35use std::collections::{BTreeSet, HashMap, HashSet};
36use std::fmt;
37use std::hash::{Hash, Hasher};
38
39#[derive(Debug, Default, Clone)]
67pub struct Dataset {
68 interner: Interner,
69 gspo: BTreeSet<(
70 InternedGraphName,
71 InternedSubject,
72 InternedNamedNode,
73 InternedTerm,
74 )>,
75 gpos: BTreeSet<(
76 InternedGraphName,
77 InternedNamedNode,
78 InternedTerm,
79 InternedSubject,
80 )>,
81 gosp: BTreeSet<(
82 InternedGraphName,
83 InternedTerm,
84 InternedSubject,
85 InternedNamedNode,
86 )>,
87 spog: BTreeSet<(
88 InternedSubject,
89 InternedNamedNode,
90 InternedTerm,
91 InternedGraphName,
92 )>,
93 posg: BTreeSet<(
94 InternedNamedNode,
95 InternedTerm,
96 InternedSubject,
97 InternedGraphName,
98 )>,
99 ospg: BTreeSet<(
100 InternedTerm,
101 InternedSubject,
102 InternedNamedNode,
103 InternedGraphName,
104 )>,
105}
106
107impl Dataset {
108 pub fn new() -> Self {
110 Self::default()
111 }
112
113 pub fn graph<'a, 'b>(&'a self, graph_name: impl Into<GraphNameRef<'b>>) -> GraphView<'a> {
127 let graph_name = self
128 .encoded_graph_name(graph_name)
129 .unwrap_or_else(InternedGraphName::impossible);
130 GraphView {
131 dataset: self,
132 graph_name,
133 }
134 }
135
136 pub fn graph_mut<'a, 'b>(
158 &'a mut self,
159 graph_name: impl Into<GraphNameRef<'b>>,
160 ) -> GraphViewMut<'a> {
161 let graph_name = InternedGraphName::encoded_into(graph_name.into(), &mut self.interner);
162 GraphViewMut {
163 dataset: self,
164 graph_name,
165 }
166 }
167
168 pub fn iter(&self) -> Iter<'_> {
170 let iter = self.spog.iter();
171 Iter {
172 dataset: self,
173 inner: iter,
174 }
175 }
176
177 pub fn quads_for_subject<'a, 'b>(
178 &'a self,
179 subject: impl Into<SubjectRef<'b>>,
180 ) -> impl Iterator<Item = QuadRef<'a>> + 'a {
181 let subject = self
182 .encoded_subject(subject)
183 .unwrap_or_else(InternedSubject::impossible);
184 self.interned_quads_for_subject(&subject)
185 .map(move |q| self.decode_spog(q))
186 }
187
188 #[allow(clippy::map_identity)]
189 fn interned_quads_for_subject(
190 &self,
191 subject: &InternedSubject,
192 ) -> impl Iterator<
193 Item = (
194 &InternedSubject,
195 &InternedNamedNode,
196 &InternedTerm,
197 &InternedGraphName,
198 ),
199 > + '_ {
200 self.spog
201 .range(
202 &(
203 subject.clone(),
204 InternedNamedNode::first(),
205 InternedTerm::first(),
206 InternedGraphName::first(),
207 )
208 ..&(
209 subject.next(),
210 InternedNamedNode::first(),
211 InternedTerm::first(),
212 InternedGraphName::first(),
213 ),
214 )
215 .map(|(s, p, o, g)| (s, p, o, g))
216 }
217
218 pub fn quads_for_predicate<'a, 'b>(
219 &'a self,
220 predicate: impl Into<NamedNodeRef<'b>>,
221 ) -> impl Iterator<Item = QuadRef<'a>> + 'a {
222 let predicate = self
223 .encoded_named_node(predicate)
224 .unwrap_or_else(InternedNamedNode::impossible);
225 self.interned_quads_for_predicate(predicate)
226 .map(move |q| self.decode_spog(q))
227 }
228
229 fn interned_quads_for_predicate(
230 &self,
231 predicate: InternedNamedNode,
232 ) -> impl Iterator<
233 Item = (
234 &InternedSubject,
235 &InternedNamedNode,
236 &InternedTerm,
237 &InternedGraphName,
238 ),
239 > + '_ {
240 self.posg
241 .range(
242 &(
243 predicate,
244 InternedTerm::first(),
245 InternedSubject::first(),
246 InternedGraphName::first(),
247 )
248 ..&(
249 predicate.next(),
250 InternedTerm::first(),
251 InternedSubject::first(),
252 InternedGraphName::first(),
253 ),
254 )
255 .map(|(p, o, s, g)| (s, p, o, g))
256 }
257
258 pub fn quads_for_object<'a, 'b>(
259 &'a self,
260 object: impl Into<TermRef<'b>>,
261 ) -> impl Iterator<Item = QuadRef<'a>> + 'a {
262 let object = self
263 .encoded_term(object)
264 .unwrap_or_else(InternedTerm::impossible);
265
266 self.interned_quads_for_object(&object)
267 .map(move |q| self.decode_spog(q))
268 }
269
270 fn interned_quads_for_object(
271 &self,
272 object: &InternedTerm,
273 ) -> impl Iterator<
274 Item = (
275 &InternedSubject,
276 &InternedNamedNode,
277 &InternedTerm,
278 &InternedGraphName,
279 ),
280 > + '_ {
281 self.ospg
282 .range(
283 &(
284 object.clone(),
285 InternedSubject::first(),
286 InternedNamedNode::first(),
287 InternedGraphName::first(),
288 )
289 ..&(
290 object.next(),
291 InternedSubject::first(),
292 InternedNamedNode::first(),
293 InternedGraphName::first(),
294 ),
295 )
296 .map(|(o, s, p, g)| (s, p, o, g))
297 }
298
299 pub fn quads_for_graph_name<'a, 'b>(
300 &'a self,
301 graph_name: impl Into<GraphNameRef<'b>>,
302 ) -> impl Iterator<Item = QuadRef<'a>> + 'a {
303 let graph_name = self
304 .encoded_graph_name(graph_name)
305 .unwrap_or_else(InternedGraphName::impossible);
306
307 self.interned_quads_for_graph_name(&graph_name)
308 .map(move |q| self.decode_spog(q))
309 }
310
311 fn interned_quads_for_graph_name(
312 &self,
313 graph_name: &InternedGraphName,
314 ) -> impl Iterator<
315 Item = (
316 &InternedSubject,
317 &InternedNamedNode,
318 &InternedTerm,
319 &InternedGraphName,
320 ),
321 > + '_ {
322 self.gspo
323 .range(
324 &(
325 graph_name.clone(),
326 InternedSubject::first(),
327 InternedNamedNode::first(),
328 InternedTerm::first(),
329 )
330 ..&(
331 graph_name.next(),
332 InternedSubject::first(),
333 InternedNamedNode::first(),
334 InternedTerm::first(),
335 ),
336 )
337 .map(|(g, s, p, o)| (s, p, o, g))
338 }
339
340 pub fn contains<'a>(&self, quad: impl Into<QuadRef<'a>>) -> bool {
342 if let Some(q) = self.encoded_quad(quad.into()) {
343 self.spog.contains(&q)
344 } else {
345 false
346 }
347 }
348
349 pub fn len(&self) -> usize {
351 self.gspo.len()
352 }
353
354 pub fn is_empty(&self) -> bool {
356 self.gspo.is_empty()
357 }
358
359 pub fn insert<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> bool {
361 let quad = self.encode_quad(quad.into());
362 self.insert_encoded(quad)
363 }
364
365 fn insert_encoded(
366 &mut self,
367 quad: (
368 InternedSubject,
369 InternedNamedNode,
370 InternedTerm,
371 InternedGraphName,
372 ),
373 ) -> bool {
374 let (s, p, o, g) = quad;
375 self.gspo.insert((g.clone(), s.clone(), p, o.clone()));
376 self.gpos.insert((g.clone(), p, o.clone(), s.clone()));
377 self.gosp.insert((g.clone(), o.clone(), s.clone(), p));
378 self.spog.insert((s.clone(), p, o.clone(), g.clone()));
379 self.posg.insert((p, o.clone(), s.clone(), g.clone()));
380 self.ospg.insert((o, s, p, g))
381 }
382
383 pub fn remove<'a>(&mut self, quad: impl Into<QuadRef<'a>>) -> bool {
385 if let Some(quad) = self.encoded_quad(quad.into()) {
386 self.remove_encoded(quad)
387 } else {
388 false
389 }
390 }
391
392 fn remove_encoded(
393 &mut self,
394 quad: (
395 InternedSubject,
396 InternedNamedNode,
397 InternedTerm,
398 InternedGraphName,
399 ),
400 ) -> bool {
401 let (s, p, o, g) = quad;
402 self.gspo.remove(&(g.clone(), s.clone(), p, o.clone()));
403 self.gpos.remove(&(g.clone(), p, o.clone(), s.clone()));
404 self.gosp.remove(&(g.clone(), o.clone(), s.clone(), p));
405 self.spog.remove(&(s.clone(), p, o.clone(), g.clone()));
406 self.posg.remove(&(p, o.clone(), s.clone(), g.clone()));
407 self.ospg.remove(&(o, s, p, g))
408 }
409
410 pub fn clear(&mut self) {
412 self.gspo.clear();
413 self.gpos.clear();
414 self.gosp.clear();
415 self.spog.clear();
416 self.posg.clear();
417 self.ospg.clear();
418 }
419
420 fn encode_quad(
421 &mut self,
422 quad: QuadRef<'_>,
423 ) -> (
424 InternedSubject,
425 InternedNamedNode,
426 InternedTerm,
427 InternedGraphName,
428 ) {
429 (
430 InternedSubject::encoded_into(quad.subject, &mut self.interner),
431 InternedNamedNode::encoded_into(quad.predicate, &mut self.interner),
432 InternedTerm::encoded_into(quad.object, &mut self.interner),
433 InternedGraphName::encoded_into(quad.graph_name, &mut self.interner),
434 )
435 }
436
437 fn encoded_quad(
438 &self,
439 quad: QuadRef<'_>,
440 ) -> Option<(
441 InternedSubject,
442 InternedNamedNode,
443 InternedTerm,
444 InternedGraphName,
445 )> {
446 Some((
447 self.encoded_subject(quad.subject)?,
448 self.encoded_named_node(quad.predicate)?,
449 self.encoded_term(quad.object)?,
450 self.encoded_graph_name(quad.graph_name)?,
451 ))
452 }
453
454 pub(super) fn encoded_named_node<'a>(
455 &self,
456 node: impl Into<NamedNodeRef<'a>>,
457 ) -> Option<InternedNamedNode> {
458 InternedNamedNode::encoded_from(node.into(), &self.interner)
459 }
460
461 pub(super) fn encoded_subject<'a>(
462 &self,
463 node: impl Into<SubjectRef<'a>>,
464 ) -> Option<InternedSubject> {
465 InternedSubject::encoded_from(node.into(), &self.interner)
466 }
467
468 pub(super) fn encoded_term<'a>(&self, term: impl Into<TermRef<'a>>) -> Option<InternedTerm> {
469 InternedTerm::encoded_from(term.into(), &self.interner)
470 }
471
472 pub(super) fn encoded_graph_name<'a>(
473 &self,
474 graph_name: impl Into<GraphNameRef<'a>>,
475 ) -> Option<InternedGraphName> {
476 InternedGraphName::encoded_from(graph_name.into(), &self.interner)
477 }
478
479 fn decode_spog(
480 &self,
481 quad: (
482 &InternedSubject,
483 &InternedNamedNode,
484 &InternedTerm,
485 &InternedGraphName,
486 ),
487 ) -> QuadRef<'_> {
488 QuadRef {
489 subject: quad.0.decode_from(&self.interner),
490 predicate: quad.1.decode_from(&self.interner),
491 object: quad.2.decode_from(&self.interner),
492 graph_name: quad.3.decode_from(&self.interner),
493 }
494 }
495
496 fn decode_spo(
497 &self,
498 triple: (&InternedSubject, &InternedNamedNode, &InternedTerm),
499 ) -> TripleRef<'_> {
500 TripleRef {
501 subject: triple.0.decode_from(&self.interner),
502 predicate: triple.1.decode_from(&self.interner),
503 object: triple.2.decode_from(&self.interner),
504 }
505 }
506
507 pub fn canonicalize(&mut self, algorithm: CanonicalizationAlgorithm) {
540 let bnode_mapping = self.canonicalize_interned_blank_nodes(algorithm);
541 let new_quads = self.map_blank_nodes(&bnode_mapping);
542 self.clear();
543 for quad in new_quads {
544 self.insert_encoded(quad);
545 }
546 }
547
548 pub fn canonicalize_blank_nodes(
553 &self,
554 algorithm: CanonicalizationAlgorithm,
555 ) -> HashMap<BlankNodeRef<'_>, BlankNode> {
556 self.canonicalize_interned_blank_nodes(algorithm)
557 .into_iter()
558 .map(|(from, to)| (from.decode_from(&self.interner), to))
559 .collect()
560 }
561
562 fn canonicalize_interned_blank_nodes(
563 &self,
564 algorithm: CanonicalizationAlgorithm,
565 ) -> HashMap<InternedBlankNode, BlankNode> {
566 match algorithm {
567 CanonicalizationAlgorithm::Unstable => {
568 let bnodes = self.blank_nodes();
569 let quads_per_blank_node = self.quads_per_blank_nodes();
570 let (hash, partition) = self.hash_bnodes(
571 bnodes.into_iter().map(|bnode| (bnode, 0)).collect(),
572 &quads_per_blank_node,
573 );
574 self.distinguish(hash, &partition, &quads_per_blank_node)
575 .into_iter()
576 .map(|(from, to)| (from, BlankNode::new_from_unique_id(to.into())))
577 .collect()
578 }
579 }
580 }
581
582 fn blank_nodes(&self) -> HashSet<InternedBlankNode> {
583 let mut bnodes = HashSet::new();
584 for (g, s, _, o) in &self.gspo {
585 if let InternedSubject::BlankNode(bnode) = s {
586 bnodes.insert(*bnode);
587 }
588 #[cfg(feature = "rdf-star")]
589 if let InternedSubject::Triple(triple) = s {
590 Self::triple_blank_nodes(triple, &mut bnodes);
591 }
592 if let InternedTerm::BlankNode(bnode) = o {
593 bnodes.insert(*bnode);
594 }
595 #[cfg(feature = "rdf-star")]
596 if let InternedTerm::Triple(triple) = o {
597 Self::triple_blank_nodes(triple, &mut bnodes);
598 }
599 if let InternedGraphName::BlankNode(bnode) = g {
600 bnodes.insert(*bnode);
601 }
602 }
603 bnodes
604 }
605
606 #[cfg(feature = "rdf-star")]
607 fn triple_blank_nodes(triple: &InternedTriple, bnodes: &mut HashSet<InternedBlankNode>) {
608 if let InternedSubject::BlankNode(bnode) = &triple.subject {
609 bnodes.insert(*bnode);
610 } else if let InternedSubject::Triple(t) = &triple.subject {
611 Self::triple_blank_nodes(t, bnodes);
612 }
613 if let InternedTerm::BlankNode(bnode) = &triple.object {
614 bnodes.insert(*bnode);
615 } else if let InternedTerm::Triple(t) = &triple.object {
616 Self::triple_blank_nodes(t, bnodes);
617 }
618 }
619
620 fn quads_per_blank_nodes(&self) -> QuadsPerBlankNode {
621 let mut map: HashMap<_, Vec<_>> = HashMap::new();
622 for quad in &self.spog {
623 if let InternedSubject::BlankNode(bnode) = &quad.0 {
624 map.entry(*bnode).or_default().push(quad.clone());
625 }
626 #[cfg(feature = "rdf-star")]
627 if let InternedSubject::Triple(t) = &quad.0 {
628 Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
629 }
630 if let InternedTerm::BlankNode(bnode) = &quad.2 {
631 map.entry(*bnode).or_default().push(quad.clone());
632 }
633 #[cfg(feature = "rdf-star")]
634 if let InternedTerm::Triple(t) = &quad.2 {
635 Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, &mut map);
636 }
637 if let InternedGraphName::BlankNode(bnode) = &quad.3 {
638 map.entry(*bnode).or_default().push(quad.clone());
639 }
640 }
641 map
642 }
643
644 #[cfg(feature = "rdf-star")]
645 fn add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(
646 quad: &(
647 InternedSubject,
648 InternedNamedNode,
649 InternedTerm,
650 InternedGraphName,
651 ),
652 triple: &InternedTriple,
653 map: &mut QuadsPerBlankNode,
654 ) {
655 if let InternedSubject::BlankNode(bnode) = &triple.subject {
656 map.entry(*bnode).or_default().push(quad.clone());
657 }
658 if let InternedSubject::Triple(t) = &triple.subject {
659 Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
660 }
661 if let InternedTerm::BlankNode(bnode) = &triple.object {
662 map.entry(*bnode).or_default().push(quad.clone());
663 }
664 if let InternedTerm::Triple(t) = &triple.object {
665 Self::add_quad_with_quoted_triple_to_quad_per_blank_nodes_map(quad, t, map);
666 }
667 }
668
669 fn hash_bnodes(
670 &self,
671 mut hashes: HashMap<InternedBlankNode, u64>,
672 quads_per_blank_node: &QuadsPerBlankNode,
673 ) -> (
674 HashMap<InternedBlankNode, u64>,
675 Vec<(u64, Vec<InternedBlankNode>)>,
676 ) {
677 let mut to_hash = Vec::new();
678 let mut to_do = hashes
679 .keys()
680 .map(|bnode| (*bnode, true))
681 .collect::<HashMap<_, _>>();
682 let mut partition = HashMap::<_, Vec<_>>::with_capacity(hashes.len());
683 let mut old_partition_count = usize::MAX;
684 while old_partition_count != partition.len() {
685 old_partition_count = partition.len();
686 partition.clear();
687 let mut new_hashes = hashes.clone();
688 for bnode in hashes.keys() {
689 let hash = if to_do.contains_key(bnode) {
690 for (s, p, o, g) in &quads_per_blank_node[bnode] {
691 to_hash.push((
692 self.hash_subject(s, *bnode, &hashes),
693 self.hash_named_node(*p),
694 self.hash_term(o, *bnode, &hashes),
695 self.hash_graph_name(g, *bnode, &hashes),
696 ));
697 }
698 to_hash.sort_unstable();
699 let hash = Self::hash_tuple((&to_hash, hashes[bnode]));
700 to_hash.clear();
701 if hash == hashes[bnode] {
702 to_do.insert(*bnode, false);
703 } else {
704 new_hashes.insert(*bnode, hash);
705 }
706 hash
707 } else {
708 hashes[bnode]
709 };
710 partition.entry(hash).or_default().push(*bnode);
711 }
712 hashes = new_hashes;
713 }
714 let mut partition: Vec<_> = partition.into_iter().collect();
715 partition.sort_unstable_by(|(h1, b1), (h2, b2)| (b1.len(), h1).cmp(&(b2.len(), h2)));
716 (hashes, partition)
717 }
718
719 fn hash_named_node(&self, node: InternedNamedNode) -> u64 {
720 Self::hash_tuple(node.decode_from(&self.interner))
721 }
722
723 fn hash_blank_node(
724 node: InternedBlankNode,
725 current_blank_node: InternedBlankNode,
726 bnodes_hash: &HashMap<InternedBlankNode, u64>,
727 ) -> u64 {
728 if node == current_blank_node {
729 u64::MAX
730 } else {
731 bnodes_hash[&node]
732 }
733 }
734
735 fn hash_subject(
736 &self,
737 node: &InternedSubject,
738 current_blank_node: InternedBlankNode,
739 bnodes_hash: &HashMap<InternedBlankNode, u64>,
740 ) -> u64 {
741 match node {
742 InternedSubject::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
743 InternedSubject::BlankNode(bnode) => {
744 Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
745 }
746 #[cfg(feature = "rdf-star")]
747 InternedSubject::Triple(triple) => {
748 self.hash_triple(triple, current_blank_node, bnodes_hash)
749 }
750 }
751 }
752
753 fn hash_term(
754 &self,
755 term: &InternedTerm,
756 current_blank_node: InternedBlankNode,
757 bnodes_hash: &HashMap<InternedBlankNode, u64>,
758 ) -> u64 {
759 match term {
760 InternedTerm::NamedNode(node) => Self::hash_tuple(node.decode_from(&self.interner)),
761 InternedTerm::BlankNode(bnode) => {
762 Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
763 }
764 InternedTerm::Literal(literal) => Self::hash_tuple(literal.decode_from(&self.interner)),
765 #[cfg(feature = "rdf-star")]
766 InternedTerm::Triple(triple) => {
767 self.hash_triple(triple, current_blank_node, bnodes_hash)
768 }
769 }
770 }
771
772 fn hash_graph_name(
773 &self,
774 graph_name: &InternedGraphName,
775 current_blank_node: InternedBlankNode,
776 bnodes_hash: &HashMap<InternedBlankNode, u64>,
777 ) -> u64 {
778 match graph_name {
779 InternedGraphName::NamedNode(node) => {
780 Self::hash_tuple(node.decode_from(&self.interner))
781 }
782 InternedGraphName::BlankNode(bnode) => {
783 Self::hash_blank_node(*bnode, current_blank_node, bnodes_hash)
784 }
785 InternedGraphName::DefaultGraph => 0,
786 }
787 }
788
789 #[cfg(feature = "rdf-star")]
790 fn hash_triple(
791 &self,
792 triple: &InternedTriple,
793 current_blank_node: InternedBlankNode,
794 bnodes_hash: &HashMap<InternedBlankNode, u64>,
795 ) -> u64 {
796 Self::hash_tuple((
797 self.hash_subject(&triple.subject, current_blank_node, bnodes_hash),
798 self.hash_named_node(triple.predicate),
799 self.hash_term(&triple.object, current_blank_node, bnodes_hash),
800 ))
801 }
802
803 fn hash_tuple(v: impl Hash) -> u64 {
804 let mut hasher = DefaultHasher::new();
805 v.hash(&mut hasher);
806 hasher.finish()
807 }
808
809 fn distinguish(
810 &self,
811 hash: HashMap<InternedBlankNode, u64>,
812 partition: &[(u64, Vec<InternedBlankNode>)],
813 quads_per_blank_node: &QuadsPerBlankNode,
814 ) -> HashMap<InternedBlankNode, u64> {
815 let b_prime = partition.iter().map(|(_, b)| b).find(|b| b.len() > 1);
816 if let Some(b_prime) = b_prime {
817 b_prime
818 .iter()
819 .map(|b| {
820 let mut hash_prime = hash.clone();
821 hash_prime.insert(*b, Self::hash_tuple((hash_prime[b], 22)));
822 let (hash_prime_prime, partition_prime) =
823 self.hash_bnodes(hash_prime, quads_per_blank_node);
824 self.distinguish(hash_prime_prime, &partition_prime, quads_per_blank_node)
825 })
826 .reduce(|a, b| {
827 let mut a_hashes = a.values().collect::<Vec<_>>();
828 a_hashes.sort();
829 let mut b_hashes = a.values().collect::<Vec<_>>();
830 b_hashes.sort();
831 if a_hashes <= b_hashes {
832 a
833 } else {
834 b
835 }
836 })
837 .unwrap_or_default()
838 } else {
839 hash
840 }
841 }
842
843 #[allow(clippy::needless_collect)]
844 fn map_blank_nodes(
845 &mut self,
846 bnode_mapping: &HashMap<InternedBlankNode, BlankNode>,
847 ) -> Vec<(
848 InternedSubject,
849 InternedNamedNode,
850 InternedTerm,
851 InternedGraphName,
852 )> {
853 let old_quads: Vec<_> = self.spog.iter().cloned().collect();
854 old_quads
855 .into_iter()
856 .map(|(s, p, o, g)| {
857 (
858 match s {
859 InternedSubject::NamedNode(_) => s,
860 InternedSubject::BlankNode(bnode) => {
861 InternedSubject::BlankNode(InternedBlankNode::encoded_into(
862 bnode_mapping[&bnode].as_ref(),
863 &mut self.interner,
864 ))
865 }
866 #[cfg(feature = "rdf-star")]
867 InternedSubject::Triple(triple) => {
868 InternedSubject::Triple(Box::new(InternedTriple::encoded_into(
869 self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
870 &mut self.interner,
871 )))
872 }
873 },
874 p,
875 match o {
876 InternedTerm::NamedNode(_) | InternedTerm::Literal(_) => o,
877 InternedTerm::BlankNode(bnode) => {
878 InternedTerm::BlankNode(InternedBlankNode::encoded_into(
879 bnode_mapping[&bnode].as_ref(),
880 &mut self.interner,
881 ))
882 }
883 #[cfg(feature = "rdf-star")]
884 InternedTerm::Triple(triple) => {
885 InternedTerm::Triple(Box::new(InternedTriple::encoded_into(
886 self.map_triple_blank_nodes(&triple, bnode_mapping).as_ref(),
887 &mut self.interner,
888 )))
889 }
890 },
891 match g {
892 InternedGraphName::NamedNode(_) | InternedGraphName::DefaultGraph => g,
893 InternedGraphName::BlankNode(bnode) => {
894 InternedGraphName::BlankNode(InternedBlankNode::encoded_into(
895 bnode_mapping[&bnode].as_ref(),
896 &mut self.interner,
897 ))
898 }
899 },
900 )
901 })
902 .collect()
903 }
904
905 #[cfg(feature = "rdf-star")]
906 fn map_triple_blank_nodes(
907 &mut self,
908 triple: &InternedTriple,
909 bnode_mapping: &HashMap<InternedBlankNode, BlankNode>,
910 ) -> Triple {
911 Triple {
912 subject: if let InternedSubject::BlankNode(bnode) = &triple.subject {
913 bnode_mapping[bnode].clone().into()
914 } else if let InternedSubject::Triple(t) = &triple.subject {
915 self.map_triple_blank_nodes(t, bnode_mapping).into()
916 } else {
917 triple.subject.decode_from(&self.interner).into_owned()
918 },
919 predicate: triple.predicate.decode_from(&self.interner).into_owned(),
920 object: if let InternedTerm::BlankNode(bnode) = &triple.object {
921 bnode_mapping[bnode].clone().into()
922 } else if let InternedTerm::Triple(t) = &triple.object {
923 self.map_triple_blank_nodes(t, bnode_mapping).into()
924 } else {
925 triple.object.decode_from(&self.interner).into_owned()
926 },
927 }
928 }
929}
930
931impl PartialEq for Dataset {
932 fn eq(&self, other: &Self) -> bool {
933 if self.len() != other.len() {
934 return false;
935 }
936 for q in self {
937 if !other.contains(q) {
938 return false;
939 }
940 }
941 true
942 }
943}
944
945impl Eq for Dataset {}
946
947impl<'a> IntoIterator for &'a Dataset {
948 type Item = QuadRef<'a>;
949 type IntoIter = Iter<'a>;
950
951 fn into_iter(self) -> Self::IntoIter {
952 self.iter()
953 }
954}
955
956impl FromIterator<Quad> for Dataset {
957 fn from_iter<I: IntoIterator<Item = Quad>>(iter: I) -> Self {
958 let mut g = Self::new();
959 g.extend(iter);
960 g
961 }
962}
963
964impl<'a, T: Into<QuadRef<'a>>> FromIterator<T> for Dataset {
965 fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
966 let mut g = Self::new();
967 g.extend(iter);
968 g
969 }
970}
971
972impl Extend<Quad> for Dataset {
973 fn extend<I: IntoIterator<Item = Quad>>(&mut self, iter: I) {
974 for t in iter {
975 self.insert(&t);
976 }
977 }
978}
979
980impl<'a, T: Into<QuadRef<'a>>> Extend<T> for Dataset {
981 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
982 for t in iter {
983 self.insert(t);
984 }
985 }
986}
987
988impl fmt::Display for Dataset {
989 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
990 for t in self {
991 writeln!(f, "{t} .")?;
992 }
993 Ok(())
994 }
995}
996
997#[derive(Clone, Debug)]
1014pub struct GraphView<'a> {
1015 dataset: &'a Dataset,
1016 graph_name: InternedGraphName,
1017}
1018
1019impl<'a> GraphView<'a> {
1020 pub fn iter(&self) -> GraphViewIter<'a> {
1022 let iter = self.dataset.gspo.range(
1023 &(
1024 self.graph_name.clone(),
1025 InternedSubject::first(),
1026 InternedNamedNode::first(),
1027 InternedTerm::first(),
1028 )
1029 ..&(
1030 self.graph_name.next(),
1031 InternedSubject::first(),
1032 InternedNamedNode::first(),
1033 InternedTerm::first(),
1034 ),
1035 );
1036 GraphViewIter {
1037 dataset: self.dataset,
1038 inner: iter,
1039 }
1040 }
1041
1042 pub fn triples_for_subject<'b>(
1043 &self,
1044 subject: impl Into<SubjectRef<'b>>,
1045 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1046 self.triples_for_interned_subject(self.dataset.encoded_subject(subject))
1047 }
1048
1049 pub(super) fn triples_for_interned_subject(
1050 &self,
1051 subject: Option<InternedSubject>,
1052 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1053 let subject = subject.unwrap_or_else(InternedSubject::impossible);
1054 let ds = self.dataset;
1055 self.dataset
1056 .gspo
1057 .range(
1058 &(
1059 self.graph_name.clone(),
1060 subject.clone(),
1061 InternedNamedNode::first(),
1062 InternedTerm::first(),
1063 )
1064 ..&(
1065 self.graph_name.clone(),
1066 subject.next(),
1067 InternedNamedNode::first(),
1068 InternedTerm::first(),
1069 ),
1070 )
1071 .map(move |q| {
1072 let (_, s, p, o) = q;
1073 ds.decode_spo((s, p, o))
1074 })
1075 }
1076
1077 pub fn objects_for_subject_predicate<'b>(
1078 &self,
1079 subject: impl Into<SubjectRef<'b>>,
1080 predicate: impl Into<NamedNodeRef<'b>>,
1081 ) -> impl Iterator<Item = TermRef<'a>> + 'a {
1082 self.objects_for_interned_subject_predicate(
1083 self.dataset.encoded_subject(subject),
1084 self.dataset.encoded_named_node(predicate),
1085 )
1086 }
1087
1088 pub(super) fn objects_for_interned_subject_predicate(
1089 &self,
1090 subject: Option<InternedSubject>,
1091 predicate: Option<InternedNamedNode>,
1092 ) -> impl Iterator<Item = TermRef<'a>> + 'a {
1093 let subject = subject.unwrap_or_else(InternedSubject::impossible);
1094 let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible);
1095 let ds = self.dataset;
1096 self.dataset
1097 .gspo
1098 .range(
1099 &(
1100 self.graph_name.clone(),
1101 subject.clone(),
1102 predicate,
1103 InternedTerm::first(),
1104 )
1105 ..&(
1106 self.graph_name.clone(),
1107 subject,
1108 predicate.next(),
1109 InternedTerm::first(),
1110 ),
1111 )
1112 .map(move |q| q.3.decode_from(&ds.interner))
1113 }
1114
1115 pub fn object_for_subject_predicate<'b>(
1116 &self,
1117 subject: impl Into<SubjectRef<'b>>,
1118 predicate: impl Into<NamedNodeRef<'b>>,
1119 ) -> Option<TermRef<'a>> {
1120 self.objects_for_subject_predicate(subject, predicate)
1121 .next()
1122 }
1123
1124 pub fn predicates_for_subject_object<'b>(
1125 &self,
1126 subject: impl Into<SubjectRef<'b>>,
1127 object: impl Into<TermRef<'b>>,
1128 ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a {
1129 self.predicates_for_interned_subject_object(
1130 self.dataset.encoded_subject(subject),
1131 self.dataset.encoded_term(object),
1132 )
1133 }
1134
1135 pub(super) fn predicates_for_interned_subject_object(
1136 &self,
1137 subject: Option<InternedSubject>,
1138 object: Option<InternedTerm>,
1139 ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a {
1140 let subject = subject.unwrap_or_else(InternedSubject::impossible);
1141 let object = object.unwrap_or_else(InternedTerm::impossible);
1142 let ds = self.dataset;
1143 self.dataset
1144 .gosp
1145 .range(
1146 &(
1147 self.graph_name.clone(),
1148 object.clone(),
1149 subject.clone(),
1150 InternedNamedNode::first(),
1151 )
1152 ..&(
1153 self.graph_name.clone(),
1154 object,
1155 subject.next(),
1156 InternedNamedNode::first(),
1157 ),
1158 )
1159 .map(move |q| q.3.decode_from(&ds.interner))
1160 }
1161
1162 pub fn triples_for_predicate<'b>(
1163 &self,
1164 predicate: impl Into<NamedNodeRef<'b>>,
1165 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1166 self.triples_for_interned_predicate(self.dataset.encoded_named_node(predicate))
1167 }
1168
1169 pub(super) fn triples_for_interned_predicate(
1170 &self,
1171 predicate: Option<InternedNamedNode>,
1172 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1173 let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible);
1174 let ds = self.dataset;
1175 self.dataset
1176 .gpos
1177 .range(
1178 &(
1179 self.graph_name.clone(),
1180 predicate,
1181 InternedTerm::first(),
1182 InternedSubject::first(),
1183 )
1184 ..&(
1185 self.graph_name.clone(),
1186 predicate.next(),
1187 InternedTerm::first(),
1188 InternedSubject::first(),
1189 ),
1190 )
1191 .map(move |(_, p, o, s)| ds.decode_spo((s, p, o)))
1192 }
1193
1194 pub fn subjects_for_predicate_object<'b>(
1195 &self,
1196 predicate: impl Into<NamedNodeRef<'b>>,
1197 object: impl Into<TermRef<'b>>,
1198 ) -> impl Iterator<Item = SubjectRef<'a>> + 'a {
1199 self.subjects_for_interned_predicate_object(
1200 self.dataset.encoded_named_node(predicate),
1201 self.dataset.encoded_term(object),
1202 )
1203 }
1204
1205 pub(super) fn subjects_for_interned_predicate_object(
1206 &self,
1207 predicate: Option<InternedNamedNode>,
1208 object: Option<InternedTerm>,
1209 ) -> impl Iterator<Item = SubjectRef<'a>> + 'a {
1210 let predicate = predicate.unwrap_or_else(InternedNamedNode::impossible);
1211 let object = object.unwrap_or_else(InternedTerm::impossible);
1212 let ds = self.dataset;
1213 self.dataset
1214 .gpos
1215 .range(
1216 &(
1217 self.graph_name.clone(),
1218 predicate,
1219 object.clone(),
1220 InternedSubject::first(),
1221 )
1222 ..&(
1223 self.graph_name.clone(),
1224 predicate,
1225 object.next(),
1226 InternedSubject::first(),
1227 ),
1228 )
1229 .map(move |q| q.3.decode_from(&ds.interner))
1230 }
1231
1232 pub fn subject_for_predicate_object<'b>(
1233 &self,
1234 predicate: impl Into<NamedNodeRef<'b>>,
1235 object: impl Into<TermRef<'b>>,
1236 ) -> Option<SubjectRef<'a>> {
1237 self.subjects_for_predicate_object(predicate, object).next()
1238 }
1239
1240 pub fn triples_for_object<'b>(
1241 &self,
1242 object: impl Into<TermRef<'b>>,
1243 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1244 self.triples_for_interned_object(self.dataset.encoded_term(object))
1245 }
1246
1247 pub(super) fn triples_for_interned_object(
1248 &self,
1249 object: Option<InternedTerm>,
1250 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1251 let object = object.unwrap_or_else(InternedTerm::impossible);
1252 let ds = self.dataset;
1253 self.dataset
1254 .gosp
1255 .range(
1256 &(
1257 self.graph_name.clone(),
1258 object.clone(),
1259 InternedSubject::first(),
1260 InternedNamedNode::first(),
1261 )
1262 ..&(
1263 self.graph_name.clone(),
1264 object.next(),
1265 InternedSubject::first(),
1266 InternedNamedNode::first(),
1267 ),
1268 )
1269 .map(move |(_, o, s, p)| ds.decode_spo((s, p, o)))
1270 }
1271
1272 pub fn contains<'b>(&self, triple: impl Into<TripleRef<'b>>) -> bool {
1274 if let Some(triple) = self.encoded_triple(triple.into()) {
1275 self.dataset.gspo.contains(&(
1276 self.graph_name.clone(),
1277 triple.subject,
1278 triple.predicate,
1279 triple.object,
1280 ))
1281 } else {
1282 false
1283 }
1284 }
1285
1286 pub fn len(&self) -> usize {
1288 self.iter().count()
1289 }
1290
1291 pub fn is_empty(&self) -> bool {
1293 self.iter().next().is_none()
1294 }
1295
1296 fn encoded_triple(&self, triple: TripleRef<'_>) -> Option<InternedTriple> {
1297 Some(InternedTriple {
1298 subject: self.dataset.encoded_subject(triple.subject)?,
1299 predicate: self.dataset.encoded_named_node(triple.predicate)?,
1300 object: self.dataset.encoded_term(triple.object)?,
1301 })
1302 }
1303}
1304
1305impl<'a> IntoIterator for GraphView<'a> {
1306 type Item = TripleRef<'a>;
1307 type IntoIter = GraphViewIter<'a>;
1308
1309 fn into_iter(self) -> Self::IntoIter {
1310 self.iter()
1311 }
1312}
1313
1314impl<'a> IntoIterator for &GraphView<'a> {
1315 type Item = TripleRef<'a>;
1316 type IntoIter = GraphViewIter<'a>;
1317
1318 fn into_iter(self) -> Self::IntoIter {
1319 self.iter()
1320 }
1321}
1322
1323impl fmt::Display for GraphView<'_> {
1324 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1325 for t in self {
1326 writeln!(f, "{t} .")?;
1327 }
1328 Ok(())
1329 }
1330}
1331
1332#[derive(Debug)]
1357pub struct GraphViewMut<'a> {
1358 dataset: &'a mut Dataset,
1359 graph_name: InternedGraphName,
1360}
1361
1362impl<'a> GraphViewMut<'a> {
1363 fn read(&self) -> GraphView<'_> {
1364 GraphView {
1365 dataset: self.dataset,
1366 graph_name: self.graph_name.clone(),
1367 }
1368 }
1369
1370 pub fn insert<'b>(&mut self, triple: impl Into<TripleRef<'b>>) -> bool {
1372 let triple = self.encode_triple(triple.into());
1373 self.dataset.insert_encoded((
1374 triple.subject,
1375 triple.predicate,
1376 triple.object,
1377 self.graph_name.clone(),
1378 ))
1379 }
1380
1381 pub fn remove<'b>(&mut self, triple: impl Into<TripleRef<'b>>) -> bool {
1383 if let Some(triple) = self.read().encoded_triple(triple.into()) {
1384 self.dataset.remove_encoded((
1385 triple.subject,
1386 triple.predicate,
1387 triple.object,
1388 self.graph_name.clone(),
1389 ))
1390 } else {
1391 false
1392 }
1393 }
1394
1395 fn encode_triple(&mut self, triple: TripleRef<'_>) -> InternedTriple {
1396 InternedTriple {
1397 subject: InternedSubject::encoded_into(triple.subject, &mut self.dataset.interner),
1398 predicate: InternedNamedNode::encoded_into(
1399 triple.predicate,
1400 &mut self.dataset.interner,
1401 ),
1402 object: InternedTerm::encoded_into(triple.object, &mut self.dataset.interner),
1403 }
1404 }
1405
1406 pub fn iter(&'a self) -> GraphViewIter<'a> {
1408 self.read().iter()
1409 }
1410
1411 pub fn triples_for_subject<'b>(
1412 &'a self,
1413 subject: impl Into<SubjectRef<'b>>,
1414 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1415 self.read()
1416 .triples_for_interned_subject(self.dataset.encoded_subject(subject))
1417 }
1418
1419 pub fn objects_for_subject_predicate<'b>(
1420 &'a self,
1421 subject: impl Into<SubjectRef<'b>>,
1422 predicate: impl Into<NamedNodeRef<'b>>,
1423 ) -> impl Iterator<Item = TermRef<'a>> + 'a {
1424 self.read().objects_for_interned_subject_predicate(
1425 self.dataset.encoded_subject(subject),
1426 self.dataset.encoded_named_node(predicate),
1427 )
1428 }
1429
1430 pub fn object_for_subject_predicate<'b>(
1431 &'a self,
1432 subject: impl Into<SubjectRef<'b>>,
1433 predicate: impl Into<NamedNodeRef<'b>>,
1434 ) -> Option<TermRef<'a>> {
1435 self.read().object_for_subject_predicate(subject, predicate)
1436 }
1437
1438 pub fn predicates_for_subject_object<'b>(
1439 &'a self,
1440 subject: impl Into<SubjectRef<'b>>,
1441 object: impl Into<TermRef<'b>>,
1442 ) -> impl Iterator<Item = NamedNodeRef<'a>> + 'a {
1443 self.read().predicates_for_interned_subject_object(
1444 self.dataset.encoded_subject(subject),
1445 self.dataset.encoded_term(object),
1446 )
1447 }
1448
1449 pub fn triples_for_predicate<'b>(
1450 &'a self,
1451 predicate: impl Into<NamedNodeRef<'b>>,
1452 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1453 self.read()
1454 .triples_for_interned_predicate(self.dataset.encoded_named_node(predicate))
1455 }
1456
1457 pub fn subjects_for_predicate_object<'b>(
1458 &'a self,
1459 predicate: impl Into<NamedNodeRef<'b>>,
1460 object: impl Into<TermRef<'b>>,
1461 ) -> impl Iterator<Item = SubjectRef<'a>> + 'a {
1462 self.read().subjects_for_interned_predicate_object(
1463 self.dataset.encoded_named_node(predicate),
1464 self.dataset.encoded_term(object),
1465 )
1466 }
1467
1468 pub fn subject_for_predicate_object<'b>(
1469 &'a self,
1470 predicate: impl Into<NamedNodeRef<'b>>,
1471 object: impl Into<TermRef<'b>>,
1472 ) -> Option<SubjectRef<'a>> {
1473 self.read().subject_for_predicate_object(predicate, object)
1474 }
1475
1476 pub fn triples_for_object<'b>(
1477 &'a self,
1478 object: TermRef<'b>,
1479 ) -> impl Iterator<Item = TripleRef<'a>> + 'a {
1480 self.read()
1481 .triples_for_interned_object(self.dataset.encoded_term(object))
1482 }
1483
1484 pub fn contains<'b>(&self, triple: impl Into<TripleRef<'b>>) -> bool {
1486 self.read().contains(triple)
1487 }
1488
1489 pub fn len(&self) -> usize {
1491 self.read().len()
1492 }
1493
1494 pub fn is_empty(&self) -> bool {
1496 self.read().is_empty()
1497 }
1498}
1499
1500impl Extend<Triple> for GraphViewMut<'_> {
1501 fn extend<I: IntoIterator<Item = Triple>>(&mut self, iter: I) {
1502 for t in iter {
1503 self.insert(&t);
1504 }
1505 }
1506}
1507
1508impl<'b, T: Into<TripleRef<'b>>> Extend<T> for GraphViewMut<'_> {
1509 fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
1510 for t in iter {
1511 self.insert(t);
1512 }
1513 }
1514}
1515
1516impl<'a> IntoIterator for &'a GraphViewMut<'a> {
1517 type Item = TripleRef<'a>;
1518 type IntoIter = GraphViewIter<'a>;
1519
1520 fn into_iter(self) -> Self::IntoIter {
1521 self.iter()
1522 }
1523}
1524
1525impl fmt::Display for GraphViewMut<'_> {
1526 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1527 for t in self {
1528 writeln!(f, "{t}")?;
1529 }
1530 Ok(())
1531 }
1532}
1533
1534pub struct Iter<'a> {
1536 dataset: &'a Dataset,
1537 inner: std::collections::btree_set::Iter<
1538 'a,
1539 (
1540 InternedSubject,
1541 InternedNamedNode,
1542 InternedTerm,
1543 InternedGraphName,
1544 ),
1545 >,
1546}
1547
1548impl<'a> Iterator for Iter<'a> {
1549 type Item = QuadRef<'a>;
1550
1551 fn next(&mut self) -> Option<Self::Item> {
1552 self.inner
1553 .next()
1554 .map(|(s, p, o, g)| self.dataset.decode_spog((s, p, o, g)))
1555 }
1556}
1557
1558pub struct GraphViewIter<'a> {
1560 dataset: &'a Dataset,
1561 inner: std::collections::btree_set::Range<
1562 'a,
1563 (
1564 InternedGraphName,
1565 InternedSubject,
1566 InternedNamedNode,
1567 InternedTerm,
1568 ),
1569 >,
1570}
1571
1572impl<'a> Iterator for GraphViewIter<'a> {
1573 type Item = TripleRef<'a>;
1574
1575 fn next(&mut self) -> Option<Self::Item> {
1576 self.inner
1577 .next()
1578 .map(|(_, s, p, o)| self.dataset.decode_spo((s, p, o)))
1579 }
1580}
1581
1582type QuadsPerBlankNode = HashMap<
1583 InternedBlankNode,
1584 Vec<(
1585 InternedSubject,
1586 InternedNamedNode,
1587 InternedTerm,
1588 InternedGraphName,
1589 )>,
1590>;
1591
1592#[derive(Default, Debug, Clone, Copy, Eq, PartialEq, Hash)]
1596#[non_exhaustive]
1597pub enum CanonicalizationAlgorithm {
1598 #[default]
1602 Unstable,
1603}
1604
1605#[cfg(test)]
1606mod tests {
1607 use super::*;
1608
1609 #[test]
1610 fn test_canon() {
1611 let mut dataset = Dataset::new();
1612 dataset.insert(QuadRef::new(
1613 BlankNode::default().as_ref(),
1614 NamedNodeRef::new_unchecked("http://ex"),
1615 BlankNode::default().as_ref(),
1616 GraphNameRef::DefaultGraph,
1617 ));
1618 dataset.insert(QuadRef::new(
1619 BlankNode::default().as_ref(),
1620 NamedNodeRef::new_unchecked("http://ex"),
1621 BlankNode::default().as_ref(),
1622 GraphNameRef::DefaultGraph,
1623 ));
1624 dataset.canonicalize(CanonicalizationAlgorithm::Unstable);
1625 let mut dataset2 = Dataset::new();
1626 dataset2.insert(QuadRef::new(
1627 BlankNode::default().as_ref(),
1628 NamedNodeRef::new_unchecked("http://ex"),
1629 BlankNode::default().as_ref(),
1630 GraphNameRef::DefaultGraph,
1631 ));
1632 dataset2.insert(QuadRef::new(
1633 BlankNode::default().as_ref(),
1634 NamedNodeRef::new_unchecked("http://ex"),
1635 BlankNode::default().as_ref(),
1636 GraphNameRef::DefaultGraph,
1637 ));
1638 dataset2.canonicalize(CanonicalizationAlgorithm::Unstable);
1639 assert_eq!(dataset, dataset2);
1640 }
1641}