1use crate::algebra::{
2 Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression,
3};
4use crate::type_inference::{
5 infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes,
6};
7use oxrdf::Variable;
8use spargebra::algebra::PropertyPathExpression;
9use spargebra::term::{GroundTermPattern, NamedNodePattern};
10use std::cmp::{max, min};
11
12pub struct Optimizer;
13
14impl Optimizer {
15 pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern {
16 let pattern = Self::normalize_pattern(pattern, &VariableTypes::default());
17 let pattern = Self::reorder_joins(pattern, &VariableTypes::default());
18 Self::push_filters(pattern, Vec::new(), &VariableTypes::default())
19 }
20
21 fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
23 match pattern {
24 GraphPattern::QuadPattern {
25 subject,
26 predicate,
27 object,
28 graph_name,
29 } => GraphPattern::QuadPattern {
30 subject,
31 predicate,
32 object,
33 graph_name,
34 },
35 GraphPattern::Path {
36 subject,
37 path,
38 object,
39 graph_name,
40 } => GraphPattern::Path {
41 subject,
42 path,
43 object,
44 graph_name,
45 },
46 GraphPattern::Graph { graph_name } => GraphPattern::Graph { graph_name },
47 GraphPattern::Join {
48 left,
49 right,
50 algorithm,
51 } => GraphPattern::join(
52 Self::normalize_pattern(*left, input_types),
53 Self::normalize_pattern(*right, input_types),
54 algorithm,
55 ),
56 GraphPattern::LeftJoin {
57 left,
58 right,
59 expression,
60 algorithm,
61 } => {
62 let left = Self::normalize_pattern(*left, input_types);
63 let right = Self::normalize_pattern(*right, input_types);
64 let mut inner_types = infer_graph_pattern_types(&left, input_types.clone());
65 inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone()));
66 GraphPattern::left_join(
67 left,
68 right,
69 Self::normalize_expression(expression, &inner_types),
70 algorithm,
71 )
72 }
73 #[cfg(feature = "sep-0006")]
74 GraphPattern::Lateral { left, right } => {
75 let left = Self::normalize_pattern(*left, input_types);
76 let left_types = infer_graph_pattern_types(&left, input_types.clone());
77 let right = Self::normalize_pattern(*right, &left_types);
78 GraphPattern::lateral(left, right)
79 }
80 GraphPattern::Filter { inner, expression } => {
81 let inner = Self::normalize_pattern(*inner, input_types);
82 let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
83 let expression = Self::normalize_expression(expression, &inner_types);
84 let expression_type = infer_expression_type(&expression, &inner_types);
85 if expression_type == VariableType::UNDEF {
86 GraphPattern::empty()
87 } else {
88 GraphPattern::filter(inner, expression)
89 }
90 }
91 GraphPattern::Union { inner } => GraphPattern::union_all(
92 inner
93 .into_iter()
94 .map(|e| Self::normalize_pattern(e, input_types)),
95 ),
96 GraphPattern::Extend {
97 inner,
98 variable,
99 expression,
100 } => {
101 let inner = Self::normalize_pattern(*inner, input_types);
102 let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
103 let expression = Self::normalize_expression(expression, &inner_types);
104 let expression_type = infer_expression_type(&expression, &inner_types);
105 if expression_type == VariableType::UNDEF {
106 inner
108 } else {
109 GraphPattern::extend(inner, variable, expression)
110 }
111 }
112 GraphPattern::Minus {
113 left,
114 right,
115 algorithm,
116 } => GraphPattern::minus(
117 Self::normalize_pattern(*left, input_types),
118 Self::normalize_pattern(*right, input_types),
119 algorithm,
120 ),
121 GraphPattern::Values {
122 variables,
123 bindings,
124 } => GraphPattern::values(variables, bindings),
125 GraphPattern::OrderBy { inner, expression } => {
126 let inner = Self::normalize_pattern(*inner, input_types);
127 let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
128 GraphPattern::order_by(
129 inner,
130 expression
131 .into_iter()
132 .map(|e| match e {
133 OrderExpression::Asc(e) => {
134 OrderExpression::Asc(Self::normalize_expression(e, &inner_types))
135 }
136 OrderExpression::Desc(e) => {
137 OrderExpression::Desc(Self::normalize_expression(e, &inner_types))
138 }
139 })
140 .collect(),
141 )
142 }
143 GraphPattern::Project { inner, variables } => {
144 GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables)
145 }
146 GraphPattern::Distinct { inner } => {
147 GraphPattern::distinct(Self::normalize_pattern(*inner, input_types))
148 }
149 GraphPattern::Reduced { inner } => {
150 GraphPattern::reduced(Self::normalize_pattern(*inner, input_types))
151 }
152 GraphPattern::Slice {
153 inner,
154 start,
155 length,
156 } => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length),
157 GraphPattern::Group {
158 inner,
159 variables,
160 aggregates,
161 } => {
162 GraphPattern::group(
164 Self::normalize_pattern(*inner, input_types),
165 variables,
166 aggregates,
167 )
168 }
169 GraphPattern::Service { .. } => {
170 pattern
172 }
173 }
174 }
175
176 fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression {
177 match expression {
178 Expression::NamedNode(node) => node.into(),
179 Expression::Literal(literal) => literal.into(),
180 Expression::Variable(variable) => variable.into(),
181 Expression::Or(inner) => Expression::or_all(
182 inner
183 .into_iter()
184 .map(|e| Self::normalize_expression(e, types)),
185 ),
186 Expression::And(inner) => Expression::and_all(
187 inner
188 .into_iter()
189 .map(|e| Self::normalize_expression(e, types)),
190 ),
191 Expression::Equal(left, right) => {
192 let left = Self::normalize_expression(*left, types);
193 let left_types = infer_expression_type(&left, types);
194 let right = Self::normalize_expression(*right, types);
195 let right_types = infer_expression_type(&right, types);
196 #[allow(unused_mut)]
197 let mut must_use_equal = left_types.literal && right_types.literal;
198 #[cfg(feature = "rdf-star")]
199 {
200 must_use_equal = must_use_equal || left_types.triple && right_types.triple;
201 }
202 if must_use_equal {
203 Expression::equal(left, right)
204 } else {
205 Expression::same_term(left, right)
206 }
207 }
208 Expression::SameTerm(left, right) => Expression::same_term(
209 Self::normalize_expression(*left, types),
210 Self::normalize_expression(*right, types),
211 ),
212 Expression::Greater(left, right) => Expression::greater(
213 Self::normalize_expression(*left, types),
214 Self::normalize_expression(*right, types),
215 ),
216 Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal(
217 Self::normalize_expression(*left, types),
218 Self::normalize_expression(*right, types),
219 ),
220 Expression::Less(left, right) => Expression::less(
221 Self::normalize_expression(*left, types),
222 Self::normalize_expression(*right, types),
223 ),
224 Expression::LessOrEqual(left, right) => Expression::less_or_equal(
225 Self::normalize_expression(*left, types),
226 Self::normalize_expression(*right, types),
227 ),
228 Expression::Add(left, right) => {
229 Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types)
230 }
231 Expression::Subtract(left, right) => {
232 Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types)
233 }
234 Expression::Multiply(left, right) => {
235 Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types)
236 }
237 Expression::Divide(left, right) => {
238 Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types)
239 }
240 Expression::UnaryPlus(inner) => {
241 Expression::unary_plus(Self::normalize_expression(*inner, types))
242 }
243 Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types),
244 Expression::Not(inner) => !Self::normalize_expression(*inner, types),
245 Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)),
246 Expression::Bound(variable) => {
247 let t = types.get(&variable);
248 if !t.undef {
249 true.into()
250 } else if t == VariableType::UNDEF {
251 false.into()
252 } else {
253 Expression::Bound(variable)
254 }
255 }
256 Expression::If(cond, then, els) => Expression::if_cond(
257 Self::normalize_expression(*cond, types),
258 Self::normalize_expression(*then, types),
259 Self::normalize_expression(*els, types),
260 ),
261 Expression::Coalesce(inners) => Expression::coalesce(
262 inners
263 .into_iter()
264 .map(|e| Self::normalize_expression(e, types))
265 .collect(),
266 ),
267 Expression::FunctionCall(name, args) => Expression::call(
268 name,
269 args.into_iter()
270 .map(|e| Self::normalize_expression(e, types))
271 .collect(),
272 ),
273 }
274 }
275
276 fn push_filters(
277 pattern: GraphPattern,
278 mut filters: Vec<Expression>,
279 input_types: &VariableTypes,
280 ) -> GraphPattern {
281 match pattern {
282 GraphPattern::QuadPattern { .. }
283 | GraphPattern::Path { .. }
284 | GraphPattern::Graph { .. }
285 | GraphPattern::Values { .. } => {
286 GraphPattern::filter(pattern, Expression::and_all(filters))
287 }
288 GraphPattern::Join {
289 left,
290 right,
291 algorithm,
292 } => {
293 let left_types = infer_graph_pattern_types(&left, input_types.clone());
294 let right_types = infer_graph_pattern_types(&right, input_types.clone());
295 let mut left_filters = Vec::new();
296 let mut right_filters = Vec::new();
297 let mut final_filters = Vec::new();
298 for filter in filters {
299 let push_left = are_all_expression_variables_bound(&filter, &left_types);
300 let push_right = are_all_expression_variables_bound(&filter, &right_types);
301 if push_left {
302 if push_right {
303 left_filters.push(filter.clone());
304 right_filters.push(filter);
305 } else {
306 left_filters.push(filter);
307 }
308 } else if push_right {
309 right_filters.push(filter);
310 } else {
311 final_filters.push(filter);
312 }
313 }
314 GraphPattern::filter(
315 GraphPattern::join(
316 Self::push_filters(*left, left_filters, input_types),
317 Self::push_filters(*right, right_filters, input_types),
318 algorithm,
319 ),
320 Expression::and_all(final_filters),
321 )
322 }
323 #[cfg(feature = "sep-0006")]
324 GraphPattern::Lateral { left, right } => {
325 let left_types = infer_graph_pattern_types(&left, input_types.clone());
326 let mut left_filters = Vec::new();
327 let mut right_filters = Vec::new();
328 for filter in filters {
329 let push_left = are_all_expression_variables_bound(&filter, &left_types);
330 if push_left {
331 left_filters.push(filter);
332 } else {
333 right_filters.push(filter);
334 }
335 }
336 let left = Self::push_filters(*left, left_filters, input_types);
337 let right = Self::push_filters(*right, right_filters, &left_types);
338 if let GraphPattern::Filter {
339 inner: inner_right,
340 expression,
341 } = right
342 {
343 GraphPattern::filter(GraphPattern::lateral(left, *inner_right), expression)
345 } else {
346 GraphPattern::lateral(left, right)
347 }
348 }
349 GraphPattern::LeftJoin {
350 left,
351 right,
352 expression,
353 algorithm,
354 } => {
355 let left_types = infer_graph_pattern_types(&left, input_types.clone());
356 let right_types = infer_graph_pattern_types(&right, input_types.clone());
357 let mut left_filters = Vec::new();
358 let mut right_filters = Vec::new();
359 let mut final_filters = Vec::new();
360 for filter in filters {
361 let push_left = are_all_expression_variables_bound(&filter, &left_types);
362 if push_left {
363 left_filters.push(filter);
364 } else {
365 final_filters.push(filter);
366 }
367 }
368 let expression = if expression.effective_boolean_value().is_none()
369 && (are_all_expression_variables_bound(&expression, &right_types)
370 || are_no_expression_variables_bound(&expression, &left_types))
371 {
372 right_filters.push(expression);
373 true.into()
374 } else {
375 expression
376 };
377 GraphPattern::filter(
378 GraphPattern::left_join(
379 Self::push_filters(*left, left_filters, input_types),
380 Self::push_filters(*right, right_filters, input_types),
381 expression,
382 algorithm,
383 ),
384 Expression::and_all(final_filters),
385 )
386 }
387 GraphPattern::Minus {
388 left,
389 right,
390 algorithm,
391 } => GraphPattern::minus(
392 Self::push_filters(*left, filters, input_types),
393 Self::push_filters(*right, Vec::new(), input_types),
394 algorithm,
395 ),
396 GraphPattern::Extend {
397 inner,
398 expression,
399 variable,
400 } => {
401 let mut inner_filters = Vec::new();
403 let mut final_filters = Vec::new();
404 for filter in filters {
405 let extend_variable_used =
406 filter.used_variables().into_iter().any(|v| *v == variable);
407 if extend_variable_used {
408 final_filters.push(filter);
409 } else {
410 inner_filters.push(filter);
411 }
412 }
413 GraphPattern::filter(
414 GraphPattern::extend(
415 Self::push_filters(*inner, inner_filters, input_types),
416 variable,
417 expression,
418 ),
419 Expression::and_all(final_filters),
420 )
421 }
422 GraphPattern::Filter { inner, expression } => {
423 if let Expression::And(expressions) = expression {
424 filters.extend(expressions)
425 } else {
426 filters.push(expression)
427 };
428 Self::push_filters(*inner, filters, input_types)
429 }
430 GraphPattern::Union { inner } => GraphPattern::union_all(
431 inner
432 .into_iter()
433 .map(|c| Self::push_filters(c, filters.clone(), input_types)),
434 ),
435 GraphPattern::Slice {
436 inner,
437 start,
438 length,
439 } => GraphPattern::filter(
440 GraphPattern::slice(
441 Self::push_filters(*inner, Vec::new(), input_types),
442 start,
443 length,
444 ),
445 Expression::and_all(filters),
446 ),
447 GraphPattern::Distinct { inner } => {
448 GraphPattern::distinct(Self::push_filters(*inner, filters, input_types))
449 }
450 GraphPattern::Reduced { inner } => {
451 GraphPattern::reduced(Self::push_filters(*inner, filters, input_types))
452 }
453 GraphPattern::Project { inner, variables } => {
454 GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables)
455 }
456 GraphPattern::OrderBy { inner, expression } => {
457 GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression)
458 }
459 GraphPattern::Service { .. } => {
460 GraphPattern::filter(pattern, Expression::and_all(filters))
463 }
464 GraphPattern::Group {
465 inner,
466 variables,
467 aggregates,
468 } => GraphPattern::filter(
469 GraphPattern::group(
470 Self::push_filters(*inner, Vec::new(), input_types),
471 variables,
472 aggregates,
473 ),
474 Expression::and_all(filters),
475 ),
476 }
477 }
478
479 fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
480 match pattern {
481 GraphPattern::QuadPattern { .. }
482 | GraphPattern::Path { .. }
483 | GraphPattern::Values { .. }
484 | GraphPattern::Graph { .. } => pattern,
485 GraphPattern::Join { left, right, .. } => {
486 let mut to_reorder = Vec::new();
488 let mut todo = vec![*right, *left];
489 while let Some(e) = todo.pop() {
490 if let GraphPattern::Join { left, right, .. } = e {
491 todo.push(*right);
492 todo.push(*left);
493 } else {
494 to_reorder.push(e);
495 }
496 }
497
498 let to_reorder_types = to_reorder
500 .iter()
501 .map(|p| infer_graph_pattern_types(p, input_types.clone()))
502 .collect::<Vec<_>>();
503
504 let mut output_cartesian_product_joins = Vec::new();
506 let mut not_yet_reordered_ids = vec![true; to_reorder.len()];
507 while let Some(next_entry_id) = not_yet_reordered_ids
509 .iter()
510 .enumerate()
511 .filter(|(_, v)| **v)
512 .map(|(i, _)| i)
513 .min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types))
514 {
515 not_yet_reordered_ids[next_entry_id] = false; let mut output = to_reorder[next_entry_id].clone();
517 let mut output_types = to_reorder_types[next_entry_id].clone();
518 while let Some(next_id) = not_yet_reordered_ids
520 .iter()
521 .enumerate()
522 .filter(|(_, v)| **v)
523 .map(|(i, _)| i)
524 .filter(|i| {
525 has_common_variables(&output_types, &to_reorder_types[*i], input_types)
526 })
527 .min_by_key(|i| {
528 if cfg!(feature = "sep-0006")
530 && is_fit_for_for_loop_join(
531 &to_reorder[*i],
532 input_types,
533 &output_types,
534 )
535 {
536 estimate_lateral_cost(
537 &output,
538 &output_types,
539 &to_reorder[*i],
540 input_types,
541 )
542 } else {
543 estimate_join_cost(
544 &output,
545 &to_reorder[*i],
546 &JoinAlgorithm::HashBuildLeftProbeRight {
547 keys: join_key_variables(
548 &output_types,
549 &to_reorder_types[*i],
550 input_types,
551 ),
552 },
553 input_types,
554 )
555 }
556 })
557 {
558 not_yet_reordered_ids[next_id] = false; let next = to_reorder[next_id].clone();
560 #[cfg(feature = "sep-0006")]
561 {
562 output = if is_fit_for_for_loop_join(&next, input_types, &output_types)
563 {
564 GraphPattern::lateral(output, next)
565 } else {
566 GraphPattern::join(
567 output,
568 next,
569 JoinAlgorithm::HashBuildLeftProbeRight {
570 keys: join_key_variables(
571 &output_types,
572 &to_reorder_types[next_id],
573 input_types,
574 ),
575 },
576 )
577 };
578 }
579 #[cfg(not(feature = "sep-0006"))]
580 {
581 output = GraphPattern::join(
582 output,
583 next,
584 JoinAlgorithm::HashBuildLeftProbeRight {
585 keys: join_key_variables(
586 &output_types,
587 &to_reorder_types[next_id],
588 input_types,
589 ),
590 },
591 );
592 }
593 output_types.intersect_with(to_reorder_types[next_id].clone());
594 }
595 output_cartesian_product_joins.push(output);
596 }
597 output_cartesian_product_joins
598 .into_iter()
599 .reduce(|left, right| {
600 let keys = join_key_variables(
601 &infer_graph_pattern_types(&left, input_types.clone()),
602 &infer_graph_pattern_types(&right, input_types.clone()),
603 input_types,
604 );
605 if estimate_graph_pattern_size(&left, input_types)
606 <= estimate_graph_pattern_size(&right, input_types)
607 {
608 GraphPattern::join(
609 left,
610 right,
611 JoinAlgorithm::HashBuildLeftProbeRight { keys },
612 )
613 } else {
614 GraphPattern::join(
615 right,
616 left,
617 JoinAlgorithm::HashBuildLeftProbeRight { keys },
618 )
619 }
620 })
621 .unwrap()
622 }
623 #[cfg(feature = "sep-0006")]
624 GraphPattern::Lateral { left, right } => {
625 let left_types = infer_graph_pattern_types(&left, input_types.clone());
626 GraphPattern::lateral(
627 Self::reorder_joins(*left, input_types),
628 Self::reorder_joins(*right, &left_types),
629 )
630 }
631 GraphPattern::LeftJoin {
632 left,
633 right,
634 expression,
635 ..
636 } => {
637 let left = Self::reorder_joins(*left, input_types);
638 let left_types = infer_graph_pattern_types(&left, input_types.clone());
639 let right = Self::reorder_joins(*right, input_types);
640 let right_types = infer_graph_pattern_types(&right, input_types.clone());
641 #[cfg(feature = "sep-0006")]
642 {
643 if is_fit_for_for_loop_join(&right, input_types, &left_types)
644 && has_common_variables(&left_types, &right_types, input_types)
645 {
646 return GraphPattern::lateral(
647 left,
648 GraphPattern::left_join(
649 GraphPattern::empty_singleton(),
650 right,
651 expression,
652 LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() },
653 ),
654 );
655 }
656 }
657 GraphPattern::left_join(
658 left,
659 right,
660 expression,
661 LeftJoinAlgorithm::HashBuildRightProbeLeft {
662 keys: join_key_variables(&left_types, &right_types, input_types),
663 },
664 )
665 }
666 GraphPattern::Minus { left, right, .. } => {
667 let left = Self::reorder_joins(*left, input_types);
668 let left_types = infer_graph_pattern_types(&left, input_types.clone());
669 let right = Self::reorder_joins(*right, input_types);
670 let right_types = infer_graph_pattern_types(&right, input_types.clone());
671 GraphPattern::minus(
672 left,
673 right,
674 MinusAlgorithm::HashBuildRightProbeLeft {
675 keys: join_key_variables(&left_types, &right_types, input_types),
676 },
677 )
678 }
679 GraphPattern::Extend {
680 inner,
681 expression,
682 variable,
683 } => GraphPattern::extend(
684 Self::reorder_joins(*inner, input_types),
685 variable,
686 expression,
687 ),
688 GraphPattern::Filter { inner, expression } => {
689 GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression)
690 }
691 GraphPattern::Union { inner } => GraphPattern::union_all(
692 inner
693 .into_iter()
694 .map(|c| Self::reorder_joins(c, input_types)),
695 ),
696 GraphPattern::Slice {
697 inner,
698 start,
699 length,
700 } => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length),
701 GraphPattern::Distinct { inner } => {
702 GraphPattern::distinct(Self::reorder_joins(*inner, input_types))
703 }
704 GraphPattern::Reduced { inner } => {
705 GraphPattern::reduced(Self::reorder_joins(*inner, input_types))
706 }
707 GraphPattern::Project { inner, variables } => {
708 GraphPattern::project(Self::reorder_joins(*inner, input_types), variables)
709 }
710 GraphPattern::OrderBy { inner, expression } => {
711 GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression)
712 }
713 GraphPattern::Service { .. } => {
714 pattern
716 }
717 GraphPattern::Group {
718 inner,
719 variables,
720 aggregates,
721 } => GraphPattern::group(
722 Self::reorder_joins(*inner, input_types),
723 variables,
724 aggregates,
725 ),
726 }
727 }
728}
729
730fn is_fit_for_for_loop_join(
731 pattern: &GraphPattern,
732 global_input_types: &VariableTypes,
733 entry_types: &VariableTypes,
734) -> bool {
735 match pattern {
737 GraphPattern::Values { .. }
738 | GraphPattern::QuadPattern { .. }
739 | GraphPattern::Path { .. }
740 | GraphPattern::Graph { .. } => true,
741 #[cfg(feature = "sep-0006")]
742 GraphPattern::Lateral { left, right } => {
743 is_fit_for_for_loop_join(left, global_input_types, entry_types)
744 && is_fit_for_for_loop_join(right, global_input_types, entry_types)
745 }
746 GraphPattern::LeftJoin {
747 left,
748 right,
749 expression,
750 ..
751 } => {
752 if !is_fit_for_for_loop_join(left, global_input_types, entry_types) {
753 return false;
754 }
755
756 let mut left_types = infer_graph_pattern_types(left, global_input_types.clone());
758 let right_types = infer_graph_pattern_types(right, global_input_types.clone());
759 if right_types.iter().any(|(variable, t)| {
760 *t != VariableType::UNDEF
761 && left_types.get(variable).undef
762 && entry_types.get(variable) != VariableType::UNDEF
763 }) {
764 return false;
765 }
766
767 left_types.intersect_with(right_types);
769 is_expression_fit_for_for_loop_join(expression, &left_types, entry_types)
770 }
771 GraphPattern::Union { inner } => inner
772 .iter()
773 .all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)),
774 GraphPattern::Filter { inner, expression } => {
775 is_fit_for_for_loop_join(inner, global_input_types, entry_types)
776 && is_expression_fit_for_for_loop_join(
777 expression,
778 &infer_graph_pattern_types(inner, global_input_types.clone()),
779 entry_types,
780 )
781 }
782 GraphPattern::Extend {
783 inner,
784 expression,
785 variable,
786 } => {
787 is_fit_for_for_loop_join(inner, global_input_types, entry_types)
788 && entry_types.get(variable) == VariableType::UNDEF
789 && is_expression_fit_for_for_loop_join(
790 expression,
791 &infer_graph_pattern_types(inner, global_input_types.clone()),
792 entry_types,
793 )
794 }
795 GraphPattern::Join { .. }
796 | GraphPattern::Minus { .. }
797 | GraphPattern::Service { .. }
798 | GraphPattern::OrderBy { .. }
799 | GraphPattern::Distinct { .. }
800 | GraphPattern::Reduced { .. }
801 | GraphPattern::Slice { .. }
802 | GraphPattern::Project { .. }
803 | GraphPattern::Group { .. } => false,
804 }
805}
806
807fn are_all_expression_variables_bound(
808 expression: &Expression,
809 variable_types: &VariableTypes,
810) -> bool {
811 expression
812 .used_variables()
813 .into_iter()
814 .all(|v| !variable_types.get(v).undef)
815}
816
817fn are_no_expression_variables_bound(
818 expression: &Expression,
819 variable_types: &VariableTypes,
820) -> bool {
821 expression
822 .used_variables()
823 .into_iter()
824 .all(|v| variable_types.get(v) == VariableType::UNDEF)
825}
826
827fn is_expression_fit_for_for_loop_join(
828 expression: &Expression,
829 input_types: &VariableTypes,
830 entry_types: &VariableTypes,
831) -> bool {
832 match expression {
833 Expression::NamedNode(_) | Expression::Literal(_) => true,
834 Expression::Variable(v) | Expression::Bound(v) => {
835 !input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF
836 }
837 Expression::Or(inner)
838 | Expression::And(inner)
839 | Expression::Coalesce(inner)
840 | Expression::FunctionCall(_, inner) => inner
841 .iter()
842 .all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)),
843 Expression::Equal(a, b)
844 | Expression::SameTerm(a, b)
845 | Expression::Greater(a, b)
846 | Expression::GreaterOrEqual(a, b)
847 | Expression::Less(a, b)
848 | Expression::LessOrEqual(a, b)
849 | Expression::Add(a, b)
850 | Expression::Subtract(a, b)
851 | Expression::Multiply(a, b)
852 | Expression::Divide(a, b) => {
853 is_expression_fit_for_for_loop_join(a, input_types, entry_types)
854 && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
855 }
856 Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => {
857 is_expression_fit_for_for_loop_join(e, input_types, entry_types)
858 }
859 Expression::If(a, b, c) => {
860 is_expression_fit_for_for_loop_join(a, input_types, entry_types)
861 && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
862 && is_expression_fit_for_for_loop_join(c, input_types, entry_types)
863 }
864 Expression::Exists(inner) => is_fit_for_for_loop_join(inner, input_types, entry_types),
865 }
866}
867
868fn has_common_variables(
869 left: &VariableTypes,
870 right: &VariableTypes,
871 input_types: &VariableTypes,
872) -> bool {
873 left.iter().any(|(variable, left_type)| {
875 !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
876 })
877}
878
879fn join_key_variables(
880 left: &VariableTypes,
881 right: &VariableTypes,
882 input_types: &VariableTypes,
883) -> Vec<Variable> {
884 left.iter()
885 .filter(|(variable, left_type)| {
886 !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
887 })
888 .map(|(variable, _)| variable.clone())
889 .collect()
890}
891
892fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize {
893 match pattern {
894 GraphPattern::Values { bindings, .. } => bindings.len(),
895 GraphPattern::QuadPattern {
896 subject,
897 predicate,
898 object,
899 ..
900 } => estimate_triple_pattern_size(
901 is_term_pattern_bound(subject, input_types),
902 is_named_node_pattern_bound(predicate, input_types),
903 is_term_pattern_bound(object, input_types),
904 ),
905 GraphPattern::Path {
906 subject,
907 path,
908 object,
909 ..
910 } => estimate_path_size(
911 is_term_pattern_bound(subject, input_types),
912 path,
913 is_term_pattern_bound(object, input_types),
914 ),
915 GraphPattern::Graph { graph_name } => {
916 if is_named_node_pattern_bound(graph_name, input_types) {
917 100
918 } else {
919 1
920 }
921 }
922 GraphPattern::Join {
923 left,
924 right,
925 algorithm,
926 } => estimate_join_cost(left, right, algorithm, input_types),
927 GraphPattern::LeftJoin {
928 left,
929 right,
930 algorithm,
931 ..
932 } => match algorithm {
933 LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => {
934 let left_size = estimate_graph_pattern_size(left, input_types);
935 max(
936 left_size,
937 left_size
938 .saturating_mul(estimate_graph_pattern_size(
939 right,
940 &infer_graph_pattern_types(right, input_types.clone()),
941 ))
942 .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())),
943 )
944 }
945 },
946 #[cfg(feature = "sep-0006")]
947 GraphPattern::Lateral { left, right } => estimate_lateral_cost(
948 left,
949 &infer_graph_pattern_types(left, input_types.clone()),
950 right,
951 input_types,
952 ),
953 GraphPattern::Union { inner } => inner
954 .iter()
955 .map(|inner| estimate_graph_pattern_size(inner, input_types))
956 .fold(0, usize::saturating_add),
957 GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types),
958 GraphPattern::Filter { inner, .. }
959 | GraphPattern::Extend { inner, .. }
960 | GraphPattern::OrderBy { inner, .. }
961 | GraphPattern::Project { inner, .. }
962 | GraphPattern::Distinct { inner, .. }
963 | GraphPattern::Reduced { inner, .. }
964 | GraphPattern::Group { inner, .. }
965 | GraphPattern::Service { inner, .. } => estimate_graph_pattern_size(inner, input_types),
966 GraphPattern::Slice {
967 inner,
968 start,
969 length,
970 } => {
971 let inner = estimate_graph_pattern_size(inner, input_types);
972 if let Some(length) = length {
973 min(inner, *length - *start)
974 } else {
975 inner
976 }
977 }
978 }
979}
980
981fn estimate_join_cost(
982 left: &GraphPattern,
983 right: &GraphPattern,
984 algorithm: &JoinAlgorithm,
985 input_types: &VariableTypes,
986) -> usize {
987 match algorithm {
988 JoinAlgorithm::HashBuildLeftProbeRight { keys } => {
989 estimate_graph_pattern_size(left, input_types)
990 .saturating_mul(estimate_graph_pattern_size(right, input_types))
991 .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap()))
992 }
993 }
994}
995fn estimate_lateral_cost(
996 left: &GraphPattern,
997 left_types: &VariableTypes,
998 right: &GraphPattern,
999 input_types: &VariableTypes,
1000) -> usize {
1001 estimate_graph_pattern_size(left, input_types)
1002 .saturating_mul(estimate_graph_pattern_size(right, left_types))
1003}
1004
1005fn estimate_triple_pattern_size(
1006 subject_bound: bool,
1007 predicate_bound: bool,
1008 object_bound: bool,
1009) -> usize {
1010 match (subject_bound, predicate_bound, object_bound) {
1011 (true, true, true) => 1,
1012 (true, true, false) => 10,
1013 (true, false, true) => 2,
1014 (false, true, true) => 10_000,
1015 (true, false, false) => 100,
1016 (false, false, false) => 1_000_000_000,
1017 (false, true, false) => 1_000_000,
1018 (false, false, true) => 100_000,
1019 }
1020}
1021
1022fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize {
1023 match path {
1024 PropertyPathExpression::NamedNode(_) => {
1025 estimate_triple_pattern_size(start_bound, true, end_bound)
1026 }
1027 PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound),
1028 PropertyPathExpression::Sequence(a, b) => {
1029 min(
1031 estimate_path_size(start_bound, a, false)
1032 .saturating_mul(estimate_path_size(true, b, end_bound)),
1033 estimate_path_size(start_bound, a, true)
1034 .saturating_mul(estimate_path_size(false, b, end_bound)),
1035 )
1036 }
1037 PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound)
1038 .saturating_add(estimate_path_size(start_bound, b, end_bound)),
1039 PropertyPathExpression::ZeroOrMore(p) => {
1040 if start_bound && end_bound {
1041 1
1042 } else if start_bound || end_bound {
1043 estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
1044 } else {
1045 1_000_000_000
1046 }
1047 }
1048 PropertyPathExpression::OneOrMore(p) => {
1049 if start_bound && end_bound {
1050 1
1051 } else {
1052 estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
1053 }
1054 }
1055 PropertyPathExpression::ZeroOrOne(p) => {
1056 if start_bound && end_bound {
1057 1
1058 } else if start_bound || end_bound {
1059 estimate_path_size(start_bound, p, end_bound)
1060 } else {
1061 1_000_000_000
1062 }
1063 }
1064 PropertyPathExpression::NegatedPropertySet(_) => {
1065 estimate_triple_pattern_size(start_bound, false, end_bound)
1066 }
1067 }
1068}
1069
1070fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool {
1071 match pattern {
1072 GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true,
1073 GroundTermPattern::Variable(v) => !input_types.get(v).undef,
1074 #[cfg(feature = "rdf-star")]
1075 GroundTermPattern::Triple(t) => {
1076 is_term_pattern_bound(&t.subject, input_types)
1077 && is_named_node_pattern_bound(&t.predicate, input_types)
1078 && is_term_pattern_bound(&t.object, input_types)
1079 }
1080 }
1081}
1082
1083fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool {
1084 match pattern {
1085 NamedNodePattern::NamedNode(_) => true,
1086 NamedNodePattern::Variable(v) => !input_types.get(v).undef,
1087 }
1088}