sparopt/
optimizer.rs

1use crate::algebra::{
2    Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression,
3};
4use crate::type_inference::{
5    infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes,
6};
7use oxrdf::Variable;
8use spargebra::algebra::PropertyPathExpression;
9use spargebra::term::{GroundTermPattern, NamedNodePattern};
10use std::cmp::{max, min};
11
12pub struct Optimizer;
13
14impl Optimizer {
15    pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern {
16        let pattern = Self::normalize_pattern(pattern, &VariableTypes::default());
17        let pattern = Self::reorder_joins(pattern, &VariableTypes::default());
18        Self::push_filters(pattern, Vec::new(), &VariableTypes::default())
19    }
20
21    /// Normalize the pattern, discarding any join ordering information
22    fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
23        match pattern {
24            GraphPattern::QuadPattern {
25                subject,
26                predicate,
27                object,
28                graph_name,
29            } => GraphPattern::QuadPattern {
30                subject,
31                predicate,
32                object,
33                graph_name,
34            },
35            GraphPattern::Path {
36                subject,
37                path,
38                object,
39                graph_name,
40            } => GraphPattern::Path {
41                subject,
42                path,
43                object,
44                graph_name,
45            },
46            GraphPattern::Graph { graph_name } => GraphPattern::Graph { graph_name },
47            GraphPattern::Join {
48                left,
49                right,
50                algorithm,
51            } => GraphPattern::join(
52                Self::normalize_pattern(*left, input_types),
53                Self::normalize_pattern(*right, input_types),
54                algorithm,
55            ),
56            GraphPattern::LeftJoin {
57                left,
58                right,
59                expression,
60                algorithm,
61            } => {
62                let left = Self::normalize_pattern(*left, input_types);
63                let right = Self::normalize_pattern(*right, input_types);
64                let mut inner_types = infer_graph_pattern_types(&left, input_types.clone());
65                inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone()));
66                GraphPattern::left_join(
67                    left,
68                    right,
69                    Self::normalize_expression(expression, &inner_types),
70                    algorithm,
71                )
72            }
73            #[cfg(feature = "sep-0006")]
74            GraphPattern::Lateral { left, right } => {
75                let left = Self::normalize_pattern(*left, input_types);
76                let left_types = infer_graph_pattern_types(&left, input_types.clone());
77                let right = Self::normalize_pattern(*right, &left_types);
78                GraphPattern::lateral(left, right)
79            }
80            GraphPattern::Filter { inner, expression } => {
81                let inner = Self::normalize_pattern(*inner, input_types);
82                let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
83                let expression = Self::normalize_expression(expression, &inner_types);
84                let expression_type = infer_expression_type(&expression, &inner_types);
85                if expression_type == VariableType::UNDEF {
86                    GraphPattern::empty()
87                } else {
88                    GraphPattern::filter(inner, expression)
89                }
90            }
91            GraphPattern::Union { inner } => GraphPattern::union_all(
92                inner
93                    .into_iter()
94                    .map(|e| Self::normalize_pattern(e, input_types)),
95            ),
96            GraphPattern::Extend {
97                inner,
98                variable,
99                expression,
100            } => {
101                let inner = Self::normalize_pattern(*inner, input_types);
102                let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
103                let expression = Self::normalize_expression(expression, &inner_types);
104                let expression_type = infer_expression_type(&expression, &inner_types);
105                if expression_type == VariableType::UNDEF {
106                    // TODO: valid?
107                    inner
108                } else {
109                    GraphPattern::extend(inner, variable, expression)
110                }
111            }
112            GraphPattern::Minus {
113                left,
114                right,
115                algorithm,
116            } => GraphPattern::minus(
117                Self::normalize_pattern(*left, input_types),
118                Self::normalize_pattern(*right, input_types),
119                algorithm,
120            ),
121            GraphPattern::Values {
122                variables,
123                bindings,
124            } => GraphPattern::values(variables, bindings),
125            GraphPattern::OrderBy { inner, expression } => {
126                let inner = Self::normalize_pattern(*inner, input_types);
127                let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
128                GraphPattern::order_by(
129                    inner,
130                    expression
131                        .into_iter()
132                        .map(|e| match e {
133                            OrderExpression::Asc(e) => {
134                                OrderExpression::Asc(Self::normalize_expression(e, &inner_types))
135                            }
136                            OrderExpression::Desc(e) => {
137                                OrderExpression::Desc(Self::normalize_expression(e, &inner_types))
138                            }
139                        })
140                        .collect(),
141                )
142            }
143            GraphPattern::Project { inner, variables } => {
144                GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables)
145            }
146            GraphPattern::Distinct { inner } => {
147                GraphPattern::distinct(Self::normalize_pattern(*inner, input_types))
148            }
149            GraphPattern::Reduced { inner } => {
150                GraphPattern::reduced(Self::normalize_pattern(*inner, input_types))
151            }
152            GraphPattern::Slice {
153                inner,
154                start,
155                length,
156            } => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length),
157            GraphPattern::Group {
158                inner,
159                variables,
160                aggregates,
161            } => {
162                // TODO: min, max and sample don't care about DISTINCT
163                GraphPattern::group(
164                    Self::normalize_pattern(*inner, input_types),
165                    variables,
166                    aggregates,
167                )
168            }
169            GraphPattern::Service { .. } => {
170                // We leave this problem to the remote SPARQL endpoint
171                pattern
172            }
173        }
174    }
175
176    fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression {
177        match expression {
178            Expression::NamedNode(node) => node.into(),
179            Expression::Literal(literal) => literal.into(),
180            Expression::Variable(variable) => variable.into(),
181            Expression::Or(inner) => Expression::or_all(
182                inner
183                    .into_iter()
184                    .map(|e| Self::normalize_expression(e, types)),
185            ),
186            Expression::And(inner) => Expression::and_all(
187                inner
188                    .into_iter()
189                    .map(|e| Self::normalize_expression(e, types)),
190            ),
191            Expression::Equal(left, right) => {
192                let left = Self::normalize_expression(*left, types);
193                let left_types = infer_expression_type(&left, types);
194                let right = Self::normalize_expression(*right, types);
195                let right_types = infer_expression_type(&right, types);
196                #[allow(unused_mut)]
197                let mut must_use_equal = left_types.literal && right_types.literal;
198                #[cfg(feature = "rdf-star")]
199                {
200                    must_use_equal = must_use_equal || left_types.triple && right_types.triple;
201                }
202                if must_use_equal {
203                    Expression::equal(left, right)
204                } else {
205                    Expression::same_term(left, right)
206                }
207            }
208            Expression::SameTerm(left, right) => Expression::same_term(
209                Self::normalize_expression(*left, types),
210                Self::normalize_expression(*right, types),
211            ),
212            Expression::Greater(left, right) => Expression::greater(
213                Self::normalize_expression(*left, types),
214                Self::normalize_expression(*right, types),
215            ),
216            Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal(
217                Self::normalize_expression(*left, types),
218                Self::normalize_expression(*right, types),
219            ),
220            Expression::Less(left, right) => Expression::less(
221                Self::normalize_expression(*left, types),
222                Self::normalize_expression(*right, types),
223            ),
224            Expression::LessOrEqual(left, right) => Expression::less_or_equal(
225                Self::normalize_expression(*left, types),
226                Self::normalize_expression(*right, types),
227            ),
228            Expression::Add(left, right) => {
229                Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types)
230            }
231            Expression::Subtract(left, right) => {
232                Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types)
233            }
234            Expression::Multiply(left, right) => {
235                Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types)
236            }
237            Expression::Divide(left, right) => {
238                Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types)
239            }
240            Expression::UnaryPlus(inner) => {
241                Expression::unary_plus(Self::normalize_expression(*inner, types))
242            }
243            Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types),
244            Expression::Not(inner) => !Self::normalize_expression(*inner, types),
245            Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)),
246            Expression::Bound(variable) => {
247                let t = types.get(&variable);
248                if !t.undef {
249                    true.into()
250                } else if t == VariableType::UNDEF {
251                    false.into()
252                } else {
253                    Expression::Bound(variable)
254                }
255            }
256            Expression::If(cond, then, els) => Expression::if_cond(
257                Self::normalize_expression(*cond, types),
258                Self::normalize_expression(*then, types),
259                Self::normalize_expression(*els, types),
260            ),
261            Expression::Coalesce(inners) => Expression::coalesce(
262                inners
263                    .into_iter()
264                    .map(|e| Self::normalize_expression(e, types))
265                    .collect(),
266            ),
267            Expression::FunctionCall(name, args) => Expression::call(
268                name,
269                args.into_iter()
270                    .map(|e| Self::normalize_expression(e, types))
271                    .collect(),
272            ),
273        }
274    }
275
276    fn push_filters(
277        pattern: GraphPattern,
278        mut filters: Vec<Expression>,
279        input_types: &VariableTypes,
280    ) -> GraphPattern {
281        match pattern {
282            GraphPattern::QuadPattern { .. }
283            | GraphPattern::Path { .. }
284            | GraphPattern::Graph { .. }
285            | GraphPattern::Values { .. } => {
286                GraphPattern::filter(pattern, Expression::and_all(filters))
287            }
288            GraphPattern::Join {
289                left,
290                right,
291                algorithm,
292            } => {
293                let left_types = infer_graph_pattern_types(&left, input_types.clone());
294                let right_types = infer_graph_pattern_types(&right, input_types.clone());
295                let mut left_filters = Vec::new();
296                let mut right_filters = Vec::new();
297                let mut final_filters = Vec::new();
298                for filter in filters {
299                    let push_left = are_all_expression_variables_bound(&filter, &left_types);
300                    let push_right = are_all_expression_variables_bound(&filter, &right_types);
301                    if push_left {
302                        if push_right {
303                            left_filters.push(filter.clone());
304                            right_filters.push(filter);
305                        } else {
306                            left_filters.push(filter);
307                        }
308                    } else if push_right {
309                        right_filters.push(filter);
310                    } else {
311                        final_filters.push(filter);
312                    }
313                }
314                GraphPattern::filter(
315                    GraphPattern::join(
316                        Self::push_filters(*left, left_filters, input_types),
317                        Self::push_filters(*right, right_filters, input_types),
318                        algorithm,
319                    ),
320                    Expression::and_all(final_filters),
321                )
322            }
323            #[cfg(feature = "sep-0006")]
324            GraphPattern::Lateral { left, right } => {
325                let left_types = infer_graph_pattern_types(&left, input_types.clone());
326                let mut left_filters = Vec::new();
327                let mut right_filters = Vec::new();
328                for filter in filters {
329                    let push_left = are_all_expression_variables_bound(&filter, &left_types);
330                    if push_left {
331                        left_filters.push(filter);
332                    } else {
333                        right_filters.push(filter);
334                    }
335                }
336                let left = Self::push_filters(*left, left_filters, input_types);
337                let right = Self::push_filters(*right, right_filters, &left_types);
338                if let GraphPattern::Filter {
339                    inner: inner_right,
340                    expression,
341                } = right
342                {
343                    // We prefer to have filter out of the lateral rather than inside the right part
344                    GraphPattern::filter(GraphPattern::lateral(left, *inner_right), expression)
345                } else {
346                    GraphPattern::lateral(left, right)
347                }
348            }
349            GraphPattern::LeftJoin {
350                left,
351                right,
352                expression,
353                algorithm,
354            } => {
355                let left_types = infer_graph_pattern_types(&left, input_types.clone());
356                let right_types = infer_graph_pattern_types(&right, input_types.clone());
357                let mut left_filters = Vec::new();
358                let mut right_filters = Vec::new();
359                let mut final_filters = Vec::new();
360                for filter in filters {
361                    let push_left = are_all_expression_variables_bound(&filter, &left_types);
362                    if push_left {
363                        left_filters.push(filter);
364                    } else {
365                        final_filters.push(filter);
366                    }
367                }
368                let expression = if expression.effective_boolean_value().is_none()
369                    && (are_all_expression_variables_bound(&expression, &right_types)
370                        || are_no_expression_variables_bound(&expression, &left_types))
371                {
372                    right_filters.push(expression);
373                    true.into()
374                } else {
375                    expression
376                };
377                GraphPattern::filter(
378                    GraphPattern::left_join(
379                        Self::push_filters(*left, left_filters, input_types),
380                        Self::push_filters(*right, right_filters, input_types),
381                        expression,
382                        algorithm,
383                    ),
384                    Expression::and_all(final_filters),
385                )
386            }
387            GraphPattern::Minus {
388                left,
389                right,
390                algorithm,
391            } => GraphPattern::minus(
392                Self::push_filters(*left, filters, input_types),
393                Self::push_filters(*right, Vec::new(), input_types),
394                algorithm,
395            ),
396            GraphPattern::Extend {
397                inner,
398                expression,
399                variable,
400            } => {
401                // TODO: handle the case where the filter overrides an expression variable (should not happen in SPARQL but allowed in the algebra)
402                let mut inner_filters = Vec::new();
403                let mut final_filters = Vec::new();
404                for filter in filters {
405                    let extend_variable_used =
406                        filter.used_variables().into_iter().any(|v| *v == variable);
407                    if extend_variable_used {
408                        final_filters.push(filter);
409                    } else {
410                        inner_filters.push(filter);
411                    }
412                }
413                GraphPattern::filter(
414                    GraphPattern::extend(
415                        Self::push_filters(*inner, inner_filters, input_types),
416                        variable,
417                        expression,
418                    ),
419                    Expression::and_all(final_filters),
420                )
421            }
422            GraphPattern::Filter { inner, expression } => {
423                if let Expression::And(expressions) = expression {
424                    filters.extend(expressions)
425                } else {
426                    filters.push(expression)
427                };
428                Self::push_filters(*inner, filters, input_types)
429            }
430            GraphPattern::Union { inner } => GraphPattern::union_all(
431                inner
432                    .into_iter()
433                    .map(|c| Self::push_filters(c, filters.clone(), input_types)),
434            ),
435            GraphPattern::Slice {
436                inner,
437                start,
438                length,
439            } => GraphPattern::filter(
440                GraphPattern::slice(
441                    Self::push_filters(*inner, Vec::new(), input_types),
442                    start,
443                    length,
444                ),
445                Expression::and_all(filters),
446            ),
447            GraphPattern::Distinct { inner } => {
448                GraphPattern::distinct(Self::push_filters(*inner, filters, input_types))
449            }
450            GraphPattern::Reduced { inner } => {
451                GraphPattern::reduced(Self::push_filters(*inner, filters, input_types))
452            }
453            GraphPattern::Project { inner, variables } => {
454                GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables)
455            }
456            GraphPattern::OrderBy { inner, expression } => {
457                GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression)
458            }
459            GraphPattern::Service { .. } => {
460                // TODO: we can be smart and push some filters
461                // But we need to check the behavior of SILENT that can transform no results into a singleton
462                GraphPattern::filter(pattern, Expression::and_all(filters))
463            }
464            GraphPattern::Group {
465                inner,
466                variables,
467                aggregates,
468            } => GraphPattern::filter(
469                GraphPattern::group(
470                    Self::push_filters(*inner, Vec::new(), input_types),
471                    variables,
472                    aggregates,
473                ),
474                Expression::and_all(filters),
475            ),
476        }
477    }
478
479    fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
480        match pattern {
481            GraphPattern::QuadPattern { .. }
482            | GraphPattern::Path { .. }
483            | GraphPattern::Values { .. }
484            | GraphPattern::Graph { .. } => pattern,
485            GraphPattern::Join { left, right, .. } => {
486                // We flatten the join operation
487                let mut to_reorder = Vec::new();
488                let mut todo = vec![*right, *left];
489                while let Some(e) = todo.pop() {
490                    if let GraphPattern::Join { left, right, .. } = e {
491                        todo.push(*right);
492                        todo.push(*left);
493                    } else {
494                        to_reorder.push(e);
495                    }
496                }
497
498                // We do first type inference
499                let to_reorder_types = to_reorder
500                    .iter()
501                    .map(|p| infer_graph_pattern_types(p, input_types.clone()))
502                    .collect::<Vec<_>>();
503
504                // We do greedy join reordering
505                let mut output_cartesian_product_joins = Vec::new();
506                let mut not_yet_reordered_ids = vec![true; to_reorder.len()];
507                // We look for the next connected component to reorder and pick the smallest element
508                while let Some(next_entry_id) = not_yet_reordered_ids
509                    .iter()
510                    .enumerate()
511                    .filter(|(_, v)| **v)
512                    .map(|(i, _)| i)
513                    .min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types))
514                {
515                    not_yet_reordered_ids[next_entry_id] = false; // It's now done
516                    let mut output = to_reorder[next_entry_id].clone();
517                    let mut output_types = to_reorder_types[next_entry_id].clone();
518                    // We look for an other child to join with that does not blow up the join cost
519                    while let Some(next_id) = not_yet_reordered_ids
520                        .iter()
521                        .enumerate()
522                        .filter(|(_, v)| **v)
523                        .map(|(i, _)| i)
524                        .filter(|i| {
525                            has_common_variables(&output_types, &to_reorder_types[*i], input_types)
526                        })
527                        .min_by_key(|i| {
528                            // Estimation of the join cost
529                            if cfg!(feature = "sep-0006")
530                                && is_fit_for_for_loop_join(
531                                    &to_reorder[*i],
532                                    input_types,
533                                    &output_types,
534                                )
535                            {
536                                estimate_lateral_cost(
537                                    &output,
538                                    &output_types,
539                                    &to_reorder[*i],
540                                    input_types,
541                                )
542                            } else {
543                                estimate_join_cost(
544                                    &output,
545                                    &to_reorder[*i],
546                                    &JoinAlgorithm::HashBuildLeftProbeRight {
547                                        keys: join_key_variables(
548                                            &output_types,
549                                            &to_reorder_types[*i],
550                                            input_types,
551                                        ),
552                                    },
553                                    input_types,
554                                )
555                            }
556                        })
557                    {
558                        not_yet_reordered_ids[next_id] = false; // It's now done
559                        let next = to_reorder[next_id].clone();
560                        #[cfg(feature = "sep-0006")]
561                        {
562                            output = if is_fit_for_for_loop_join(&next, input_types, &output_types)
563                            {
564                                GraphPattern::lateral(output, next)
565                            } else {
566                                GraphPattern::join(
567                                    output,
568                                    next,
569                                    JoinAlgorithm::HashBuildLeftProbeRight {
570                                        keys: join_key_variables(
571                                            &output_types,
572                                            &to_reorder_types[next_id],
573                                            input_types,
574                                        ),
575                                    },
576                                )
577                            };
578                        }
579                        #[cfg(not(feature = "sep-0006"))]
580                        {
581                            output = GraphPattern::join(
582                                output,
583                                next,
584                                JoinAlgorithm::HashBuildLeftProbeRight {
585                                    keys: join_key_variables(
586                                        &output_types,
587                                        &to_reorder_types[next_id],
588                                        input_types,
589                                    ),
590                                },
591                            );
592                        }
593                        output_types.intersect_with(to_reorder_types[next_id].clone());
594                    }
595                    output_cartesian_product_joins.push(output);
596                }
597                output_cartesian_product_joins
598                    .into_iter()
599                    .reduce(|left, right| {
600                        let keys = join_key_variables(
601                            &infer_graph_pattern_types(&left, input_types.clone()),
602                            &infer_graph_pattern_types(&right, input_types.clone()),
603                            input_types,
604                        );
605                        if estimate_graph_pattern_size(&left, input_types)
606                            <= estimate_graph_pattern_size(&right, input_types)
607                        {
608                            GraphPattern::join(
609                                left,
610                                right,
611                                JoinAlgorithm::HashBuildLeftProbeRight { keys },
612                            )
613                        } else {
614                            GraphPattern::join(
615                                right,
616                                left,
617                                JoinAlgorithm::HashBuildLeftProbeRight { keys },
618                            )
619                        }
620                    })
621                    .unwrap()
622            }
623            #[cfg(feature = "sep-0006")]
624            GraphPattern::Lateral { left, right } => {
625                let left_types = infer_graph_pattern_types(&left, input_types.clone());
626                GraphPattern::lateral(
627                    Self::reorder_joins(*left, input_types),
628                    Self::reorder_joins(*right, &left_types),
629                )
630            }
631            GraphPattern::LeftJoin {
632                left,
633                right,
634                expression,
635                ..
636            } => {
637                let left = Self::reorder_joins(*left, input_types);
638                let left_types = infer_graph_pattern_types(&left, input_types.clone());
639                let right = Self::reorder_joins(*right, input_types);
640                let right_types = infer_graph_pattern_types(&right, input_types.clone());
641                #[cfg(feature = "sep-0006")]
642                {
643                    if is_fit_for_for_loop_join(&right, input_types, &left_types)
644                        && has_common_variables(&left_types, &right_types, input_types)
645                    {
646                        return GraphPattern::lateral(
647                            left,
648                            GraphPattern::left_join(
649                                GraphPattern::empty_singleton(),
650                                right,
651                                expression,
652                                LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() },
653                            ),
654                        );
655                    }
656                }
657                GraphPattern::left_join(
658                    left,
659                    right,
660                    expression,
661                    LeftJoinAlgorithm::HashBuildRightProbeLeft {
662                        keys: join_key_variables(&left_types, &right_types, input_types),
663                    },
664                )
665            }
666            GraphPattern::Minus { left, right, .. } => {
667                let left = Self::reorder_joins(*left, input_types);
668                let left_types = infer_graph_pattern_types(&left, input_types.clone());
669                let right = Self::reorder_joins(*right, input_types);
670                let right_types = infer_graph_pattern_types(&right, input_types.clone());
671                GraphPattern::minus(
672                    left,
673                    right,
674                    MinusAlgorithm::HashBuildRightProbeLeft {
675                        keys: join_key_variables(&left_types, &right_types, input_types),
676                    },
677                )
678            }
679            GraphPattern::Extend {
680                inner,
681                expression,
682                variable,
683            } => GraphPattern::extend(
684                Self::reorder_joins(*inner, input_types),
685                variable,
686                expression,
687            ),
688            GraphPattern::Filter { inner, expression } => {
689                GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression)
690            }
691            GraphPattern::Union { inner } => GraphPattern::union_all(
692                inner
693                    .into_iter()
694                    .map(|c| Self::reorder_joins(c, input_types)),
695            ),
696            GraphPattern::Slice {
697                inner,
698                start,
699                length,
700            } => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length),
701            GraphPattern::Distinct { inner } => {
702                GraphPattern::distinct(Self::reorder_joins(*inner, input_types))
703            }
704            GraphPattern::Reduced { inner } => {
705                GraphPattern::reduced(Self::reorder_joins(*inner, input_types))
706            }
707            GraphPattern::Project { inner, variables } => {
708                GraphPattern::project(Self::reorder_joins(*inner, input_types), variables)
709            }
710            GraphPattern::OrderBy { inner, expression } => {
711                GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression)
712            }
713            GraphPattern::Service { .. } => {
714                // We don't do join reordering inside of SERVICE calls, we don't know about cardinalities
715                pattern
716            }
717            GraphPattern::Group {
718                inner,
719                variables,
720                aggregates,
721            } => GraphPattern::group(
722                Self::reorder_joins(*inner, input_types),
723                variables,
724                aggregates,
725            ),
726        }
727    }
728}
729
730fn is_fit_for_for_loop_join(
731    pattern: &GraphPattern,
732    global_input_types: &VariableTypes,
733    entry_types: &VariableTypes,
734) -> bool {
735    // TODO: think more about it
736    match pattern {
737        GraphPattern::Values { .. }
738        | GraphPattern::QuadPattern { .. }
739        | GraphPattern::Path { .. }
740        | GraphPattern::Graph { .. } => true,
741        #[cfg(feature = "sep-0006")]
742        GraphPattern::Lateral { left, right } => {
743            is_fit_for_for_loop_join(left, global_input_types, entry_types)
744                && is_fit_for_for_loop_join(right, global_input_types, entry_types)
745        }
746        GraphPattern::LeftJoin {
747            left,
748            right,
749            expression,
750            ..
751        } => {
752            if !is_fit_for_for_loop_join(left, global_input_types, entry_types) {
753                return false;
754            }
755
756            // It is not ok to transform into for loop join if right binds a variable also bound by the entry part of the for loop join
757            let mut left_types = infer_graph_pattern_types(left, global_input_types.clone());
758            let right_types = infer_graph_pattern_types(right, global_input_types.clone());
759            if right_types.iter().any(|(variable, t)| {
760                *t != VariableType::UNDEF
761                    && left_types.get(variable).undef
762                    && entry_types.get(variable) != VariableType::UNDEF
763            }) {
764                return false;
765            }
766
767            // We don't forget the final expression
768            left_types.intersect_with(right_types);
769            is_expression_fit_for_for_loop_join(expression, &left_types, entry_types)
770        }
771        GraphPattern::Union { inner } => inner
772            .iter()
773            .all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)),
774        GraphPattern::Filter { inner, expression } => {
775            is_fit_for_for_loop_join(inner, global_input_types, entry_types)
776                && is_expression_fit_for_for_loop_join(
777                    expression,
778                    &infer_graph_pattern_types(inner, global_input_types.clone()),
779                    entry_types,
780                )
781        }
782        GraphPattern::Extend {
783            inner,
784            expression,
785            variable,
786        } => {
787            is_fit_for_for_loop_join(inner, global_input_types, entry_types)
788                && entry_types.get(variable) == VariableType::UNDEF
789                && is_expression_fit_for_for_loop_join(
790                    expression,
791                    &infer_graph_pattern_types(inner, global_input_types.clone()),
792                    entry_types,
793                )
794        }
795        GraphPattern::Join { .. }
796        | GraphPattern::Minus { .. }
797        | GraphPattern::Service { .. }
798        | GraphPattern::OrderBy { .. }
799        | GraphPattern::Distinct { .. }
800        | GraphPattern::Reduced { .. }
801        | GraphPattern::Slice { .. }
802        | GraphPattern::Project { .. }
803        | GraphPattern::Group { .. } => false,
804    }
805}
806
807fn are_all_expression_variables_bound(
808    expression: &Expression,
809    variable_types: &VariableTypes,
810) -> bool {
811    expression
812        .used_variables()
813        .into_iter()
814        .all(|v| !variable_types.get(v).undef)
815}
816
817fn are_no_expression_variables_bound(
818    expression: &Expression,
819    variable_types: &VariableTypes,
820) -> bool {
821    expression
822        .used_variables()
823        .into_iter()
824        .all(|v| variable_types.get(v) == VariableType::UNDEF)
825}
826
827fn is_expression_fit_for_for_loop_join(
828    expression: &Expression,
829    input_types: &VariableTypes,
830    entry_types: &VariableTypes,
831) -> bool {
832    match expression {
833        Expression::NamedNode(_) | Expression::Literal(_) => true,
834        Expression::Variable(v) | Expression::Bound(v) => {
835            !input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF
836        }
837        Expression::Or(inner)
838        | Expression::And(inner)
839        | Expression::Coalesce(inner)
840        | Expression::FunctionCall(_, inner) => inner
841            .iter()
842            .all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)),
843        Expression::Equal(a, b)
844        | Expression::SameTerm(a, b)
845        | Expression::Greater(a, b)
846        | Expression::GreaterOrEqual(a, b)
847        | Expression::Less(a, b)
848        | Expression::LessOrEqual(a, b)
849        | Expression::Add(a, b)
850        | Expression::Subtract(a, b)
851        | Expression::Multiply(a, b)
852        | Expression::Divide(a, b) => {
853            is_expression_fit_for_for_loop_join(a, input_types, entry_types)
854                && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
855        }
856        Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => {
857            is_expression_fit_for_for_loop_join(e, input_types, entry_types)
858        }
859        Expression::If(a, b, c) => {
860            is_expression_fit_for_for_loop_join(a, input_types, entry_types)
861                && is_expression_fit_for_for_loop_join(b, input_types, entry_types)
862                && is_expression_fit_for_for_loop_join(c, input_types, entry_types)
863        }
864        Expression::Exists(inner) => is_fit_for_for_loop_join(inner, input_types, entry_types),
865    }
866}
867
868fn has_common_variables(
869    left: &VariableTypes,
870    right: &VariableTypes,
871    input_types: &VariableTypes,
872) -> bool {
873    // TODO: we should be smart and count as shared variables FILTER(?a = ?b)
874    left.iter().any(|(variable, left_type)| {
875        !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
876    })
877}
878
879fn join_key_variables(
880    left: &VariableTypes,
881    right: &VariableTypes,
882    input_types: &VariableTypes,
883) -> Vec<Variable> {
884    left.iter()
885        .filter(|(variable, left_type)| {
886            !left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
887        })
888        .map(|(variable, _)| variable.clone())
889        .collect()
890}
891
892fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize {
893    match pattern {
894        GraphPattern::Values { bindings, .. } => bindings.len(),
895        GraphPattern::QuadPattern {
896            subject,
897            predicate,
898            object,
899            ..
900        } => estimate_triple_pattern_size(
901            is_term_pattern_bound(subject, input_types),
902            is_named_node_pattern_bound(predicate, input_types),
903            is_term_pattern_bound(object, input_types),
904        ),
905        GraphPattern::Path {
906            subject,
907            path,
908            object,
909            ..
910        } => estimate_path_size(
911            is_term_pattern_bound(subject, input_types),
912            path,
913            is_term_pattern_bound(object, input_types),
914        ),
915        GraphPattern::Graph { graph_name } => {
916            if is_named_node_pattern_bound(graph_name, input_types) {
917                100
918            } else {
919                1
920            }
921        }
922        GraphPattern::Join {
923            left,
924            right,
925            algorithm,
926        } => estimate_join_cost(left, right, algorithm, input_types),
927        GraphPattern::LeftJoin {
928            left,
929            right,
930            algorithm,
931            ..
932        } => match algorithm {
933            LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => {
934                let left_size = estimate_graph_pattern_size(left, input_types);
935                max(
936                    left_size,
937                    left_size
938                        .saturating_mul(estimate_graph_pattern_size(
939                            right,
940                            &infer_graph_pattern_types(right, input_types.clone()),
941                        ))
942                        .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())),
943                )
944            }
945        },
946        #[cfg(feature = "sep-0006")]
947        GraphPattern::Lateral { left, right } => estimate_lateral_cost(
948            left,
949            &infer_graph_pattern_types(left, input_types.clone()),
950            right,
951            input_types,
952        ),
953        GraphPattern::Union { inner } => inner
954            .iter()
955            .map(|inner| estimate_graph_pattern_size(inner, input_types))
956            .fold(0, usize::saturating_add),
957        GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types),
958        GraphPattern::Filter { inner, .. }
959        | GraphPattern::Extend { inner, .. }
960        | GraphPattern::OrderBy { inner, .. }
961        | GraphPattern::Project { inner, .. }
962        | GraphPattern::Distinct { inner, .. }
963        | GraphPattern::Reduced { inner, .. }
964        | GraphPattern::Group { inner, .. }
965        | GraphPattern::Service { inner, .. } => estimate_graph_pattern_size(inner, input_types),
966        GraphPattern::Slice {
967            inner,
968            start,
969            length,
970        } => {
971            let inner = estimate_graph_pattern_size(inner, input_types);
972            if let Some(length) = length {
973                min(inner, *length - *start)
974            } else {
975                inner
976            }
977        }
978    }
979}
980
981fn estimate_join_cost(
982    left: &GraphPattern,
983    right: &GraphPattern,
984    algorithm: &JoinAlgorithm,
985    input_types: &VariableTypes,
986) -> usize {
987    match algorithm {
988        JoinAlgorithm::HashBuildLeftProbeRight { keys } => {
989            estimate_graph_pattern_size(left, input_types)
990                .saturating_mul(estimate_graph_pattern_size(right, input_types))
991                .saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap()))
992        }
993    }
994}
995fn estimate_lateral_cost(
996    left: &GraphPattern,
997    left_types: &VariableTypes,
998    right: &GraphPattern,
999    input_types: &VariableTypes,
1000) -> usize {
1001    estimate_graph_pattern_size(left, input_types)
1002        .saturating_mul(estimate_graph_pattern_size(right, left_types))
1003}
1004
1005fn estimate_triple_pattern_size(
1006    subject_bound: bool,
1007    predicate_bound: bool,
1008    object_bound: bool,
1009) -> usize {
1010    match (subject_bound, predicate_bound, object_bound) {
1011        (true, true, true) => 1,
1012        (true, true, false) => 10,
1013        (true, false, true) => 2,
1014        (false, true, true) => 10_000,
1015        (true, false, false) => 100,
1016        (false, false, false) => 1_000_000_000,
1017        (false, true, false) => 1_000_000,
1018        (false, false, true) => 100_000,
1019    }
1020}
1021
1022fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize {
1023    match path {
1024        PropertyPathExpression::NamedNode(_) => {
1025            estimate_triple_pattern_size(start_bound, true, end_bound)
1026        }
1027        PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound),
1028        PropertyPathExpression::Sequence(a, b) => {
1029            // We do a for loop join in the best direction
1030            min(
1031                estimate_path_size(start_bound, a, false)
1032                    .saturating_mul(estimate_path_size(true, b, end_bound)),
1033                estimate_path_size(start_bound, a, true)
1034                    .saturating_mul(estimate_path_size(false, b, end_bound)),
1035            )
1036        }
1037        PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound)
1038            .saturating_add(estimate_path_size(start_bound, b, end_bound)),
1039        PropertyPathExpression::ZeroOrMore(p) => {
1040            if start_bound && end_bound {
1041                1
1042            } else if start_bound || end_bound {
1043                estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
1044            } else {
1045                1_000_000_000
1046            }
1047        }
1048        PropertyPathExpression::OneOrMore(p) => {
1049            if start_bound && end_bound {
1050                1
1051            } else {
1052                estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
1053            }
1054        }
1055        PropertyPathExpression::ZeroOrOne(p) => {
1056            if start_bound && end_bound {
1057                1
1058            } else if start_bound || end_bound {
1059                estimate_path_size(start_bound, p, end_bound)
1060            } else {
1061                1_000_000_000
1062            }
1063        }
1064        PropertyPathExpression::NegatedPropertySet(_) => {
1065            estimate_triple_pattern_size(start_bound, false, end_bound)
1066        }
1067    }
1068}
1069
1070fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool {
1071    match pattern {
1072        GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true,
1073        GroundTermPattern::Variable(v) => !input_types.get(v).undef,
1074        #[cfg(feature = "rdf-star")]
1075        GroundTermPattern::Triple(t) => {
1076            is_term_pattern_bound(&t.subject, input_types)
1077                && is_named_node_pattern_bound(&t.predicate, input_types)
1078                && is_term_pattern_bound(&t.object, input_types)
1079        }
1080    }
1081}
1082
1083fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool {
1084    match pattern {
1085        NamedNodePattern::NamedNode(_) => true,
1086        NamedNodePattern::Variable(v) => !input_types.get(v).undef,
1087    }
1088}