use crate::algebra::{
Expression, GraphPattern, JoinAlgorithm, LeftJoinAlgorithm, MinusAlgorithm, OrderExpression,
};
use crate::type_inference::{
infer_expression_type, infer_graph_pattern_types, VariableType, VariableTypes,
};
use oxrdf::Variable;
use spargebra::algebra::PropertyPathExpression;
use spargebra::term::{GroundTermPattern, NamedNodePattern};
use std::cmp::{max, min};
pub struct Optimizer;
impl Optimizer {
pub fn optimize_graph_pattern(pattern: GraphPattern) -> GraphPattern {
let pattern = Self::normalize_pattern(pattern, &VariableTypes::default());
let pattern = Self::reorder_joins(pattern, &VariableTypes::default());
Self::push_filters(pattern, Vec::new(), &VariableTypes::default())
}
fn normalize_pattern(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
match pattern {
GraphPattern::QuadPattern {
subject,
predicate,
object,
graph_name,
} => GraphPattern::QuadPattern {
subject,
predicate,
object,
graph_name,
},
GraphPattern::Path {
subject,
path,
object,
graph_name,
} => GraphPattern::Path {
subject,
path,
object,
graph_name,
},
GraphPattern::Join {
left,
right,
algorithm,
} => GraphPattern::join(
Self::normalize_pattern(*left, input_types),
Self::normalize_pattern(*right, input_types),
algorithm,
),
GraphPattern::LeftJoin {
left,
right,
expression,
algorithm,
} => {
let left = Self::normalize_pattern(*left, input_types);
let right = Self::normalize_pattern(*right, input_types);
let mut inner_types = infer_graph_pattern_types(&left, input_types.clone());
inner_types.intersect_with(infer_graph_pattern_types(&right, input_types.clone()));
GraphPattern::left_join(
left,
right,
Self::normalize_expression(expression, &inner_types),
algorithm,
)
}
#[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => {
let left = Self::normalize_pattern(*left, input_types);
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right = Self::normalize_pattern(*right, &left_types);
GraphPattern::lateral(left, right)
}
GraphPattern::Filter { inner, expression } => {
let inner = Self::normalize_pattern(*inner, input_types);
let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
let expression = Self::normalize_expression(expression, &inner_types);
let expression_type = infer_expression_type(&expression, &inner_types);
if expression_type == VariableType::UNDEF {
GraphPattern::empty()
} else {
GraphPattern::filter(inner, expression)
}
}
GraphPattern::Union { inner } => GraphPattern::union_all(
inner
.into_iter()
.map(|e| Self::normalize_pattern(e, input_types)),
),
GraphPattern::Extend {
inner,
variable,
expression,
} => {
let inner = Self::normalize_pattern(*inner, input_types);
let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
let expression = Self::normalize_expression(expression, &inner_types);
let expression_type = infer_expression_type(&expression, &inner_types);
if expression_type == VariableType::UNDEF {
inner
} else {
GraphPattern::extend(inner, variable, expression)
}
}
GraphPattern::Minus {
left,
right,
algorithm,
} => GraphPattern::minus(
Self::normalize_pattern(*left, input_types),
Self::normalize_pattern(*right, input_types),
algorithm,
),
GraphPattern::Values {
variables,
bindings,
} => GraphPattern::values(variables, bindings),
GraphPattern::OrderBy { inner, expression } => {
let inner = Self::normalize_pattern(*inner, input_types);
let inner_types = infer_graph_pattern_types(&inner, input_types.clone());
GraphPattern::order_by(
inner,
expression
.into_iter()
.map(|e| match e {
OrderExpression::Asc(e) => {
OrderExpression::Asc(Self::normalize_expression(e, &inner_types))
}
OrderExpression::Desc(e) => {
OrderExpression::Desc(Self::normalize_expression(e, &inner_types))
}
})
.collect(),
)
}
GraphPattern::Project { inner, variables } => {
GraphPattern::project(Self::normalize_pattern(*inner, input_types), variables)
}
GraphPattern::Distinct { inner } => {
GraphPattern::distinct(Self::normalize_pattern(*inner, input_types))
}
GraphPattern::Reduced { inner } => {
GraphPattern::reduced(Self::normalize_pattern(*inner, input_types))
}
GraphPattern::Slice {
inner,
start,
length,
} => GraphPattern::slice(Self::normalize_pattern(*inner, input_types), start, length),
GraphPattern::Group {
inner,
variables,
aggregates,
} => {
GraphPattern::group(
Self::normalize_pattern(*inner, input_types),
variables,
aggregates,
)
}
GraphPattern::Service { .. } => {
pattern
}
}
}
fn normalize_expression(expression: Expression, types: &VariableTypes) -> Expression {
match expression {
Expression::NamedNode(node) => node.into(),
Expression::Literal(literal) => literal.into(),
Expression::Variable(variable) => variable.into(),
Expression::Or(inner) => Expression::or_all(
inner
.into_iter()
.map(|e| Self::normalize_expression(e, types)),
),
Expression::And(inner) => Expression::and_all(
inner
.into_iter()
.map(|e| Self::normalize_expression(e, types)),
),
Expression::Equal(left, right) => {
let left = Self::normalize_expression(*left, types);
let left_types = infer_expression_type(&left, types);
let right = Self::normalize_expression(*right, types);
let right_types = infer_expression_type(&right, types);
#[allow(unused_mut)]
let mut must_use_equal = left_types.literal && right_types.literal;
#[cfg(feature = "rdf-star")]
{
must_use_equal = must_use_equal || left_types.triple && right_types.triple;
}
if must_use_equal {
Expression::equal(left, right)
} else {
Expression::same_term(left, right)
}
}
Expression::SameTerm(left, right) => Expression::same_term(
Self::normalize_expression(*left, types),
Self::normalize_expression(*right, types),
),
Expression::Greater(left, right) => Expression::greater(
Self::normalize_expression(*left, types),
Self::normalize_expression(*right, types),
),
Expression::GreaterOrEqual(left, right) => Expression::greater_or_equal(
Self::normalize_expression(*left, types),
Self::normalize_expression(*right, types),
),
Expression::Less(left, right) => Expression::less(
Self::normalize_expression(*left, types),
Self::normalize_expression(*right, types),
),
Expression::LessOrEqual(left, right) => Expression::less_or_equal(
Self::normalize_expression(*left, types),
Self::normalize_expression(*right, types),
),
Expression::Add(left, right) => {
Self::normalize_expression(*left, types) + Self::normalize_expression(*right, types)
}
Expression::Subtract(left, right) => {
Self::normalize_expression(*left, types) - Self::normalize_expression(*right, types)
}
Expression::Multiply(left, right) => {
Self::normalize_expression(*left, types) * Self::normalize_expression(*right, types)
}
Expression::Divide(left, right) => {
Self::normalize_expression(*left, types) / Self::normalize_expression(*right, types)
}
Expression::UnaryPlus(inner) => {
Expression::unary_plus(Self::normalize_expression(*inner, types))
}
Expression::UnaryMinus(inner) => -Self::normalize_expression(*inner, types),
Expression::Not(inner) => !Self::normalize_expression(*inner, types),
Expression::Exists(inner) => Expression::exists(Self::normalize_pattern(*inner, types)),
Expression::Bound(variable) => {
let t = types.get(&variable);
if !t.undef {
true.into()
} else if t == VariableType::UNDEF {
false.into()
} else {
Expression::Bound(variable)
}
}
Expression::If(cond, then, els) => Expression::if_cond(
Self::normalize_expression(*cond, types),
Self::normalize_expression(*then, types),
Self::normalize_expression(*els, types),
),
Expression::Coalesce(inners) => Expression::coalesce(
inners
.into_iter()
.map(|e| Self::normalize_expression(e, types))
.collect(),
),
Expression::FunctionCall(name, args) => Expression::call(
name,
args.into_iter()
.map(|e| Self::normalize_expression(e, types))
.collect(),
),
}
}
fn push_filters(
pattern: GraphPattern,
mut filters: Vec<Expression>,
input_types: &VariableTypes,
) -> GraphPattern {
match pattern {
GraphPattern::QuadPattern { .. }
| GraphPattern::Path { .. }
| GraphPattern::Values { .. } => {
GraphPattern::filter(pattern, Expression::and_all(filters))
}
GraphPattern::Join {
left,
right,
algorithm,
} => {
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right_types = infer_graph_pattern_types(&right, input_types.clone());
let mut left_filters = Vec::new();
let mut right_filters = Vec::new();
let mut final_filters = Vec::new();
for filter in filters {
let push_left = are_all_expression_variables_bound(&filter, &left_types);
let push_right = are_all_expression_variables_bound(&filter, &right_types);
if push_left {
if push_right {
left_filters.push(filter.clone());
right_filters.push(filter);
} else {
left_filters.push(filter);
}
} else if push_right {
right_filters.push(filter);
} else {
final_filters.push(filter);
}
}
GraphPattern::filter(
GraphPattern::join(
Self::push_filters(*left, left_filters, input_types),
Self::push_filters(*right, right_filters, input_types),
algorithm,
),
Expression::and_all(final_filters),
)
}
#[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => {
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let mut left_filters = Vec::new();
let mut right_filters = Vec::new();
for filter in filters {
let push_left = are_all_expression_variables_bound(&filter, &left_types);
if push_left {
left_filters.push(filter);
} else {
right_filters.push(filter);
}
}
let left = Self::push_filters(*left, left_filters, input_types);
let right = Self::push_filters(*right, right_filters, &left_types);
if let GraphPattern::Filter {
inner: right,
expression,
} = right
{
GraphPattern::filter(GraphPattern::lateral(left, *right), expression)
} else {
GraphPattern::lateral(left, right)
}
}
GraphPattern::LeftJoin {
left,
right,
expression,
algorithm,
} => {
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right_types = infer_graph_pattern_types(&right, input_types.clone());
let mut left_filters = Vec::new();
let mut right_filters = Vec::new();
let mut final_filters = Vec::new();
for filter in filters {
let push_left = are_all_expression_variables_bound(&filter, &left_types);
if push_left {
left_filters.push(filter);
} else {
final_filters.push(filter);
}
}
let expression = if expression.effective_boolean_value().is_none()
&& (are_all_expression_variables_bound(&expression, &right_types)
|| are_no_expression_variables_bound(&expression, &left_types))
{
right_filters.push(expression);
true.into()
} else {
expression
};
GraphPattern::filter(
GraphPattern::left_join(
Self::push_filters(*left, left_filters, input_types),
Self::push_filters(*right, right_filters, input_types),
expression,
algorithm,
),
Expression::and_all(final_filters),
)
}
GraphPattern::Minus {
left,
right,
algorithm,
} => GraphPattern::minus(
Self::push_filters(*left, filters, input_types),
Self::push_filters(*right, Vec::new(), input_types),
algorithm,
),
GraphPattern::Extend {
inner,
expression,
variable,
} => {
let mut inner_filters = Vec::new();
let mut final_filters = Vec::new();
for filter in filters {
let extend_variable_used =
filter.used_variables().into_iter().any(|v| *v == variable);
if extend_variable_used {
final_filters.push(filter);
} else {
inner_filters.push(filter);
}
}
GraphPattern::filter(
GraphPattern::extend(
Self::push_filters(*inner, inner_filters, input_types),
variable,
expression,
),
Expression::and_all(final_filters),
)
}
GraphPattern::Filter { inner, expression } => {
if let Expression::And(expressions) = expression {
filters.extend(expressions)
} else {
filters.push(expression)
};
Self::push_filters(*inner, filters, input_types)
}
GraphPattern::Union { inner } => GraphPattern::union_all(
inner
.into_iter()
.map(|c| Self::push_filters(c, filters.clone(), input_types)),
),
GraphPattern::Slice {
inner,
start,
length,
} => GraphPattern::filter(
GraphPattern::slice(
Self::push_filters(*inner, Vec::new(), input_types),
start,
length,
),
Expression::and_all(filters),
),
GraphPattern::Distinct { inner } => {
GraphPattern::distinct(Self::push_filters(*inner, filters, input_types))
}
GraphPattern::Reduced { inner } => {
GraphPattern::reduced(Self::push_filters(*inner, filters, input_types))
}
GraphPattern::Project { inner, variables } => {
GraphPattern::project(Self::push_filters(*inner, filters, input_types), variables)
}
GraphPattern::OrderBy { inner, expression } => {
GraphPattern::order_by(Self::push_filters(*inner, filters, input_types), expression)
}
GraphPattern::Service { .. } => {
GraphPattern::filter(pattern, Expression::and_all(filters))
}
GraphPattern::Group {
inner,
variables,
aggregates,
} => GraphPattern::filter(
GraphPattern::group(
Self::push_filters(*inner, Vec::new(), input_types),
variables,
aggregates,
),
Expression::and_all(filters),
),
}
}
fn reorder_joins(pattern: GraphPattern, input_types: &VariableTypes) -> GraphPattern {
match pattern {
GraphPattern::QuadPattern { .. }
| GraphPattern::Path { .. }
| GraphPattern::Values { .. } => pattern,
GraphPattern::Join { left, right, .. } => {
let mut to_reorder = Vec::new();
let mut todo = vec![*right, *left];
while let Some(e) = todo.pop() {
if let GraphPattern::Join { left, right, .. } = e {
todo.push(*right);
todo.push(*left);
} else {
to_reorder.push(e);
}
}
let to_reorder_types = to_reorder
.iter()
.map(|p| infer_graph_pattern_types(p, input_types.clone()))
.collect::<Vec<_>>();
let mut output_cartesian_product_joins = Vec::new();
let mut not_yet_reordered_ids = vec![true; to_reorder.len()];
while let Some(next_entry_id) = not_yet_reordered_ids
.iter()
.enumerate()
.filter(|(_, v)| **v)
.map(|(i, _)| i)
.min_by_key(|i| estimate_graph_pattern_size(&to_reorder[*i], input_types))
{
not_yet_reordered_ids[next_entry_id] = false; let mut output = to_reorder[next_entry_id].clone();
let mut output_types = to_reorder_types[next_entry_id].clone();
while let Some(next_id) = not_yet_reordered_ids
.iter()
.enumerate()
.filter(|(_, v)| **v)
.map(|(i, _)| i)
.filter(|i| {
has_common_variables(&output_types, &to_reorder_types[*i], input_types)
})
.min_by_key(|i| {
if cfg!(feature = "sep-0006")
&& is_fit_for_for_loop_join(
&to_reorder[*i],
input_types,
&output_types,
)
{
estimate_lateral_cost(
&output,
&output_types,
&to_reorder[*i],
input_types,
)
} else {
estimate_join_cost(
&output,
&to_reorder[*i],
&JoinAlgorithm::HashBuildLeftProbeRight {
keys: join_key_variables(
&output_types,
&to_reorder_types[*i],
input_types,
),
},
input_types,
)
}
})
{
not_yet_reordered_ids[next_id] = false; let next = to_reorder[next_id].clone();
#[cfg(feature = "sep-0006")]
{
output = if is_fit_for_for_loop_join(&next, input_types, &output_types)
{
GraphPattern::lateral(output, next)
} else {
GraphPattern::join(
output,
next,
JoinAlgorithm::HashBuildLeftProbeRight {
keys: join_key_variables(
&output_types,
&to_reorder_types[next_id],
input_types,
),
},
)
};
}
#[cfg(not(feature = "sep-0006"))]
{
output = GraphPattern::join(
output,
next,
JoinAlgorithm::HashBuildLeftProbeRight {
keys: join_key_variables(
&output_types,
&to_reorder_types[next_id],
input_types,
),
},
);
}
output_types.intersect_with(to_reorder_types[next_id].clone());
}
output_cartesian_product_joins.push(output);
}
output_cartesian_product_joins
.into_iter()
.reduce(|left, right| {
let keys = join_key_variables(
&infer_graph_pattern_types(&left, input_types.clone()),
&infer_graph_pattern_types(&right, input_types.clone()),
input_types,
);
if estimate_graph_pattern_size(&left, input_types)
<= estimate_graph_pattern_size(&right, input_types)
{
GraphPattern::join(
left,
right,
JoinAlgorithm::HashBuildLeftProbeRight { keys },
)
} else {
GraphPattern::join(
right,
left,
JoinAlgorithm::HashBuildLeftProbeRight { keys },
)
}
})
.unwrap()
}
#[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => {
let left_types = infer_graph_pattern_types(&left, input_types.clone());
GraphPattern::lateral(
Self::reorder_joins(*left, input_types),
Self::reorder_joins(*right, &left_types),
)
}
GraphPattern::LeftJoin {
left,
right,
expression,
..
} => {
let left = Self::reorder_joins(*left, input_types);
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right = Self::reorder_joins(*right, input_types);
let right_types = infer_graph_pattern_types(&right, input_types.clone());
#[cfg(feature = "sep-0006")]
{
if is_fit_for_for_loop_join(&right, input_types, &left_types)
&& has_common_variables(&left_types, &right_types, input_types)
{
return GraphPattern::lateral(
left,
GraphPattern::left_join(
GraphPattern::empty_singleton(),
right,
expression,
LeftJoinAlgorithm::HashBuildRightProbeLeft { keys: Vec::new() },
),
);
}
}
GraphPattern::left_join(
left,
right,
expression,
LeftJoinAlgorithm::HashBuildRightProbeLeft {
keys: join_key_variables(&left_types, &right_types, input_types),
},
)
}
GraphPattern::Minus { left, right, .. } => {
let left = Self::reorder_joins(*left, input_types);
let left_types = infer_graph_pattern_types(&left, input_types.clone());
let right = Self::reorder_joins(*right, input_types);
let right_types = infer_graph_pattern_types(&right, input_types.clone());
GraphPattern::minus(
left,
right,
MinusAlgorithm::HashBuildRightProbeLeft {
keys: join_key_variables(&left_types, &right_types, input_types),
},
)
}
GraphPattern::Extend {
inner,
expression,
variable,
} => GraphPattern::extend(
Self::reorder_joins(*inner, input_types),
variable,
expression,
),
GraphPattern::Filter { inner, expression } => {
GraphPattern::filter(Self::reorder_joins(*inner, input_types), expression)
}
GraphPattern::Union { inner } => GraphPattern::union_all(
inner
.into_iter()
.map(|c| Self::reorder_joins(c, input_types)),
),
GraphPattern::Slice {
inner,
start,
length,
} => GraphPattern::slice(Self::reorder_joins(*inner, input_types), start, length),
GraphPattern::Distinct { inner } => {
GraphPattern::distinct(Self::reorder_joins(*inner, input_types))
}
GraphPattern::Reduced { inner } => {
GraphPattern::reduced(Self::reorder_joins(*inner, input_types))
}
GraphPattern::Project { inner, variables } => {
GraphPattern::project(Self::reorder_joins(*inner, input_types), variables)
}
GraphPattern::OrderBy { inner, expression } => {
GraphPattern::order_by(Self::reorder_joins(*inner, input_types), expression)
}
GraphPattern::Service { .. } => {
pattern
}
GraphPattern::Group {
inner,
variables,
aggregates,
} => GraphPattern::group(
Self::reorder_joins(*inner, input_types),
variables,
aggregates,
),
}
}
}
fn is_fit_for_for_loop_join(
pattern: &GraphPattern,
global_input_types: &VariableTypes,
entry_types: &VariableTypes,
) -> bool {
match pattern {
GraphPattern::Values { .. }
| GraphPattern::QuadPattern { .. }
| GraphPattern::Path { .. } => true,
#[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => {
is_fit_for_for_loop_join(left, global_input_types, entry_types)
&& is_fit_for_for_loop_join(right, global_input_types, entry_types)
}
GraphPattern::LeftJoin {
left,
right,
expression,
..
} => {
if !is_fit_for_for_loop_join(left, global_input_types, entry_types) {
return false;
}
let mut left_types = infer_graph_pattern_types(left, global_input_types.clone());
let right_types = infer_graph_pattern_types(right, global_input_types.clone());
if right_types.iter().any(|(variable, t)| {
*t != VariableType::UNDEF
&& left_types.get(variable).undef
&& entry_types.get(variable) != VariableType::UNDEF
}) {
return false;
}
left_types.intersect_with(right_types);
is_expression_fit_for_for_loop_join(expression, &left_types, entry_types)
}
GraphPattern::Union { inner } => inner
.iter()
.all(|i| is_fit_for_for_loop_join(i, global_input_types, entry_types)),
GraphPattern::Filter { inner, expression } => {
is_fit_for_for_loop_join(inner, global_input_types, entry_types)
&& is_expression_fit_for_for_loop_join(
expression,
&infer_graph_pattern_types(inner, global_input_types.clone()),
entry_types,
)
}
GraphPattern::Extend {
inner,
expression,
variable,
} => {
is_fit_for_for_loop_join(inner, global_input_types, entry_types)
&& entry_types.get(variable) == VariableType::UNDEF
&& is_expression_fit_for_for_loop_join(
expression,
&infer_graph_pattern_types(inner, global_input_types.clone()),
entry_types,
)
}
GraphPattern::Join { .. }
| GraphPattern::Minus { .. }
| GraphPattern::Service { .. }
| GraphPattern::OrderBy { .. }
| GraphPattern::Distinct { .. }
| GraphPattern::Reduced { .. }
| GraphPattern::Slice { .. }
| GraphPattern::Project { .. }
| GraphPattern::Group { .. } => false,
}
}
fn are_all_expression_variables_bound(
expression: &Expression,
variable_types: &VariableTypes,
) -> bool {
expression
.used_variables()
.into_iter()
.all(|v| !variable_types.get(v).undef)
}
fn are_no_expression_variables_bound(
expression: &Expression,
variable_types: &VariableTypes,
) -> bool {
expression
.used_variables()
.into_iter()
.all(|v| variable_types.get(v) == VariableType::UNDEF)
}
fn is_expression_fit_for_for_loop_join(
expression: &Expression,
input_types: &VariableTypes,
entry_types: &VariableTypes,
) -> bool {
match expression {
Expression::NamedNode(_) | Expression::Literal(_) => true,
Expression::Variable(v) | Expression::Bound(v) => {
!input_types.get(v).undef || entry_types.get(v) == VariableType::UNDEF
}
Expression::Or(inner)
| Expression::And(inner)
| Expression::Coalesce(inner)
| Expression::FunctionCall(_, inner) => inner
.iter()
.all(|e| is_expression_fit_for_for_loop_join(e, input_types, entry_types)),
Expression::Equal(a, b)
| Expression::SameTerm(a, b)
| Expression::Greater(a, b)
| Expression::GreaterOrEqual(a, b)
| Expression::Less(a, b)
| Expression::LessOrEqual(a, b)
| Expression::Add(a, b)
| Expression::Subtract(a, b)
| Expression::Multiply(a, b)
| Expression::Divide(a, b) => {
is_expression_fit_for_for_loop_join(a, input_types, entry_types)
&& is_expression_fit_for_for_loop_join(b, input_types, entry_types)
}
Expression::UnaryPlus(e) | Expression::UnaryMinus(e) | Expression::Not(e) => {
is_expression_fit_for_for_loop_join(e, input_types, entry_types)
}
Expression::If(a, b, c) => {
is_expression_fit_for_for_loop_join(a, input_types, entry_types)
&& is_expression_fit_for_for_loop_join(b, input_types, entry_types)
&& is_expression_fit_for_for_loop_join(c, input_types, entry_types)
}
Expression::Exists(inner) => is_fit_for_for_loop_join(inner, input_types, entry_types),
}
}
fn has_common_variables(
left: &VariableTypes,
right: &VariableTypes,
input_types: &VariableTypes,
) -> bool {
left.iter().any(|(variable, left_type)| {
!left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
})
}
fn join_key_variables(
left: &VariableTypes,
right: &VariableTypes,
input_types: &VariableTypes,
) -> Vec<Variable> {
left.iter()
.filter(|(variable, left_type)| {
!left_type.undef && !right.get(variable).undef && input_types.get(variable).undef
})
.map(|(variable, _)| variable.clone())
.collect()
}
fn estimate_graph_pattern_size(pattern: &GraphPattern, input_types: &VariableTypes) -> usize {
match pattern {
GraphPattern::Values { bindings, .. } => bindings.len(),
GraphPattern::QuadPattern {
subject,
predicate,
object,
..
} => estimate_triple_pattern_size(
is_term_pattern_bound(subject, input_types),
is_named_node_pattern_bound(predicate, input_types),
is_term_pattern_bound(object, input_types),
),
GraphPattern::Path {
subject,
path,
object,
..
} => estimate_path_size(
is_term_pattern_bound(subject, input_types),
path,
is_term_pattern_bound(object, input_types),
),
GraphPattern::Join {
left,
right,
algorithm,
} => estimate_join_cost(left, right, algorithm, input_types),
GraphPattern::LeftJoin {
left,
right,
algorithm,
..
} => match algorithm {
LeftJoinAlgorithm::HashBuildRightProbeLeft { keys } => {
let left_size = estimate_graph_pattern_size(left, input_types);
max(
left_size,
left_size
.saturating_mul(estimate_graph_pattern_size(
right,
&infer_graph_pattern_types(right, input_types.clone()),
))
.saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap())),
)
}
},
#[cfg(feature = "sep-0006")]
GraphPattern::Lateral { left, right } => estimate_lateral_cost(
left,
&infer_graph_pattern_types(left, input_types.clone()),
right,
input_types,
),
GraphPattern::Union { inner } => inner
.iter()
.map(|inner| estimate_graph_pattern_size(inner, input_types))
.fold(0, usize::saturating_add),
GraphPattern::Minus { left, .. } => estimate_graph_pattern_size(left, input_types),
GraphPattern::Filter { inner, .. }
| GraphPattern::Extend { inner, .. }
| GraphPattern::OrderBy { inner, .. }
| GraphPattern::Project { inner, .. }
| GraphPattern::Distinct { inner, .. }
| GraphPattern::Reduced { inner, .. }
| GraphPattern::Group { inner, .. }
| GraphPattern::Service { inner, .. } => estimate_graph_pattern_size(inner, input_types),
GraphPattern::Slice {
inner,
start,
length,
} => {
let inner = estimate_graph_pattern_size(inner, input_types);
if let Some(length) = length {
min(inner, *length - *start)
} else {
inner
}
}
}
}
fn estimate_join_cost(
left: &GraphPattern,
right: &GraphPattern,
algorithm: &JoinAlgorithm,
input_types: &VariableTypes,
) -> usize {
match algorithm {
JoinAlgorithm::HashBuildLeftProbeRight { keys } => {
estimate_graph_pattern_size(left, input_types)
.saturating_mul(estimate_graph_pattern_size(right, input_types))
.saturating_div(1_000_usize.saturating_pow(keys.len().try_into().unwrap()))
}
}
}
fn estimate_lateral_cost(
left: &GraphPattern,
left_types: &VariableTypes,
right: &GraphPattern,
input_types: &VariableTypes,
) -> usize {
estimate_graph_pattern_size(left, input_types)
.saturating_mul(estimate_graph_pattern_size(right, left_types))
}
fn estimate_triple_pattern_size(
subject_bound: bool,
predicate_bound: bool,
object_bound: bool,
) -> usize {
match (subject_bound, predicate_bound, object_bound) {
(true, true, true) => 1,
(true, true, false) => 10,
(true, false, true) => 2,
(false, true, true) => 10_000,
(true, false, false) => 100,
(false, false, false) => 1_000_000_000,
(false, true, false) => 1_000_000,
(false, false, true) => 100_000,
}
}
fn estimate_path_size(start_bound: bool, path: &PropertyPathExpression, end_bound: bool) -> usize {
match path {
PropertyPathExpression::NamedNode(_) => {
estimate_triple_pattern_size(start_bound, true, end_bound)
}
PropertyPathExpression::Reverse(p) => estimate_path_size(end_bound, p, start_bound),
PropertyPathExpression::Sequence(a, b) => {
min(
estimate_path_size(start_bound, a, false)
.saturating_mul(estimate_path_size(true, b, end_bound)),
estimate_path_size(start_bound, a, true)
.saturating_mul(estimate_path_size(false, b, end_bound)),
)
}
PropertyPathExpression::Alternative(a, b) => estimate_path_size(start_bound, a, end_bound)
.saturating_add(estimate_path_size(start_bound, b, end_bound)),
PropertyPathExpression::ZeroOrMore(p) => {
if start_bound && end_bound {
1
} else if start_bound || end_bound {
estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
} else {
1_000_000_000
}
}
PropertyPathExpression::OneOrMore(p) => {
if start_bound && end_bound {
1
} else {
estimate_path_size(start_bound, p, end_bound).saturating_mul(1000)
}
}
PropertyPathExpression::ZeroOrOne(p) => {
if start_bound && end_bound {
1
} else if start_bound || end_bound {
estimate_path_size(start_bound, p, end_bound)
} else {
1_000_000_000
}
}
PropertyPathExpression::NegatedPropertySet(_) => {
estimate_triple_pattern_size(start_bound, false, end_bound)
}
}
}
fn is_term_pattern_bound(pattern: &GroundTermPattern, input_types: &VariableTypes) -> bool {
match pattern {
GroundTermPattern::NamedNode(_) | GroundTermPattern::Literal(_) => true,
GroundTermPattern::Variable(v) => !input_types.get(v).undef,
#[cfg(feature = "rdf-star")]
GroundTermPattern::Triple(t) => {
is_term_pattern_bound(&t.subject, input_types)
&& is_named_node_pattern_bound(&t.predicate, input_types)
&& is_term_pattern_bound(&t.object, input_types)
}
}
}
fn is_named_node_pattern_bound(pattern: &NamedNodePattern, input_types: &VariableTypes) -> bool {
match pattern {
NamedNodePattern::NamedNode(_) => true,
NamedNodePattern::Variable(v) => !input_types.get(v).undef,
}
}