/// Simplify an expression by replacing subexpressions based on a guarantee: /// a boolean expression which is guaranteed to evaluate to `true`. For example, this is /// used to remove redundant function calls from a filter expression or to replace a /// reference to a constant-value field with a literal. ARROW_EXPORT Result<Expression> SimplifyWithGuarantee(Expression, const Expression& guaranteed_true_predicate);
/// \brief Extract an equality from an expression. /// /// Recognizes expressions of the form: /// equal(a, 2) /// is_null(a) std::optional<std::pair<FieldRef, Datum>> ExtractOneFieldValue(const Expression& guarantee);
// Conjunction members which are represented in known_values are erased from // conjunction_members Status ExtractKnownFieldValues(std::vector<Expression>* conjunction_members, KnownFieldValues* known_values){ // filter out consumed conjunction members, leaving only unconsumed *conjunction_members = arrow::internal::FilterVector( std::move(*conjunction_members), [known_values](const Expression& guarantee) -> bool { if (auto known_value = ExtractOneFieldValue(guarantee)) { known_values->map.insert(std::move(*known_value)); returnfalse; } returntrue; });
Result<Expression> ReplaceFieldsWithKnownValues(const KnownFieldValues& known_values, Expression expr){ if (!expr.IsBound()) { return Status::Invalid( "ReplaceFieldsWithKnownValues called on an unbound Expression"); }
returnModifyExpression( std::move(expr), [&known_values](Expression expr) -> Result<Expression> { if (auto ref = expr.field_ref()) { auto it = known_values.map.find(*ref); if (it != known_values.map.end()) { Datum lit = it->second; if (lit.type()->Equals(*expr.type())) returnliteral(std::move(lit)); // type mismatch, try casting the known value to the correct type
if (expr.type()->id() == Type::DICTIONARY && lit.type()->id() != Type::DICTIONARY) { // the known value must be dictionary encoded
/// Modify an Expression with pre-order and post-order visitation. /// `pre` will be invoked on each Expression. `pre` will visit Calls before their /// arguments, `post_call` will visit Calls (and no other Expressions) after their /// arguments. Visitors should return the Identical expression to indicate no change; this /// will prevent unnecessary construction in the common case where a modification is not /// possible/necessary/... /// /// If an argument was modified, `post_call` visits a reconstructed Call with the modified /// arguments but also receives a pointer to the unmodified Expression as a second /// argument. If no arguments were modified the unmodified Expression* will be nullptr. template <typename PreVisit, typename PostVisitCall> Result<Expression> ModifyExpression(Expression expr, const PreVisit& pre, const PostVisitCall& post_call){ ARROW_ASSIGN_OR_RAISE(expr, Result<Expression>(pre(std::move(expr))));
for (size_t i = 0; i < call->arguments.size(); ++i) { ARROW_ASSIGN_OR_RAISE(auto modified_argument, ModifyExpression(call->arguments[i], pre, post_call));
if (Identical(modified_argument, call->arguments[i])) { continue; }
if (!at_least_one_modified) { modified_arguments = call->arguments; at_least_one_modified = true; }
if (at_least_one_modified) { // reconstruct the call expression with the modified arguments auto modified_call = *call; modified_call.arguments = std::move(modified_arguments); returnpost_call(Expression(std::move(modified_call)), &expr); }
returnpost_call(std::move(expr), NULLPTR); }
is_valid: Nullability
这段逻辑非常弱智:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
for (constauto& guarantee : conjunction_members) { if (!guarantee.call()) continue; // ...
if (guarantee.call()->function_name == "is_valid") { ARROW_ASSIGN_OR_RAISE( auto simplified, SimplifyIsValidGuarantee(std::move(expr), *CallNotNull(guarantee)));
/// \brief Simplify an expression given a guarantee, if the guarantee /// is is_valid(). Result<Expression> SimplifyIsValidGuarantee(Expression expr, const Expression::Call& guarantee){ if (guarantee.function_name != "is_valid") return expr;
// An inequality comparison which a target Expression is known to satisfy. If nullable, // the target may evaluate to null in addition to values satisfying the comparison. structInequality { // The inequality type Comparison::type cmp; // The LHS of the inequality const FieldRef& target; // The RHS of the inequality const Datum& bound; // Whether target can be null bool nullable; // Extract an Inequality if possible, derived from "less", // "greater", "less_equal", and "greater_equal" expressions, // possibly disjuncted with an "is_null" Expression. // cmp(a, 2) // cmp(a, 2) or is_null(a) static std::optional<Inequality> ExtractOne(const Expression& guarantee) };
// Namespace-like wrapper around the comparison kinds.
//
// The three primitive outcomes (EQUAL, LESS, GREATER) each occupy a distinct bit, and
// the compound comparisons are defined as the bitwise OR of the outcomes they accept.
// NA (0) has no bit set.
struct Comparison {
  enum type {
    NA = 0,
    EQUAL = 1,
    LESS = 2,
    GREATER = 4,
    NOT_EQUAL = LESS | GREATER,
    LESS_EQUAL = LESS | EQUAL,
    GREATER_EQUAL = GREATER | EQUAL,
  };
};
static std::optional<Inequality> ExtractOneFromComparison(const Expression& guarantee){ auto call = guarantee.call(); if (!call) return std::nullopt;
if (auto cmp = Comparison::Get(call->function_name)) { // not_equal comparisons are not very usable as guarantees if (*cmp == Comparison::NOT_EQUAL) return std::nullopt;
auto target = call->arguments[0].field_ref(); if (!target) return std::nullopt;
auto bound = call->arguments[1].literal(); if (!bound) return std::nullopt; if (!bound->is_scalar()) return std::nullopt;
/// The given expression simplifies to `value` if the inequality /// target is not nullable. Otherwise, it simplifies to either a /// call to true_unless_null or !true_unless_null. Result<Expression> simplified_to(const Expression& bound_target, bool value)const{ if (!nullable) returnliteral(value);
ExecContext exec_context;
// Data may be null, so comparison will yield `value` - or null IFF the data was null // // true_unless_null is cheap; it purely reuses the validity bitmap for the values // buffer. Inversion is less cheap but we expect that term never to be evaluated // since invert(true_unless_null(x)) is not satisfiable. Expression::Call call; call.function_name = "true_unless_null"; call.arguments = {bound_target}; ARROW_ASSIGN_OR_RAISE( auto true_unless_null, BindNonRecursive(std::move(call), /*insert_implicit_casts=*/false, &exec_context)); if (value) return true_unless_null;