diff --git a/benchmarks/README.md b/benchmarks/README.md index 0b71628b2db12..b0aa4277e687d 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -157,7 +157,7 @@ To get data in `DATA_DIR` for TPCDS, please follow instructions in `./benchmarks DATA_DIR=../../datafusion-benchmarks/tpcds/data/sf1/ ./benchmarks/compare_tpcds.sh main mybranch ``` -Alternatively you can compare manually followng the example velor +Alternatively, you can compare manually following the example below ```shell git checkout main @@ -240,6 +240,21 @@ Benchmark tpch_mem.json └──────────────┴──────────────┴──────────────┴───────────────┘ ``` +## Comparing performance of main and a PR + +### TPCDS + +Assuming you already have the TPCDS data locally: + +```shell +export DATA_DIR=../../datafusion-benchmarks/tpcds/data/sf1/ +export PR_NUMBER=19464 +gh pr checkout $PR_NUMBER --repo apache/datafusion -b pr-$PR_NUMBER +git checkout main +git pull +./benchmarks/compare_tpcds.sh main pr-$PR_NUMBER +``` + ### Running Benchmarks Manually Assuming data is in the `data` directory, the `tpch` benchmark can be run with a command like this: diff --git a/benchmarks/bench.sh b/benchmarks/bench.sh index d5fa52d7f00ee..6679405623d06 100755 --- a/benchmarks/bench.sh +++ b/benchmarks/bench.sh @@ -684,7 +684,7 @@ run_tpch_mem() { # Runs the tpcds benchmark run_tpcds() { - TPCDS_DIR="${DATA_DIR}/tpcds_sf1" + TPCDS_DIR="${DATA_DIR}" # Check if TPCDS data directory and representative file exists if [ ! 
-f "${TPCDS_DIR}/web_site.parquet" ]; then diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 26d7fc99cb17c..28a07ad761012 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -24,10 +24,10 @@ use crate::sort_properties::{ExprProperties, SortProperties}; use crate::udf_eq::UdfEq; use crate::{ColumnarValue, Documentation, Expr, Signature}; use arrow::datatypes::{DataType, Field, FieldRef}; +#[cfg(debug_assertions)] +use datafusion_common::assert_or_internal_err; use datafusion_common::config::ConfigOptions; -use datafusion_common::{ - ExprSchema, Result, ScalarValue, assert_or_internal_err, not_impl_err, -}; +use datafusion_common::{ExprSchema, Result, ScalarValue, not_impl_err}; use datafusion_expr_common::dyn_eq::{DynEq, DynHash}; use datafusion_expr_common::interval_arithmetic::Interval; use std::any::Any;