diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala index a513aa1a77..911de69cbf 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometArithmeticBenchmark.scala @@ -44,7 +44,7 @@ object CometArithmeticBenchmark extends CometBenchmarkBase { val name = s"Binary op ${dataType.sql}, dictionary = $useDictionary" val query = s"SELECT c1 ${op.sig} c2 FROM $table" - runExpressionBenchmark(name, values, query) + runExpressionBenchmark(name, values, query, isAnsiMode = false) } } } @@ -64,7 +64,7 @@ object CometArithmeticBenchmark extends CometBenchmarkBase { val name = s"Binary op ${dataType.sql}, dictionary = $useDictionary" val query = s"SELECT c1 ${op.sig} c2 FROM $table" - runExpressionBenchmark(name, values, query) + runExpressionBenchmark(name, values, query, isAnsiMode = false) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala index 8d56cefa05..b9cf70e4c4 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometBenchmarkBase.scala @@ -23,7 +23,7 @@ import java.io.File import java.nio.charset.StandardCharsets import java.util.Base64 -import scala.util.Random +import scala.util.{Random, Try} import org.apache.parquet.crypto.DecryptionPropertiesFactory import org.apache.parquet.crypto.keytools.{KeyToolkit, PropertiesDrivenCryptoFactory} @@ -88,28 +88,6 @@ trait CometBenchmarkBase extends SqlBasedBenchmark { } } - /** Runs function `f` with Comet on and off. */ - final def runWithComet(name: String, cardinality: Long)(f: => Unit): Unit = { - val benchmark = new Benchmark(name, cardinality, output = output) - - benchmark.addCase(s"$name - Spark ") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "false") { - f - } - } - - benchmark.addCase(s"$name - Comet") { _ => - withSQLConf( - CometConf.COMET_ENABLED.key -> "true", - CometConf.COMET_EXEC_ENABLED.key -> "true", - SQLConf.ANSI_ENABLED.key -> "false") { - f - } - } - - benchmark.run() - } - /** * Runs an expression benchmark with standard cases: Spark, Comet (Scan), Comet (Scan + Exec). * This provides a consistent benchmark structure for expression evaluation. @@ -127,37 +105,53 @@ trait CometBenchmarkBase extends SqlBasedBenchmark { name: String, cardinality: Long, query: String, + isAnsiMode: Boolean, extraCometConfigs: Map[String, String] = Map.empty): Unit = { + val benchmark = new Benchmark(name, cardinality, output = output) benchmark.addCase("Spark") { _ => - withSQLConf(CometConf.COMET_ENABLED.key -> "false") { - spark.sql(query).noop() + withSQLConf( + CometConf.COMET_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) { + runSparkCommand(spark, query, isAnsiMode) } } benchmark.addCase("Comet (Scan)") { _ => withSQLConf( CometConf.COMET_ENABLED.key -> "true", - CometConf.COMET_EXEC_ENABLED.key -> "false") { - spark.sql(query).noop() + CometConf.COMET_EXEC_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) { + runSparkCommand(spark, query, isAnsiMode) } } val cometExecConfigs = Map( CometConf.COMET_ENABLED.key -> "true", CometConf.COMET_EXEC_ENABLED.key -> "true", - "spark.sql.optimizer.constantFolding.enabled" -> "false") ++ extraCometConfigs + "spark.sql.optimizer.constantFolding.enabled" -> "false", + SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) ++ extraCometConfigs benchmark.addCase("Comet (Scan + Exec)") { _ => withSQLConf(cometExecConfigs.toSeq: _*) { - spark.sql(query).noop() + runSparkCommand(spark, query, isAnsiMode) } } benchmark.run() } + private def runSparkCommand(spark: SparkSession, query: String, isANSIMode: Boolean): Unit = { + if (isANSIMode) { + Try { + spark.sql(query).noop() + } + } else { + spark.sql(query).noop() + } + } + protected def prepareTable(dir: File, df: DataFrame, partition: Option[String] = None): Unit = { val testDf = if (partition.isDefined) { df.write.partitionBy(partition.get) diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala index 975abd632f..b4a3c13ad0 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastBenchmark.scala @@ -84,11 +84,9 @@ object CometCastBenchmark extends CometBenchmarkBase { val functionSQL = castExprSQL(toDataType, "value") val query = s"SELECT $functionSQL FROM parquetV1Table" val name = - s"Cast function to : ${toDataType} , ansi mode enabled : ${isAnsiMode}" + s"Cast function from : ${fromDataType} to : ${toDataType} , ansi mode enabled : ${isAnsiMode}" - val extraConfigs = Map(SQLConf.ANSI_ENABLED.key -> isAnsiMode.toString) - - runExpressionBenchmark(name, values, query, extraConfigs) + runExpressionBenchmark(name, values, query, isAnsiMode) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala index c5eb9ea390..b710552d44 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometConditionalExpressionBenchmark.scala @@ -35,7 +35,7 @@ object CometConditionalExpressionBenchmark extends CometBenchmarkBase { val query = "select CASE WHEN c1 < 0 THEN '<0' WHEN c1 = 0 THEN '=0' ELSE '>0' END from parquetV1Table" - runExpressionBenchmark("Case When Expr", values, query) + runExpressionBenchmark("Case When Expr", values, query, isAnsiMode = false) } } } @@ -47,7 +47,7 @@ object CometConditionalExpressionBenchmark extends CometBenchmarkBase { val query = "select IF (c1 < 0, '<0', '>=0') from parquetV1Table" - runExpressionBenchmark("If Expr", values, query) + runExpressionBenchmark("If Expr", values, query, isAnsiMode = false) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala index 47eff41bbd..abcae06e7d 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometDatetimeExpressionBenchmark.scala @@ -41,7 +41,7 @@ object CometDatetimeExpressionBenchmark extends CometBenchmarkBase { val isDictionary = if (useDictionary) "(Dictionary)" else "" val name = s"Date Truncate $isDictionary - $level" val query = s"select trunc(dt, '$level') from parquetV1Table" - runExpressionBenchmark(name, values, query) + runExpressionBenchmark(name, values, query, isAnsiMode = false) } } } @@ -70,7 +70,7 @@ object CometDatetimeExpressionBenchmark extends CometBenchmarkBase { val isDictionary = if (useDictionary) "(Dictionary)" else "" val name = s"Timestamp Truncate $isDictionary - $level" val query = s"select date_trunc('$level', ts) from parquetV1Table" - runExpressionBenchmark(name, values, query) + runExpressionBenchmark(name, values, query, isAnsiMode = false) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala index 5b4741ba68..f52d4900d2 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometJsonExpressionBenchmark.scala @@ -120,7 +120,12 @@ object CometJsonExpressionBenchmark extends CometBenchmarkBase { CometConf.getExprAllowIncompatConfigKey( classOf[JsonToStructs]) -> "true") ++ config.extraCometConfigs - runExpressionBenchmark(config.name, values, config.query, extraConfigs) + runExpressionBenchmark( + config.name, + values, + config.query, + isAnsiMode = false, + extraConfigs) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala index 6506c5665d..db68e2db40 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometPredicateExpressionBenchmark.scala @@ -38,7 +38,7 @@ object CometPredicateExpressionBenchmark extends CometBenchmarkBase { val query = "select * from parquetV1Table where c1 in ('positive', 'zero')" - runExpressionBenchmark("in Expr", values, query) + runExpressionBenchmark("in Expr", values, query, isAnsiMode = false) } } } diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala index 41eabb8513..7f27cd593b 100644 --- a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala +++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometStringExpressionBenchmark.scala @@ -55,7 +55,12 @@ object CometStringExpressionBenchmark extends CometBenchmarkBase { val extraConfigs = Map(CometConf.COMET_CASE_CONVERSION_ENABLED.key -> "true") ++ config.extraCometConfigs - runExpressionBenchmark(config.name, values, config.query, extraConfigs) + runExpressionBenchmark( + config.name, + values, + config.query, + isAnsiMode = false, + extraConfigs) } } }