
Commit 96a87bd

upmerge

2 parents ab95a9b + 74a6a8d, commit 96a87bd

File tree

26 files changed: +525 -256 lines changed

common/src/main/scala/org/apache/comet/CometConf.scala

Lines changed: 1 addition & 10 deletions
@@ -282,7 +282,7 @@ object CometConf extends ShimCometConf {
       .checkValues(Set("zstd", "lz4", "snappy"))
       .createWithDefault("lz4")
 
-  val COMET_EXEC_SHUFFLE_COMPRESSION_LEVEL: ConfigEntry[Int] =
+  val COMET_EXEC_SHUFFLE_COMPRESSION_ZSTD_LEVEL: ConfigEntry[Int] =
     conf(s"$COMET_EXEC_CONFIG_PREFIX.shuffle.compression.zstd.level")
       .doc("The compression level to use when compressing shuffle files with zstd.")
       .intConf
@@ -461,15 +461,6 @@ object CometConf extends ShimCometConf {
       .intConf
       .createWithDefault(8192)
 
-  val COMET_EXEC_MEMORY_FRACTION: ConfigEntry[Double] = conf("spark.comet.exec.memoryFraction")
-    .doc(
-      "The fraction of memory from Comet memory overhead that the native memory " +
-        "manager can use for execution. The purpose of this config is to set aside memory for " +
-        "untracked data structures, as well as imprecise size estimation during memory " +
-        "acquisition.")
-    .doubleConf
-    .createWithDefault(0.7)
-
   val COMET_PARQUET_ENABLE_DIRECT_BUFFER: ConfigEntry[Boolean] =
     conf("spark.comet.parquet.enable.directBuffer")
       .doc("Whether to use Java direct byte buffer when reading Parquet.")

common/src/main/scala/org/apache/comet/vector/NativeUtil.scala

Lines changed: 0 additions & 2 deletions
@@ -163,8 +163,6 @@ class NativeUtil {
       case numRows =>
         val cometVectors = importVector(arrays, schemas)
         Some(new ColumnarBatch(cometVectors.toArray, numRows.toInt))
-      case flag =>
-        throw new IllegalStateException(s"Invalid native flag: $flag")
     }
   }
 
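The two deleted lines were unreachable: `case numRows =>` is a bare variable pattern, so it matches every value that reaches it, and any case after it is dead code (scalac flags it with an unreachable-code warning). A self-contained illustration of the same pitfall, with made-up names:

```scala
// Illustrative only: the variable pattern `numRows` binds any remaining
// value, so the commented-out case below could never be selected.
def interpret(flag: Long): String = flag match {
  case 0L      => "end of stream"
  case numRows => s"batch of $numRows rows" // matches all non-zero values
  // case other => throw new IllegalStateException(s"Invalid native flag: $other")
}
```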

dev/diffs/3.4.3.diff

Lines changed: 53 additions & 13 deletions
@@ -53,7 +53,7 @@ index b386d135da1..46449e3f3f1 100644
     <!--
       This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude
 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
-index c595b50950b..6b60213e775 100644
+index c595b50950b..3abb6cb9441 100644
 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
 +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
 @@ -102,7 +102,7 @@ class SparkSession private(
@@ -79,7 +79,7 @@ index c595b50950b..6b60213e775 100644
   }
 
 +  private def loadCometExtension(sparkContext: SparkContext): Seq[String] = {
-+    if (sparkContext.getConf.getBoolean("spark.comet.enabled", false)) {
++    if (sparkContext.getConf.getBoolean("spark.comet.enabled", isCometEnabled)) {
 +      Seq("org.apache.comet.CometSparkSessionExtensions")
 +    } else {
 +      Seq.empty
@@ -100,6 +100,19 @@ index c595b50950b..6b60213e775 100644
       try {
         val extensionConfClass = Utils.classForName(extensionConfClassName)
         val extensionConf = extensionConfClass.getConstructor().newInstance()
+@@ -1323,4 +1333,12 @@ object SparkSession extends Logging {
+     }
+   }
+ }
++
++  /**
++   * Whether Comet extension is enabled
++   */
++  def isCometEnabled: Boolean = {
++    val v = System.getenv("ENABLE_COMET")
++    v == null || v.toBoolean
++  }
+ }
 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala
 index db587dd9868..aac7295a53d 100644
 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala
@@ -957,6 +970,37 @@ index 525d97e4998..8a3e7457618 100644
     AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
       sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
     }
+diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+index 48ad10992c5..51d1ee65422 100644
+--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
++++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+@@ -221,6 +221,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper {
+     withSession(extensions) { session =>
+       session.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED, true)
+       session.conf.set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1")
++      // https://github.com/apache/datafusion-comet/issues/1197
++      session.conf.set("spark.comet.enabled", false)
+       assert(session.sessionState.columnarRules.contains(
+         MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())))
+       import session.sqlContext.implicits._
+@@ -279,6 +281,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper {
+     }
+     withSession(extensions) { session =>
+       session.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED, enableAQE)
++      // https://github.com/apache/datafusion-comet/issues/1197
++      session.conf.set("spark.comet.enabled", false)
+       assert(session.sessionState.columnarRules.contains(
+         MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())))
+       import session.sqlContext.implicits._
+@@ -317,6 +321,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper {
+     val session = SparkSession.builder()
+       .master("local[1]")
+       .config(COLUMN_BATCH_SIZE.key, 2)
++      // https://github.com/apache/datafusion-comet/issues/1197
++      .config("spark.comet.enabled", false)
+       .withExtensions { extensions =>
+         extensions.injectColumnar(session =>
+           MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) }
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
 index 75eabcb96f2..36e3318ad7e 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -2746,7 +2790,7 @@ index abe606ad9c1..2d930b64cca 100644
     val tblTargetName = "tbl_target"
     val tblSourceQualified = s"default.$tblSourceName"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..aa9b0be8e68 100644
+index dd55fcfe42c..2702f87c1f1 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 @@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -2770,17 +2814,14 @@ index dd55fcfe42c..aa9b0be8e68 100644
   }
 }
 
-@@ -242,6 +247,41 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +247,38 @@ private[sql] trait SQLTestUtilsBase
   protected override def _sqlContext: SQLContext = self.spark.sqlContext
 }
 
 +  /**
 +   * Whether Comet extension is enabled
 +   */
-+  protected def isCometEnabled: Boolean = {
-+    val v = System.getenv("ENABLE_COMET")
-+    v != null && v.toBoolean
-+  }
++  protected def isCometEnabled: Boolean = SparkSession.isCometEnabled
 +
 +  /**
 +   * Whether to enable ansi mode This is only effective when
@@ -2812,7 +2853,7 @@ index dd55fcfe42c..aa9b0be8e68 100644
   protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
     SparkSession.setActiveSession(spark)
     super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +474,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +471,8 @@ private[sql] trait SQLTestUtilsBase
   val schema = df.schema
   val withoutFilters = df.queryExecution.executedPlan.transform {
     case FilterExec(_, child) => child
@@ -2910,10 +2951,10 @@ index 1966e1e64fd..cde97a0aafe 100644
     spark.sql(
       """
 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-index 07361cfdce9..6673c141c9a 100644
+index 07361cfdce9..e40c59a4207 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -55,25 +55,53 @@ object TestHive
+@@ -55,25 +55,52 @@ object TestHive
     new SparkContext(
       System.getProperty("spark.sql.test.master", "local[1]"),
       "TestSQLContext",
@@ -2955,8 +2996,7 @@ index 07361cfdce9..6673c141c9a 100644
 +      // ConstantPropagation etc.
 +      .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
 +
-+      val v = System.getenv("ENABLE_COMET")
-+      if (v != null && v.toBoolean) {
++      if (SparkSession.isCometEnabled) {
 +        conf
 +          .set("spark.sql.extensions", "org.apache.comet.CometSparkSessionExtensions")
 +          .set("spark.comet.enabled", "true")

dev/diffs/3.5.1.diff

Lines changed: 57 additions & 17 deletions
@@ -53,19 +53,19 @@ index c46ab7b8fce..13357e8c7a6 100644
     <!--
       This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude
 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
-index 27ae10b3d59..064cbc252ea 100644
+index 27ae10b3d59..78e69902dfd 100644
 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
 +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
 @@ -1353,6 +1353,14 @@ object SparkSession extends Logging {
     }
   }
 
 +  private def loadCometExtension(sparkContext: SparkContext): Seq[String] = {
-+    if (sparkContext.getConf.getBoolean("spark.comet.enabled", false)) {
-+      Seq("org.apache.comet.CometSparkSessionExtensions")
-+    } else {
-+      Seq.empty
-+    }
++    if (sparkContext.getConf.getBoolean("spark.comet.enabled", isCometEnabled)) {
++      Seq("org.apache.comet.CometSparkSessionExtensions")
++    } else {
++      Seq.empty
++    }
 +  }
 +
    /**
@@ -79,6 +79,19 @@ index 27ae10b3d59..064cbc252ea 100644
     extensionConfClassNames.foreach { extensionConfClassName =>
       try {
         val extensionConfClass = Utils.classForName(extensionConfClassName)
+@@ -1396,4 +1405,12 @@ object SparkSession extends Logging {
+     }
+   }
+ }
++
++  /**
++   * Whether Comet extension is enabled
++   */
++  def isCometEnabled: Boolean = {
++    val v = System.getenv("ENABLE_COMET")
++    v == null || v.toBoolean
++  }
+ }
 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala
 index db587dd9868..aac7295a53d 100644
 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala
@@ -959,6 +972,37 @@ index cfeccbdf648..803d8734cc4 100644
     AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "external sort") {
       sql("SELECT * FROM testData2 ORDER BY a ASC, b ASC").collect()
     }
+diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+index 8b4ac474f87..3f79f20822f 100644
+--- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
++++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala
+@@ -223,6 +223,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper with Adapt
+     withSession(extensions) { session =>
+       session.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED, true)
+       session.conf.set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1")
++      // https://github.com/apache/datafusion-comet/issues/1197
++      session.conf.set("spark.comet.enabled", false)
+       assert(session.sessionState.columnarRules.contains(
+         MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())))
+       import session.sqlContext.implicits._
+@@ -281,6 +283,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper with Adapt
+     }
+     withSession(extensions) { session =>
+       session.conf.set(SQLConf.ADAPTIVE_EXECUTION_ENABLED, enableAQE)
++      // https://github.com/apache/datafusion-comet/issues/1197
++      session.conf.set("spark.comet.enabled", false)
+       assert(session.sessionState.columnarRules.contains(
+         MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())))
+       import session.sqlContext.implicits._
+@@ -319,6 +323,8 @@ class SparkSessionExtensionSuite extends SparkFunSuite with SQLHelper with Adapt
+     val session = SparkSession.builder()
+       .master("local[1]")
+       .config(COLUMN_BATCH_SIZE.key, 2)
++      // https://github.com/apache/datafusion-comet/issues/1197
++      .config("spark.comet.enabled", false)
+       .withExtensions { extensions =>
+         extensions.injectColumnar(session =>
+           MyColumnarRule(PreRuleReplaceAddWithBrokenVersion(), MyPostRule())) }
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
 index fbc256b3396..0821999c7c2 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -2731,7 +2775,7 @@ index abe606ad9c1..2d930b64cca 100644
     val tblTargetName = "tbl_target"
     val tblSourceQualified = s"default.$tblSourceName"
 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..aa9b0be8e68 100644
+index dd55fcfe42c..2702f87c1f1 100644
 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
 @@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -2755,17 +2799,14 @@ index dd55fcfe42c..aa9b0be8e68 100644
   }
 }
 
-@@ -242,6 +247,41 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +247,38 @@ private[sql] trait SQLTestUtilsBase
   protected override def _sqlContext: SQLContext = self.spark.sqlContext
 }
 
 +  /**
 +   * Whether Comet extension is enabled
 +   */
-+  protected def isCometEnabled: Boolean = {
-+    val v = System.getenv("ENABLE_COMET")
-+    v != null && v.toBoolean
-+  }
++  protected def isCometEnabled: Boolean = SparkSession.isCometEnabled
 +
 +  /**
 +   * Whether to enable ansi mode This is only effective when
@@ -2797,7 +2838,7 @@ index dd55fcfe42c..aa9b0be8e68 100644
   protected override def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
     SparkSession.setActiveSession(spark)
     super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +474,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +471,8 @@ private[sql] trait SQLTestUtilsBase
   val schema = df.schema
   val withoutFilters = df.queryExecution.executedPlan.transform {
     case FilterExec(_, child) => child
@@ -2895,10 +2936,10 @@ index dc8b184fcee..dd69a989d40 100644
     spark.sql(
       """
 diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-index 9284b35fb3e..e8984be5ebc 100644
+index 9284b35fb3e..2a0269bdc16 100644
 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
 +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -53,25 +53,53 @@ object TestHive
+@@ -53,25 +53,52 @@ object TestHive
     new SparkContext(
       System.getProperty("spark.sql.test.master", "local[1]"),
       "TestSQLContext",
@@ -2940,8 +2981,7 @@ index 9284b35fb3e..e8984be5ebc 100644
 +      // ConstantPropagation etc.
 +      .set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key, ConvertToLocalRelation.ruleName)
 +
-+      val v = System.getenv("ENABLE_COMET")
-+      if (v != null && v.toBoolean) {
++      if (SparkSession.isCometEnabled) {
 +        conf
 +          .set("spark.sql.extensions", "org.apache.comet.CometSparkSessionExtensions")
 +          .set("spark.comet.enabled", "true")
