
Commit 04d7265

itholic authored and HyukjinKwon committed
[SPARK-41979][SQL] Add missing dots for error messages in error classes
### What changes were proposed in this pull request?

This PR proposes to add missing dots for error messages in error classes. This PR also fixes related tests and includes a minor error message fix.

### Why are the changes needed?

To keep consistency across all error messages. Error messages should end with a dot.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

`./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"`

Closes apache#39505 from itholic/missing_dots.

Authored-by: itholic <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
1 parent 57d06f8 commit 04d7265
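
Across the test files in this commit, the change follows one pattern: assertions on raw exception message strings are replaced with checkError, which compares the error class and the message parameters rather than the rendered text, so the new trailing dots do not have to be repeated in every test. The sketch below is illustrative only and is not part of the commit; the suite name and the spark.range data are made up, while checkError, errorClass, parameters, SparkIllegalArgumentException and the _LEGACY_ERROR_TEMP_2114 / "stats" pairing are taken from the DataFrameSuite diff further down.

import org.apache.spark.SparkIllegalArgumentException
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession

// Illustrative sketch of the checkError pattern used throughout this PR.
// QueryTest (via SparkFunSuite) provides checkError in Spark's test code.
class MissingDotsExampleSuite extends QueryTest with SharedSparkSession {
  test("summary() rejects an unknown statistic via an error class") {
    val df = spark.range(5).toDF("id")
    checkError(
      // The interception and the expected error class/parameters mirror
      // the DataFrameSuite change below.
      exception = intercept[SparkIllegalArgumentException] {
        df.summary("foo")
      },
      errorClass = "_LEGACY_ERROR_TEMP_2114",
      parameters = Map("stats" -> "foo")
    )
  }
}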

File tree: 12 files changed (+399 -350 lines)


core/src/main/resources/error/error-classes.json (+307 -307)

Large diffs are not rendered by default.
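
The JSON changes themselves are not rendered above. As a rough illustration only, one of the affected templates, reconstructed from the expected message in SparkThrowableSuite below, now ends with a dot; the surrounding field layout (the "message" array and the "subClass" nesting) is assumed from the usual error-classes.json format and is not copied from this diff:

"UNRESOLVED_COLUMN" : {
  "message" : [
    "A column or function parameter with name <objectName> cannot be resolved."
  ],
  "subClass" : {
    "WITH_SUGGESTION" : {
      "message" : [
        "Did you mean one of the following? [<proposal>]."
      ]
    }
  }
}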

core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala (+2 -2)

@@ -194,7 +194,7 @@ class SparkThrowableSuite extends SparkFunSuite {
         Map("objectName" -> "`foo`", "proposal" -> "`bar`, `baz`")
       ) ==
         "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with " +
-        "name `foo` cannot be resolved. Did you mean one of the following? [`bar`, `baz`]"
+        "name `foo` cannot be resolved. Did you mean one of the following? [`bar`, `baz`]."
     )

     assert(
@@ -206,7 +206,7 @@ class SparkThrowableSuite extends SparkFunSuite {
         ""
       ) ==
         "[UNRESOLVED_COLUMN.WITH_SUGGESTION] A column or function parameter with " +
-        "name `foo` cannot be resolved. Did you mean one of the following? [`bar`, `baz`]"
+        "name `foo` cannot be resolved. Did you mean one of the following? [`bar`, `baz`]."
     )
   }

core/src/test/scala/org/apache/spark/metrics/sink/GraphiteSinkSuite.scala (+7 -6)

@@ -113,11 +113,12 @@ class GraphiteSinkSuite extends SparkFunSuite {
     props.put("protocol", "http")
     val registry = new MetricRegistry

-    val e = intercept[SparkException] {
-      new GraphiteSink(props, registry)
-    }
-    assert(e.getErrorClass === "GRAPHITE_SINK_INVALID_PROTOCOL")
-    assert(e.getMessage ===
-      "[GRAPHITE_SINK_INVALID_PROTOCOL] Invalid Graphite protocol: http")
+    checkError(
+      exception = intercept[SparkException] {
+        new GraphiteSink(props, registry)
+      },
+      errorClass = "GRAPHITE_SINK_INVALID_PROTOCOL",
+      parameters = Map("protocol" -> "http")
+    )
   }
 }

sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala (+10 -5)

@@ -153,11 +153,16 @@ class DataTypeSuite extends SparkFunSuite {
     val right = StructType(
       StructField("b", LongType) :: Nil)

-    val message = intercept[SparkException] {
-      left.merge(right)
-    }.getMessage
-    assert(message.equals("Failed to merge fields 'b' and 'b'. " +
-      "Failed to merge incompatible data types float and bigint"))
+    checkError(
+      exception = intercept[SparkException] {
+        left.merge(right)
+      },
+      errorClass = "_LEGACY_ERROR_TEMP_2123",
+      parameters = Map(
+        "leftName" -> "b",
+        "rightName" -> "b",
+        "message" -> "Failed to merge incompatible data types float and bigint.")
+    )
   }

   test("existsRecursively") {

sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out (+3 -3)

@@ -163,7 +163,7 @@ org.apache.spark.SparkDateTimeException
   "sqlState" : "22007",
   "messageParameters" : {
     "ansiConfig" : "\"spark.sql.ansi.enabled\"",
-    "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31"
+    "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31."
   }
 }

@@ -179,7 +179,7 @@ org.apache.spark.SparkDateTimeException
   "sqlState" : "22007",
   "messageParameters" : {
     "ansiConfig" : "\"spark.sql.ansi.enabled\"",
-    "message" : "Conflict found: Field MonthOfYear 11 differs from MonthOfYear 12 derived from 1970-12-31"
+    "message" : "Conflict found: Field MonthOfYear 11 differs from MonthOfYear 12 derived from 1970-12-31."
   }
 }

@@ -211,7 +211,7 @@ org.apache.spark.SparkDateTimeException
   "sqlState" : "22007",
   "messageParameters" : {
     "ansiConfig" : "\"spark.sql.ansi.enabled\"",
-    "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31"
+    "message" : "Conflict found: Field DayOfMonth 30 differs from DayOfMonth 31 derived from 1970-12-31."
   }
 }

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala (+15 -9)

@@ -29,7 +29,7 @@ import scala.util.Random

 import org.scalatest.matchers.should.Matchers._

-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkIllegalArgumentException}
 import org.apache.spark.api.python.PythonEvalType
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
@@ -1142,15 +1142,21 @@ class DataFrameSuite extends QueryTest
     val onlyPercentiles = person2.summary("0.1%", "99.9%")
     assert(onlyPercentiles.count() === 2)

-    val fooE = intercept[IllegalArgumentException] {
-      person2.summary("foo")
-    }
-    assert(fooE.getMessage === "foo is not a recognised statistic")
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        person2.summary("foo")
+      },
+      errorClass = "_LEGACY_ERROR_TEMP_2114",
+      parameters = Map("stats" -> "foo")
+    )

-    val parseE = intercept[IllegalArgumentException] {
-      person2.summary("foo%")
-    }
-    assert(parseE.getMessage === "Unable to parse foo% as a percentile")
+    checkError(
+      exception = intercept[SparkIllegalArgumentException] {
+        person2.summary("foo%")
+      },
+      errorClass = "_LEGACY_ERROR_TEMP_2113",
+      parameters = Map("stats" -> "foo%")
+    )
   }

   test("apply on query results (SPARK-5462)") {

sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala (+11 -2)

@@ -584,8 +584,17 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession {
     val e = intercept[AnalysisException] {
       sql(s"SELECT * FROM test_view")
     }
-    assert(e.getMessage.contains("re-create the view by running: CREATE OR REPLACE"))
-    val ddl = e.getMessage.split(": ").last
+    checkError(
+      exception = e,
+      errorClass = "_LEGACY_ERROR_TEMP_1176",
+      parameters = Map(
+        "viewName" -> "`spark_catalog`.`default`.`test_view`",
+        "viewDDL" ->
+          "CREATE OR REPLACE VIEW spark_catalog.default.test_view AS SELECT * FROM t",
+        "actualCols" -> "[]", "colName" -> "col_j",
+        "expectedNum" -> "1")
+    )
+    val ddl = e.getMessageParameters.get("viewDDL")
     sql(ddl)
     checkAnswer(sql("select * FROM test_view"), Row(1))
   }

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala (+8 -5)

@@ -29,7 +29,7 @@ import org.apache.hadoop.fs.viewfs.ViewFileSystem
 import org.mockito.ArgumentMatchers.any
 import org.mockito.Mockito.{mock, when}

-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkRuntimeException}
 import org.apache.spark.metrics.source.HiveCatalogMetrics
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.util._
@@ -133,10 +133,13 @@ class FileIndexSuite extends SharedSparkSession {
     val schema = StructType(Seq(StructField("a", IntegerType, false)))
     withSQLConf(SQLConf.VALIDATE_PARTITION_COLUMNS.key -> "true") {
       val fileIndex = new InMemoryFileIndex(spark, Seq(path), Map.empty, Some(schema))
-      val msg = intercept[RuntimeException] {
-        fileIndex.partitionSpec()
-      }.getMessage
-      assert(msg == "Failed to cast value `foo` to `IntegerType` for partition column `a`")
+      checkError(
+        exception = intercept[SparkRuntimeException] {
+          fileIndex.partitionSpec()
+        },
+        errorClass = "_LEGACY_ERROR_TEMP_2058",
+        parameters = Map("value" -> "foo", "dataType" -> "IntegerType", "columnName" -> "a")
+      )
     }

     withSQLConf(SQLConf.VALIDATE_PARTITION_COLUMNS.key -> "false") {

sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala (+22 -2)

@@ -1001,9 +1001,29 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
         val col = spark.read.parquet(file).schema.fields.filter(_.name == "a")
         assert(col.length == 1)
         if (col(0).dataType == StringType) {
-          assert(errMsg.contains("Column: [a], Expected: int, Found: BINARY"))
+          checkError(
+            exception = e.getCause.asInstanceOf[SparkException],
+            errorClass = "_LEGACY_ERROR_TEMP_2063",
+            parameters = Map(
+              "filePath" ->
+                s".*${dir.getCanonicalPath}.*",
+              "column" -> "\\[a\\]",
+              "logicalType" -> "int",
+              "physicalType" -> "BINARY"),
+            matchPVals = true
+          )
         } else {
-          assert(errMsg.endsWith("Column: [a], Expected: string, Found: INT32"))
+          checkError(
+            exception = e.getCause.asInstanceOf[SparkException],
+            errorClass = "_LEGACY_ERROR_TEMP_2063",
+            parameters = Map(
+              "filePath" ->
+                s".*${dir.getCanonicalPath}.*",
+              "column" -> "\\[a\\]",
+              "logicalType" -> "string",
+              "physicalType" -> "INT32"),
+            matchPVals = true
+          )
         }
       }
     }

sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala (+7 -4)

@@ -535,10 +535,13 @@ class HashedRelationSuite extends SharedSparkSession {
       buffer.append(keyIterator.next().getLong(0))
     }
     // attempt an illegal next() call
-    val caught = intercept[SparkException] {
-      keyIterator.next()
-    }
-    assert(caught.getLocalizedMessage === "End of the iterator")
+    checkError(
+      exception = intercept[SparkException] {
+        keyIterator.next()
+      },
+      errorClass = "_LEGACY_ERROR_TEMP_2104",
+      parameters = Map.empty
+    )
     assert(buffer.sortWith(_ < _) === randomArray)
     buffer.clear()

sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/sources/ForeachWriterSuite.scala (+6 -4)

@@ -261,7 +261,7 @@ class ForeachWriterSuite extends StreamTest with SharedSparkSession with BeforeA
     }
   }

-  testQuietly("foreach with error not caused by ForeachWriter") {
+  test("foreach with error not caused by ForeachWriter") {
     withTempDir { checkpointDir =>
       val input = MemoryStream[Int]
       val query = input.toDS().repartition(1).map(_ / 0).writeStream
@@ -283,9 +283,11 @@ class ForeachWriterSuite extends StreamTest with SharedSparkSession with BeforeA
       assert(allEvents(0)(0) === ForeachWriterSuite.Open(partition = 0, version = 0))
       // `close` should be called with the error
       val errorEvent = allEvents(0)(1).asInstanceOf[ForeachWriterSuite.Close]
-      assert(errorEvent.error.get.isInstanceOf[SparkException])
-      assert(errorEvent.error.get.getMessage ===
-        "Foreach writer has been aborted due to a task failure")
+      checkError(
+        exception = errorEvent.error.get.asInstanceOf[SparkException],
+        errorClass = "_LEGACY_ERROR_TEMP_2256",
+        parameters = Map.empty
+      )
     }
   }
 }

sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala (+1 -1)

@@ -641,7 +641,7 @@ class CliSuite extends SparkFunSuite {

   test("SPARK-37694: delete [jar|file|archive] shall use spark sql processor") {
     runCliWithin(2.minute, errorResponses = Seq("ParseException"))(
-      "delete jar dummy.jar;" -> "Syntax error at or near 'jar': missing 'FROM'(line 1, pos 7)")
+      "delete jar dummy.jar;" -> "Syntax error at or near 'jar': missing 'FROM'.(line 1, pos 7)")
   }

   test("SPARK-37906: Spark SQL CLI should not pass final comment") {
