[SPARK-51773][SQL] Turn file formats into case classes to properly compare them #50562

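The change follows one pattern throughout: each FileFormat that used to be a plain class (and therefore compared by reference unless equals/hashCode were written by hand) becomes a case class, so the compiler derives structural equality and the hand-written overrides removed below are no longer needed. A minimal, self-contained sketch of that difference in Scala (PlainFormat and CaseFormat are illustrative names, not Spark classes):

object CaseClassEqualityDemo {
  // A plain class inherits reference equality from AnyRef: two logically
  // identical instances are not equal unless equals/hashCode are hand-written.
  class PlainFormat

  // A zero-argument case class gets compiler-generated equals/hashCode,
  // so every instance compares equal to every other instance.
  case class CaseFormat()

  def main(args: Array[String]): Unit = {
    println(new PlainFormat == new PlainFormat)              // false: reference equality
    println(CaseFormat() == CaseFormat())                    // true: structural equality
    println(CaseFormat().hashCode == CaseFormat().hashCode)  // true: stable across instances
  }
}

This structural equality is what the PR title means by being able to "properly compare" the file formats.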
@@ -32,7 +32,7 @@ import org.apache.spark.sql.sources.{DataSourceRegister, Filter}
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.SerializableConfiguration

-private[image] class ImageFileFormat extends FileFormat with DataSourceRegister {
+private[image] case class ImageFileFormat() extends FileFormat with DataSourceRegister {

   override def inferSchema(
       sparkSession: SparkSession,
@@ -67,9 +67,9 @@ private[libsvm] class LibSVMOutputWriter(
   }
 }

-/** @see [[LibSVMDataSource]] for public documentation. */
+// see `LibSVMDataSource` for public documentation.
 // If this is moved or renamed, please update DataSource's backwardCompatibilityMap.
-private[libsvm] class LibSVMFileFormat
+private[libsvm] case class LibSVMFileFormat()
   extends TextBasedFileFormat
   with DataSourceRegister
   with Logging {
@@ -55,7 +55,7 @@ import org.apache.spark.util.SerializableConfiguration
  * .load("/path/to/fileDir");
  * }}}
  */
-class BinaryFileFormat extends FileFormat with DataSourceRegister {
+case class BinaryFileFormat() extends FileFormat with DataSourceRegister {

   import BinaryFileFormat._

@@ -35,7 +35,7 @@ import org.apache.spark.util.SerializableConfiguration
 /**
  * Provides access to CSV data from pure SQL statements.
  */
-class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {
+case class CSVFileFormat() extends TextBasedFileFormat with DataSourceRegister {

   override def shortName(): String = "csv"

@@ -158,10 +158,6 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister {

   override def toString: String = "CSV"

-  override def hashCode(): Int = getClass.hashCode()
-
-  override def equals(other: Any): Boolean = other.isInstanceOf[CSVFileFormat]
-
   /**
    * Allow reading variant from CSV, but don't allow writing variant into CSV. This is because the
    * written data (the string representation of variant) may not be read back as the same variant.
@@ -32,7 +32,7 @@ import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types._
 import org.apache.spark.util.SerializableConfiguration

-class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister {
+case class JsonFileFormat() extends TextBasedFileFormat with DataSourceRegister {
   override val shortName: String = "json"

   override def isSplitable(
@@ -128,10 +128,6 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister {

   override def toString: String = "JSON"

-  override def hashCode(): Int = getClass.hashCode()
-
-  override def equals(other: Any): Boolean = other.isInstanceOf[JsonFileFormat]
-
   override def supportDataType(dataType: DataType): Boolean = dataType match {
     case _: VariantType => true

@@ -37,7 +37,7 @@ import org.apache.spark.util.{SerializableConfiguration, Utils}
 /**
  * A data source for reading text files. The text files must be encoded as UTF-8.
  */
-class TextFileFormat extends TextBasedFileFormat with DataSourceRegister {
+case class TextFileFormat() extends TextBasedFileFormat with DataSourceRegister {

   override def shortName(): String = "text"

@@ -35,7 +35,7 @@ import org.apache.spark.util.SerializableConfiguration
 /**
  * Provides access to XML data from pure SQL statements.
  */
-class XmlFileFormat extends TextBasedFileFormat with DataSourceRegister {
+case class XmlFileFormat() extends TextBasedFileFormat with DataSourceRegister {

   override def shortName(): String = "xml"

@@ -132,10 +132,6 @@ class XmlFileFormat extends TextBasedFileFormat with DataSourceRegister {

   override def toString: String = "XML"

-  override def hashCode(): Int = getClass.hashCode()
-
-  override def equals(other: Any): Boolean = other.isInstanceOf[XmlFileFormat]
-
   override def supportDataType(dataType: DataType): Boolean = dataType match {
     case _: VariantType => true

@@ -722,7 +722,7 @@ object LastArguments {
 }

 /** A test [[FileFormat]] that records the arguments passed to buildReader, and returns nothing. */
-class TestFileFormat extends TextBasedFileFormat {
+case class TestFileFormat() extends TextBasedFileFormat {

   override def toString: String = "TestFileFormat"

@@ -48,7 +48,7 @@ import org.apache.spark.util.SerializableJobConf
  *
  * TODO: implement the read logic.
  */
-class HiveFileFormat(fileSinkConf: FileSinkDesc)
+case class HiveFileFormat(fileSinkConf: FileSinkDesc)
   extends FileFormat with DataSourceRegister with Logging {

   def this() = this(null)
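Unlike the zero-argument formats above, HiveFileFormat keeps its fileSinkConf constructor parameter, so the compiler-generated equality compares that field rather than treating all instances as equal. A hedged sketch of those semantics with stand-in types (Conf and Fmt are illustrative, not Spark or Hive classes):

object ParamCaseClassEqualityDemo {
  // Stand-ins for FileSinkDesc and HiveFileFormat, for illustration only.
  case class Conf(dir: String)
  case class Fmt(conf: Conf)

  def main(args: Array[String]): Unit = {
    println(Fmt(Conf("/tmp/a")) == Fmt(Conf("/tmp/a")))  // true: field values match
    println(Fmt(Conf("/tmp/a")) == Fmt(Conf("/tmp/b")))  // false: field values differ
    println(Fmt(null) == Fmt(null))                      // true: mirrors the `def this() = this(null)` path above
  }
}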
@@ -55,16 +55,12 @@ import org.apache.spark.util.SerializableConfiguration
  * `FileFormat` for reading ORC files. If this is moved or renamed, please update
  * `DataSource`'s backwardCompatibilityMap.
  */
-class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable {
+case class OrcFileFormat() extends FileFormat with DataSourceRegister with Serializable {

   override def shortName(): String = "orc"

   override def toString: String = "ORC"

-  override def hashCode(): Int = getClass.hashCode()
-
-  override def equals(other: Any): Boolean = other.isInstanceOf[OrcFileFormat]
-
   override def inferSchema(
       sparkSession: SparkSession,
       options: Map[String, String],
@@ -33,7 +33,7 @@ import org.apache.spark.util.ArrayImplicits._
 import org.apache.spark.util.SerializableConfiguration
 import org.apache.spark.util.Utils

-class SimpleTextSource extends TextBasedFileFormat with DataSourceRegister {
+case class SimpleTextSource() extends TextBasedFileFormat with DataSourceRegister {
   override def shortName(): String = "test"

   override def inferSchema(