Skip to content

[SPARK-51421][SQL] Get seconds of TIME datatype #50525

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ object FunctionRegistry {
expression[NextDay]("next_day"),
expression[Now]("now"),
expression[Quarter]("quarter"),
expression[Second]("second"),
expressionBuilder("second", SecondExpressionBuilder),
expression[ParseToTimestamp]("to_timestamp"),
expression[ParseToDate]("to_date"),
expression[ToTime]("to_time"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.catalyst.util.TimeFormatter
import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.internal.types.StringTypeWithCollation
import org.apache.spark.sql.types.{AbstractDataType, IntegerType, ObjectType, TimeType}
import org.apache.spark.sql.types.{AbstractDataType, IntegerType, ObjectType, TimeType, TypeCollection}
import org.apache.spark.unsafe.types.UTF8String

/**
Expand Down Expand Up @@ -290,3 +290,62 @@ object HourExpressionBuilder extends ExpressionBuilder {
}
}
}

/**
 * Expression that extracts the seconds component of a TIME value.
 *
 * This is a `RuntimeReplaceable` expression: `replacement` is a `StaticInvoke` that calls the
 * method named "getSecondsOfTime" on `DateTimeUtils`, passing `child` as the single argument
 * and declaring `IntegerType` as the result type, so the result is a whole number of seconds
 * with no fractional part.
 *
 * `inputTypes` accepts one argument whose type is a `TimeType` of any precision from
 * `TimeType.MIN_PRECISION` to `TimeType.MAX_PRECISION` (inclusive). `prettyName` is "second",
 * and `withNewChildrenInternal` rebuilds the expression from the first of the new children.
 *
 * @param child the expression that produces the TIME value
 */
case class SecondsOfTime(child: Expression)
extends RuntimeReplaceable
with ExpectsInputTypes {

override def replacement: Expression = StaticInvoke(
classOf[DateTimeUtils.type],
IntegerType,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't it be DecimalType with a precision and scale that matches the precision of TimeType?

Copy link
Contributor Author

@senthh senthh Apr 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vinodkc In jira, @MaxGekk has given just IntegerType seconds for an example. So I thought the requirement is just to handle IntegerType. I can modify the implementation so that it should handle both with precision and without.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @vinodkc ,

MaxxGek has responded in jira that we need to return second without fraction. So as per Maxx requirement this PR will work without Fraction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And also test failed with "java.lang.OutOfMemoryError: Java heap space" is not relevant to our changes

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@HyukjinKwon @vinodkc and @MaxGekk It will be helpful if you re-review this PR and provide your input

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, the type should be IntergerType

"getSecondsOfTime",
Seq(child),
Seq(child.dataType)
)

override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(TimeType.MIN_PRECISION to TimeType.MAX_PRECISION map TimeType: _*))

override def children: Seq[Expression] = Seq(child)

override def prettyName: String = "second"

override protected def withNewChildrenInternal(
newChildren: IndexedSeq[Expression]): Expression = {
copy(child = newChildren.head)
}
}

/**
 * Builder registered for the SQL function `second`.
 *
 * It picks the concrete expression from the data type of the first argument: a `TimeType`
 * argument is wrapped in `SecondsOfTime`, any other argument in `Second`. An empty argument
 * list is rejected with a wrong-number-of-arguments error. Only the first argument is
 * inspected; arguments after it are not used.
 */
@ExpressionDescription(
usage = """
_FUNC_(expr) - Returns the second component of the given expression.

If `expr` is a TIMESTAMP or a string that can be cast to timestamp,
it returns the second of that timestamp.
If `expr` is a TIME type (since 4.1.0), it returns the second of the time-of-day.
""",
examples = """
Examples:
> SELECT _FUNC_('2018-02-14 12:58:59');
59
> SELECT _FUNC_(TIME'13:25:59.999999');
59
""",
since = "1.5.0",
group = "datetime_funcs")
object SecondExpressionBuilder extends ExpressionBuilder {
  override def build(name: String, expressions: Seq[Expression]): Expression = {
    // Reject `second()` up front; the reported actual count is the (zero) argument length.
    val firstArg = expressions.headOption.getOrElse {
      throw QueryCompilationErrors.wrongNumArgsError(name, Seq("> 0"), expressions.length)
    }
    // TIME inputs use the time-of-day expression; every other input keeps using `Second`.
    firstArg.dataType match {
      case _: TimeType => SecondsOfTime(firstArg)
      case _ => Second(firstArg)
    }
  }
}

Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ object DateTimeUtils extends SparkDateTimeUtils {
getLocalDateTime(micros, zoneId).getSecond
}

/**
 * Extracts the seconds field of a TIME (TimeType) value.
 *
 * @param micros the TIME value as a `Long`, converted with `microsToLocalTime`
 * @return the `getSecond` component of the converted local time
 */
def getSecondsOfTime(micros: Long): Int = {
  val timeOfDay = microsToLocalTime(micros)
  timeOfDay.getSecond
}
/**
* Returns the seconds part and its fractional part with microseconds.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,4 +168,62 @@ class TimeExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkConsistencyBetweenInterpretedAndCodegen(
(child: Expression) => MinutesOfTime(child).replacement, TimeType())
}

// Covers the three outcomes of SecondExpressionBuilder.build: no arguments (error),
// a TIME-typed argument (SecondsOfTime) and any other argument (Second).
test("SecondExpressionBuilder") {
  // No arguments: the builder must fail with a wrong-number-of-arguments error.
  val emptyArgsError = intercept[AnalysisException] {
    SecondExpressionBuilder.build("second", Seq.empty)
  }
  checkError(
    exception = emptyArgsError,
    condition = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
    parameters = Map(
      "functionName" -> "`second`",
      "expectedNum" -> "> 0",
      "actualNum" -> "0",
      "docroot" -> SPARK_DOC_ROOT))

  // TIME-typed argument: the result is a SecondsOfTime wrapping the very same instance.
  val timeArg = Literal(localTime(12, 58, 59), TimeType())
  val builtForTime = SecondExpressionBuilder.build("second", Seq(timeArg))
  assert(builtForTime match {
    case SecondsOfTime(wrapped) => wrapped eq timeArg
    case _ => false
  })

  // Non-TIME argument: the result is a Second wrapping the very same instance.
  val timestampArg = Literal("2007-09-03 10:45:23")
  val builtForTimestamp = SecondExpressionBuilder.build("second", Seq(timestampArg))
  assert(builtForTimestamp match {
    case second: Second => second.child eq timestampArg
    case _ => false
  })
}

// Evaluates SecondsOfTime over TimeType() literals built from localTime(...) values and checks
// the extracted second against the expected Int. It also checks that a NULL TIME input yields
// null, and runs checkConsistencyBetweenInterpretedAndCodegen on the replacement expression.
test("Second with TIME type") {
// A few test times in microseconds since midnight:
// time in microseconds -> expected second
val testTimes = Seq(
Copy link
Contributor

@vinodkc vinodkc Apr 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add these tests to test the output based on precision?

val time = "13:10:15.987654321"
Seq(
      0 -> 15.toDouble,
      1 -> 15.9,
      2 -> 15.98,
      3 -> 15.987,
      4 -> 15.9876,
      5 -> 15.98765,
      6 -> 15.987654).foreach { case (precision, expected) =>
      checkEvaluation(
        SecondsOfTime(Literal.create(time, TimeType(precision))),
        BigDecimal(expected))
    }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vinodkc Sure Vinod. I will create a function for SecondsOfTimeWithFraction and also include tests for the same

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@senthh , I updated the above comment; please check

localTime() -> 0,
localTime(1) -> 0,
localTime(0, 59) -> 0,
localTime(14, 30) -> 0,
localTime(12, 58, 59) -> 59,
localTime(23, 0, 1) -> 1,
localTime(23, 59, 59, 999999) -> 59
)

// Create a literal with TimeType() for each test microsecond value
// evaluate SecondsOfTime(...), and check that the result matches the expected second.
testTimes.foreach { case (micros, expectedSecond) =>
checkEvaluation(
SecondsOfTime(Literal(micros, TimeType())),
expectedSecond)
}

// Verify NULL handling
checkEvaluation(
SecondsOfTime(Literal.create(null, TimeType(TimeType.MICROS_PRECISION))),
null
)

checkConsistencyBetweenInterpretedAndCodegen(
(child: Expression) => SecondsOfTime(child).replacement, TimeType())
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@
| org.apache.spark.sql.catalyst.expressions.SchemaOfJson | schema_of_json | SELECT schema_of_json('[{"col":0}]') | struct<schema_of_json([{"col":0}]):string> |
| org.apache.spark.sql.catalyst.expressions.SchemaOfXml | schema_of_xml | SELECT schema_of_xml('<p><a>1</a></p>') | struct<schema_of_xml(<p><a>1</a></p>):string> |
| org.apache.spark.sql.catalyst.expressions.Sec | sec | SELECT sec(0) | struct<SEC(0):double> |
| org.apache.spark.sql.catalyst.expressions.Second | second | SELECT second('2009-07-30 12:58:59') | struct<second(2009-07-30 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.SecondExpressionBuilder | second | SELECT second('2018-02-14 12:58:59') | struct<second(2018-02-14 12:58:59):int> |
| org.apache.spark.sql.catalyst.expressions.SecondsToTimestamp | timestamp_seconds | SELECT timestamp_seconds(1230219000) | struct<timestamp_seconds(1230219000):timestamp> |
| org.apache.spark.sql.catalyst.expressions.Sentences | sentences | SELECT sentences('Hi there! Good morning.') | struct<sentences(Hi there! Good morning., , ):array<array<string>>> |
| org.apache.spark.sql.catalyst.expressions.Sequence | sequence | SELECT sequence(1, 5) | struct<sequence(1, 5):array<int>> |
Expand Down