Skip to content

Commit fc90ae5

Browse files
committed
[SPARK-52524] Support Timestamp type
### What changes were proposed in this pull request? This PR aims to support `Timestamp` type in `Row` and `DataFrame.collect`. ### Why are the changes needed? Previously, `Timestamp` is supported inside `Spark Connect Server`-side operation only, e.g. `DataFrame.show`. ### Does this PR introduce _any_ user-facing change? No, this is an additional type. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #202 from dongjoon-hyun/SPARK-52524. Authored-by: Dongjoon Hyun <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 9da02c8 commit fc90ae5

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

Sources/SparkConnect/DataFrame.swift

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,22 @@ public actor DataFrame: Sendable {
429429
values.append(array.asAny(i) as? Decimal)
430430
case .primitiveInfo(.date32):
431431
values.append(array.asAny(i) as! Date)
432+
case .timeInfo(.timestamp):
433+
let timestampType = column.data.type as! ArrowTypeTimestamp
434+
assert(timestampType.timezone == "Etc/UTC")
435+
let timestamp = array.asAny(i) as! Int64
436+
let timeInterval =
437+
switch timestampType.unit {
438+
case .seconds:
439+
TimeInterval(timestamp)
440+
case .milliseconds:
441+
TimeInterval(timestamp) / 1_000
442+
case .microseconds:
443+
TimeInterval(timestamp) / 1_000_000
444+
case .nanoseconds:
445+
TimeInterval(timestamp) / 1_000_000_000
446+
}
447+
values.append(Date(timeIntervalSince1970: timeInterval))
432448
case ArrowType.ArrowBinary:
433449
values.append((array as! AsString).asString(i).utf8)
434450
case .complexInfo(.strct):

Sources/SparkConnect/Row.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ public struct Row: Sendable, Equatable {
7171
return a == b
7272
} else if let a = x as? Decimal, let b = y as? Decimal {
7373
return a == b
74+
} else if let a = x as? Date, let b = y as? Date {
75+
return a == b
7476
} else if let a = x as? String, let b = y as? String {
7577
return a == b
7678
} else {

Tests/SparkConnectTests/DataFrameTests.swift

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -932,6 +932,19 @@ struct DataFrameTests {
932932
#expect(try await df.collect() == expected)
933933
await spark.stop()
934934
}
935+
936+
@Test
937+
func timestamp() async throws {
938+
let spark = try await SparkSession.builder.getOrCreate()
939+
let df = try await spark.sql(
940+
"SELECT TIMESTAMP '2025-05-01 16:23:40', TIMESTAMP '2025-05-01 16:23:40.123456'")
941+
let expected = [
942+
Row(
943+
Date(timeIntervalSince1970: 1746116620.0), Date(timeIntervalSince1970: 1746116620.123456))
944+
]
945+
#expect(try await df.collect() == expected)
946+
await spark.stop()
947+
}
935948
#endif
936949

937950
@Test

0 commit comments

Comments
 (0)