Skip to content

Commit 742a266

Browse files
authored
Add adjust_tz argument to writer functions (#714)
1 parent cb6bae5 commit 742a266

10 files changed

+162
-15
lines changed

NEWS.md

+16-1
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,23 @@
77
* Ignore invalid SAV timestamp strings (#683).
88
* Fix compiler warnings (#707).
99

10+
* `write_*()` functions gain a new `adjust_tz` argument to allow more control
11+
over time zone conversion for date-time variables (#702). Thanks to @jmobrien
12+
for the detailed issue and feedback.
13+
14+
Stata, SPSS and SAS do not have a concept of time zone. Since haven 2.4.0
15+
date-time values in non-UTC time zones are implicitly converted when writing
16+
to ensure the time displayed in Stata/SPSS/SAS will match the time displayed
17+
to the user in R (see #555). This is the behaviour when `adjust_tz = TRUE`
18+
(the default). Although this is in line with general user expectations it can
19+
cause issues when the time zone is important, e.g. when looking at
20+
differences between time points, since the underlying numeric data is changed
21+
to preserve the displayed time. Use `adjust_tz = FALSE` to write the time as
22+
the corresponding UTC value, which will appear different to the user but
23+
preserves the underlying numeric data.
24+
1025
* Fixed issue in `write_*()` functions where invisible return of input data
11-
frame included unintended alteration of date/time variables. (@jmobrien, #702)
26+
frame included unintended alteration of date-time variables. (@jmobrien, #702)
1227

1328
* The experimental `write_sas()` function has been deprecated (#224). The
1429
sas7bdat file format is complex and undocumented, and as such writing SAS

R/haven-sas.R

+20-5
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ read_sas <- function(data_file, catalog_file = NULL,
8181
#' @export
8282
write_sas <- function(data, path) {
8383
lifecycle::deprecate_warn("2.6.0", "write_sas()", "write_xpt()")
84-
84+
8585
validate_sas(data)
8686
data_out <- adjust_tz(data)
8787
write_sas_(data_out, normalizePath(path, mustWork = FALSE))
88-
88+
8989
invisible(data)
9090
}
9191

@@ -134,7 +134,19 @@ read_xpt <- function(file, col_select = NULL, skip = 0, n_max = Inf, .name_repai
134134
#'
135135
#' Note that although SAS itself supports dataset labels up to 256 characters
136136
#' long, dataset labels in SAS transport files must be <= 40 characters.
137-
write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "label")) {
137+
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
138+
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
139+
#' how the timezone of date-time values is treated when writing.
140+
#'
141+
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
142+
#' they will display the same in R and Stata/SPSS/SAS, e.g.
143+
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
144+
#' Note that this changes the underlying numeric data, so use caution if
145+
#' preserving between-time-point differences is critical.
146+
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
147+
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
148+
#' `"2009-12-31 20:00:00"`.
149+
write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "label"), adjust_tz = TRUE) {
138150
if (!version %in% c(5, 8)) {
139151
cli_abort("SAS transport file version {.val {version}} is not currently supported.")
140152
}
@@ -145,8 +157,11 @@ write_xpt <- function(data, path, version = 8, name = NULL, label = attr(data, "
145157
name <- validate_xpt_name(name, version)
146158
label <- validate_xpt_label(label)
147159

148-
validate_sas(data)
149-
data_out <- adjust_tz(data)
160+
data_out <- validate_sas(data)
161+
162+
if (isTRUE(adjust_tz)) {
163+
data_out <- adjust_tz(data_out)
164+
}
150165

151166
write_xpt_(
152167
data_out,

R/haven-spss.R

+19-3
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,19 @@ read_por <- function(file, user_na = FALSE, col_select = NULL, skip = 0, n_max =
7676
#'
7777
#' `TRUE` and `FALSE` can be used for backwards compatibility, and correspond
7878
#' to the "zsav" and "none" options respectively.
79-
write_sav <- function(data, path, compress = c("byte", "none", "zsav")) {
79+
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
80+
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
81+
#' how the timezone of date-time values is treated when writing.
82+
#'
83+
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
84+
#' they will display the same in R and Stata/SPSS/SAS, e.g.
85+
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
86+
#' Note that this changes the underlying numeric data, so use caution if
87+
#' preserving between-time-point differences is critical.
88+
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
89+
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
90+
#' `"2009-12-31 20:00:00"`.
91+
write_sav <- function(data, path, compress = c("byte", "none", "zsav"), adjust_tz = TRUE) {
8092
if (isTRUE(compress)) {
8193
compress <- "zsav"
8294
} else if (isFALSE(compress)) {
@@ -85,8 +97,12 @@ write_sav <- function(data, path, compress = c("byte", "none", "zsav")) {
8597
compress <- arg_match(compress)
8698
}
8799

88-
validate_sav(data)
89-
data_out <- adjust_tz(data)
100+
data_out <- validate_sav(data)
101+
102+
if (isTRUE(adjust_tz)) {
103+
data_out <- adjust_tz(data_out)
104+
}
105+
90106
write_sav_(data_out, normalizePath(path, mustWork = FALSE), compress = compress)
91107
invisible(data)
92108
}

R/haven-stata.R

+18-3
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,26 @@ read_stata <- read_dta
7575
#' 2045, the maximum length of str# variables. See the Stata [long
7676
#' string](https://www.stata.com/features/overview/long-strings/)
7777
#' documentation for more details.
78-
write_dta <- function(data, path, version = 14, label = attr(data, "label"), strl_threshold = 2045) {
79-
validate_dta(data, version = version)
78+
#' @param adjust_tz Stata, SPSS and SAS do not have a concept of time zone,
79+
#' and all [date-time] variables are treated as UTC. `adjust_tz` controls
80+
#' how the timezone of date-time values is treated when writing.
81+
#'
82+
#' * If `TRUE` (the default) the timezone of date-time values is ignored, and
83+
#' they will display the same in R and Stata/SPSS/SAS, e.g.
84+
#' `"2010-01-01 09:00:00 NZDT"` will be written as `"2010-01-01 09:00:00"`.
85+
#' Note that this changes the underlying numeric data, so use caution if
86+
#' preserving between-time-point differences is critical.
87+
#' * If `FALSE`, date-time values are written as the corresponding UTC value,
88+
#' e.g. `"2010-01-01 09:00:00 NZDT"` will be written as
89+
#' `"2009-12-31 20:00:00"`.
90+
write_dta <- function(data, path, version = 14, label = attr(data, "label"), strl_threshold = 2045, adjust_tz = TRUE) {
91+
data_out <- validate_dta(data, version = version)
8092
validate_dta_label(label)
8193

82-
data_out <- adjust_tz(data)
94+
if (isTRUE(adjust_tz)) {
95+
data_out <- adjust_tz(data_out)
96+
}
97+
8398
write_dta_(
8499
data_out,
85100
normalizePath(path, mustWork = FALSE),

man/read_dta.Rd

+16-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/read_spss.Rd

+15-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/read_xpt.Rd

+22-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-haven-sas.R

+22
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,28 @@ test_that("can roundtrip missing values (as much as possible)", {
166166
expect_equal(roundtrip_var(NA_character_, "xpt"), "")
167167
})
168168

169+
test_that("can roundtrip date times", {
170+
x1 <- c(as.Date("2010-01-01"), NA)
171+
expect_equal(roundtrip_var(x1, "xpt"), x1)
172+
173+
# converted to same time in UTC
174+
x2 <- as.POSIXct("2010-01-01 09:00", tz = "Pacific/Auckland")
175+
expect_equal(
176+
roundtrip_var(x2, "xpt"),
177+
as.POSIXct("2010-01-01 09:00", tz = "UTC")
178+
)
179+
180+
x2_utc <- x2
181+
attr(x2_utc, "tzone") <- "UTC"
182+
expect_equal(
183+
roundtrip_var(x2, "xpt", adjust_tz = FALSE),
184+
x2_utc
185+
)
186+
187+
attr(x2, "label") <- "abc"
188+
expect_equal(attr(roundtrip_var(x2, "xpt"), "label"), "abc")
189+
})
190+
169191
test_that("invalid files generate informative errors", {
170192
expect_snapshot(error = TRUE, {
171193
write_xpt(mtcars, file.path(tempdir(), " temp.xpt"))

tests/testthat/test-haven-spss.R

+7
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ test_that("can roundtrip date times", {
175175
as.POSIXct("2010-01-01 09:00", tz = "UTC")
176176
)
177177

178+
x2_utc <- x2
179+
attr(x2_utc, "tzone") <- "UTC"
180+
expect_equal(
181+
roundtrip_var(x2, "sav", adjust_tz = FALSE),
182+
x2_utc
183+
)
184+
178185
attr(x2, "label") <- "abc"
179186
expect_equal(attr(roundtrip_var(x2, "sav"), "label"), "abc")
180187
})

tests/testthat/test-haven-stata.R

+7
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,13 @@ test_that("can roundtrip date times", {
115115
as.POSIXct("2010-01-01 09:00", tz = "UTC")
116116
)
117117

118+
x2_utc <- x2
119+
attr(x2_utc, "tzone") <- "UTC"
120+
expect_equal(
121+
roundtrip_var(x2, "dta", adjust_tz = FALSE),
122+
x2_utc
123+
)
124+
118125
attr(x2, "label") <- "abc"
119126
expect_equal(attr(roundtrip_var(x2, "dta"), "label"), "abc")
120127
})

0 commit comments

Comments
 (0)