Skip to content

Commit c3ebc9e

Browse files
committed
Dates: remove anti-pattern of using Tuple{Union{Nothing, Something}, Int}
While the new optimizer should handle this better, the optimal efficient way to handle this is to make the missingness entirely disjoin: `Union{Nothing, Tuple{Something, Int}}` And make each `val` variable SSA, for better inference
1 parent 87c0902 commit c3ebc9e

File tree

3 files changed

+102
-88
lines changed

3 files changed

+102
-88
lines changed

stdlib/Dates/src/io.jl

Lines changed: 25 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,10 @@ abstract type AbstractDateToken end
1717
`locale`. If a `tryparsenext` method does not need a locale, it can leave
1818
the argument out in the method definition.
1919
20-
Return a tuple of 2 elements `(res, idx)`, where:
20+
If parsing succeeds, returns a tuple of 2 elements `(res, idx)`, where:
2121
22-
* `res` is either the result of the parsing, or `nothing` if parsing failed.
23-
* `idx` is an `Int` - if parsing failed, the index at which it failed; if
24-
parsing succeeded, `idx` is the index _after_ the index at which parsing ended.
22+
* `res` is the result of the parsing.
23+
* `idx::Int`, is the index _after_ the index at which parsing ended.
2524
"""
2625
function tryparsenext end
2726

@@ -39,7 +38,7 @@ function format end
3938

4039
# fallback to tryparsenext/format methods that don't care about locale
4140
@inline function tryparsenext(d::AbstractDateToken, str, i, len, locale)
42-
tryparsenext(d, str, i, len)
41+
return tryparsenext(d, str, i, len)
4342
end
4443

4544
function Base.string(t::Time)
@@ -91,38 +90,35 @@ end
9190
for c in "yYmdHMS"
9291
@eval begin
9392
@inline function tryparsenext(d::DatePart{$c}, str, i, len)
94-
tryparsenext_base10(str, i, len, min_width(d), max_width(d))
93+
return tryparsenext_base10(str, i, len, min_width(d), max_width(d))
9594
end
9695
end
9796
end
9897

9998
for (tok, fn) in zip("uUeE", [monthabbr_to_value, monthname_to_value, dayabbr_to_value, dayname_to_value])
10099
@eval @inline function tryparsenext(d::DatePart{$tok}, str, i, len, locale)
101-
word, i = tryparsenext_word(str, i, len, locale, max_width(d))
102-
val = word === nothing ? 0 : $fn(word, locale)
103-
if val == 0
104-
return nothing, i
105-
else
106-
return val, i
107-
end
100+
next = tryparsenext_word(str, i, len, locale, max_width(d))
101+
next === nothing && return nothing
102+
word, i = next
103+
val = $fn(word, locale)
104+
val == 0 && return nothing
105+
return val, i
108106
end
109107
end
110108

111109
# 3-digit (base 10) number following a decimal point. For InexactError below.
112110
struct Decimal3 end
113111

114112
@inline function tryparsenext(d::DatePart{'s'}, str, i, len)
115-
ms, ii = tryparsenext_base10(str, i, len, min_width(d), max_width(d))
116-
if ms !== nothing
117-
val0 = val = ms
118-
len = ii - i
119-
if len > 3
120-
val, r = divrem(val, Int64(10) ^ (len - 3))
121-
r == 0 || throw(InexactError(:convert, Decimal3, val0))
122-
else
123-
val *= Int64(10) ^ (3 - len)
124-
end
125-
ms = val
113+
val = tryparsenext_base10(str, i, len, min_width(d), max_width(d))
114+
val === nothing && return nothing
115+
ms0, ii = val
116+
len = ii - i
117+
if len > 3
118+
ms, r = divrem(ms0, Int64(10) ^ (len - 3))
119+
r == 0 || throw(InexactError(:convert, Decimal3, ms0))
120+
else
121+
ms = ms0 * Int64(10) ^ (3 - len)
126122
end
127123
return ms, ii
128124
end
@@ -186,10 +182,10 @@ Delim(d::T) where {T<:AbstractChar} = Delim{T, 1}(d)
186182
Delim(d::String) = Delim{String, length(d)}(d)
187183

188184
@inline function tryparsenext(d::Delim{<:AbstractChar, N}, str, i::Int, len) where N
189-
for j=1:N
190-
i > len && return (nothing, i)
185+
for j = 1:N
186+
i > len && return nothing
191187
c, i = next(str, i)
192-
c != d.d && return (nothing, i)
188+
c != d.d && return nothing
193189
end
194190
return true, i
195191
end
@@ -199,12 +195,12 @@ end
199195
i2 = start(d.d)
200196
for j = 1:N
201197
if i1 > len
202-
return nothing, i1
198+
return nothing
203199
end
204200
c1, i1 = next(str, i1)
205201
c2, i2 = next(d.d, i2)
206202
if c1 != c2
207-
return nothing, i1
203+
return nothing
208204
end
209205
end
210206
return true, i1

stdlib/Dates/src/parse.jl

Lines changed: 76 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ genvar(t::DataType) = Symbol(lowercase(string(nameof(t))))
2323
2424
Parse the string according to the directives within the `DateFormat`. Parsing will start at
2525
character index `pos` and will stop when all directives are used or we have parsed up to
26-
the end of the string, `len`. When a directive cannot be parsed the returned value tuple
26+
the end of the string, `len`. When a directive cannot be parsed the returned value
2727
will be `nothing` if `raise` is false otherwise an exception will be thrown.
2828
29-
Return a 3-element tuple `(values, pos, num_parsed)`:
30-
* `values::Union{Tuple, Nothing}`: Either `nothing`, or a tuple which contains a value
29+
If successful, return a 3-element tuple `(values, pos, num_parsed)`:
30+
* `values::Tuple`: A tuple which contains a value
3131
for each `DatePart` within the `DateFormat` in the order
3232
in which they occur. If the string ends before we finish parsing all the directives
3333
the missing values will be filled in with default values.
@@ -58,29 +58,30 @@ Return a 3-element tuple `(values, pos, num_parsed)`:
5858
for i = 1:length(directives)
5959
if directives[i] <: DatePart
6060
name = value_names[vi]
61-
val = Symbol(:val, name)
6261
vi += 1
6362
push!(parsers, quote
6463
pos > len && @goto done
65-
$val, next_pos = tryparsenext(directives[$i], str, pos, len, locale)
66-
$val === nothing && @goto error
67-
$name = $val
68-
pos = next_pos
64+
local $name
65+
let val = tryparsenext(directives[$i], str, pos, len, locale)
66+
val === nothing && @goto error
67+
$name, pos = val
68+
end
6969
num_parsed += 1
7070
directive_index += 1
7171
end)
7272
else
7373
push!(parsers, quote
7474
pos > len && @goto done
75-
delim, next_pos = tryparsenext(directives[$i], str, pos, len, locale)
76-
delim === nothing && @goto error
77-
pos = next_pos
75+
let val = tryparsenext(directives[$i], str, pos, len, locale)
76+
val === nothing && @goto error
77+
delim, pos = val
78+
end
7879
directive_index += 1
7980
end)
8081
end
8182
end
8283

83-
quote
84+
return quote
8485
directives = df.tokens
8586
locale::DateLocale = df.locale
8687

@@ -104,7 +105,7 @@ Return a 3-element tuple `(values, pos, num_parsed)`:
104105
throw(ArgumentError("Unable to parse date time. Expected directive $d at char $pos"))
105106
end
106107
end
107-
return nothing, pos, 0
108+
return nothing
108109
end
109110
end
110111

@@ -114,11 +115,11 @@ end
114115
Parse the string according to the directives within the `DateFormat`. The specified `TimeType`
115116
type determines the type of and order of tokens returned. If the given `DateFormat` or string
116117
does not provide a required token a default value will be used. When the string cannot be
117-
parsed the returned value tuple will be `nothing` if `raise` is false otherwise an exception will
118+
parsed the returned value will be `nothing` if `raise` is false otherwise an exception will
118119
be thrown.
119120
120-
Return a 2-element tuple `(values, pos)`:
121-
* `values::Union{Tuple, Nothing}`: Either `nothing`, or a tuple which contains a value
121+
If successful, returns a 2-element tuple `(values, pos)`:
122+
* `values::Tuple`: A tuple which contains a value
122123
for each token as specified by the passed in type.
123124
* `pos::Int`: The character index at which parsing stopped.
124125
"""
@@ -146,17 +147,18 @@ Return a 2-element tuple `(values, pos)`:
146147
# Unpacks the value tuple returned by `tryparsenext_core` into separate variables.
147148
value_tuple = Expr(:tuple, value_names...)
148149

149-
quote
150-
values, pos, num_parsed = tryparsenext_core(str, pos, len, df, raise)
151-
values === nothing && return nothing, pos
150+
return quote
151+
val = tryparsenext_core(str, pos, len, df, raise)
152+
val === nothing && return nothing
153+
values, pos, num_parsed = val
152154
$(assign_defaults...)
153155
$value_tuple = values
154156
return $(Expr(:tuple, output_names...)), pos
155157
end
156158
end
157159

158160
@inline function tryparsenext_base10(str::AbstractString, i::Int, len::Int, min_width::Int=1, max_width::Int=0)
159-
i > len && (return nothing, i)
161+
i > len && return nothing
160162
min_pos = min_width <= 0 ? i : i + min_width - 1
161163
max_pos = max_width <= 0 ? len : min(i + max_width - 1, len)
162164
d::Int64 = 0
@@ -170,7 +172,7 @@ end
170172
i = ii
171173
end
172174
if i <= min_pos
173-
return nothing, i
175+
return nothing
174176
else
175177
return d, i
176178
end
@@ -189,7 +191,7 @@ end
189191
i = ii
190192
end
191193
if word_end == 0
192-
return nothing, i
194+
return nothing
193195
else
194196
return SubString(str, word_start, word_end), i
195197
end
@@ -198,62 +200,76 @@ end
198200
function Base.parse(::Type{DateTime}, s::AbstractString, df::typeof(ISODateTimeFormat))
199201
i, end_pos = firstindex(s), lastindex(s)
200202

203+
local dy
201204
dm = dd = Int64(1)
202205
th = tm = ts = tms = Int64(0)
203206

204-
val, i = tryparsenext_base10(s, i, end_pos, 1)
205-
dy = val === nothing ? (@goto error) : val
206-
i > end_pos && @goto error
207+
let val = tryparsenext_base10(s, i, end_pos, 1)
208+
val === nothing && @goto error
209+
dy, i = val
210+
i > end_pos && @goto error
211+
end
207212

208213
c, i = next(s, i)
209214
c != '-' && @goto error
210215
i > end_pos && @goto done
211216

212-
val, i = tryparsenext_base10(s, i, end_pos, 1, 2)
213-
dm = val === nothing ? (@goto error) : val
214-
i > end_pos && @goto done
217+
let val = tryparsenext_base10(s, i, end_pos, 1, 2)
218+
val === nothing && @goto error
219+
dm, i = val
220+
i > end_pos && @goto done
221+
end
215222

216223
c, i = next(s, i)
217224
c != '-' && @goto error
218225
i > end_pos && @goto done
219226

220-
val, i = tryparsenext_base10(s, i, end_pos, 1, 2)
221-
dd = val === nothing ? (@goto error) : val
222-
i > end_pos && @goto done
227+
let val = tryparsenext_base10(s, i, end_pos, 1, 2)
228+
val === nothing && @goto error
229+
dd, i = val
230+
i > end_pos && @goto done
231+
end
223232

224233
c, i = next(s, i)
225234
c != 'T' && @goto error
226235
i > end_pos && @goto done
227236

228-
val, i = tryparsenext_base10(s, i, end_pos, 1, 2)
229-
th = val === nothing ? (@goto error) : val
230-
i > end_pos && @goto done
237+
let val = tryparsenext_base10(s, i, end_pos, 1, 2)
238+
val === nothing && @goto error
239+
th, i = val
240+
i > end_pos && @goto done
241+
end
231242

232243
c, i = next(s, i)
233244
c != ':' && @goto error
234245
i > end_pos && @goto done
235246

236-
val, i = tryparsenext_base10(s, i, end_pos, 1, 2)
237-
tm = val === nothing ? (@goto error) : val
238-
i > end_pos && @goto done
247+
let val = tryparsenext_base10(s, i, end_pos, 1, 2)
248+
val === nothing && @goto error
249+
tm, i = val
250+
i > end_pos && @goto done
251+
end
239252

240253
c, i = next(s, i)
241254
c != ':' && @goto error
242255
i > end_pos && @goto done
243256

244-
val, i = tryparsenext_base10(s, i, end_pos, 1, 2)
245-
ts = val === nothing ? (@goto error) : val
246-
i > end_pos && @goto done
257+
let val = tryparsenext_base10(s, i, end_pos, 1, 2)
258+
val === nothing && @goto error
259+
ts, i = val
260+
i > end_pos && @goto done
261+
end
247262

248263
c, i = next(s, i)
249264
c != '.' && @goto error
250265
i > end_pos && @goto done
251266

252-
val, j = tryparsenext_base10(s, i, end_pos, 1, 3)
253-
tms = val === nothing ? (@goto error) : val
254-
tms *= 10 ^ (3 - (j - i))
255-
256-
j > end_pos || @goto error
267+
let val = tryparsenext_base10(s, i, end_pos, 1, 3)
268+
val === nothing && @goto error
269+
tms, j = val
270+
tms *= 10 ^ (3 - (j - i))
271+
j > end_pos || @goto error
272+
end
257273

258274
@label done
259275
return DateTime(dy, dm, dd, th, tm, ts, tms)
@@ -264,21 +280,22 @@ end
264280

265281
function Base.parse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
266282
pos, len = firstindex(str), lastindex(str)
267-
values, pos = tryparsenext_internal(T, str, pos, len, df, true)
268-
T(values...)
283+
val = tryparsenext_internal(T, str, pos, len, df, true)
284+
@assert val !== nothing
285+
values, endpos = val
286+
return T(values...)
269287
end
270288

271289
function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
272290
pos, len = firstindex(str), lastindex(str)
273-
values, pos = tryparsenext_internal(T, str, pos, len, df, false)
274-
if values === nothing
275-
nothing
276-
elseif validargs(T, values...) === nothing
291+
res = tryparsenext_internal(T, str, pos, len, df, false)
292+
res === nothing && return nothing
293+
values, endpos = res
294+
if validargs(T, values...) === nothing
277295
# TODO: validargs gets called twice, since it's called again in the T constructor
278-
T(values...)
279-
else
280-
nothing
296+
return T(values...)
281297
end
298+
return nothing
282299
end
283300

284301
"""
@@ -293,15 +310,16 @@ number of components may be less than the total number of `DatePart`.
293310
letters = character_codes(df)
294311
tokens = Type[CONVERSION_SPECIFIERS[letter] for letter in letters]
295312

296-
quote
313+
return quote
297314
pos, len = firstindex(str), lastindex(str)
298-
values, pos, num_parsed = tryparsenext_core(str, pos, len, df, true)
299-
t = values
315+
val = tryparsenext_core(str, pos, len, df, #=raise=#true)
316+
@assert val !== nothing
317+
values, pos, num_parsed = val
300318
types = $(Expr(:tuple, tokens...))
301319
result = Vector{Any}(undef, num_parsed)
302320
for (i, typ) in enumerate(types)
303321
i > num_parsed && break
304-
result[i] = typ(t[i]) # Constructing types takes most of the time
322+
result[i] = typ(values[i]) # Constructing types takes most of the time
305323
end
306324
return result
307325
end

stdlib/Dates/test/io.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ end
390390
Zulu = String
391391

392392
function Dates.tryparsenext(d::Dates.DatePart{'Z'}, str, i, len)
393-
Dates.tryparsenext_word(str, i, len, Dates.min_width(d), Dates.max_width(d))
393+
return Dates.tryparsenext_word(str, i, len, Dates.min_width(d), Dates.max_width(d))
394394
end
395395

396396
str = "2015-07-24T05:38:19.591Z"

0 commit comments

Comments
 (0)