Skip to content

Commit e7872b3

Browse files
authored
Optimize integer to string conversions (#36470)
* Optimize integer-->string conversions. This avoids invalidations caused by invalidating `StringVector(::Integer)`. This also makes `bin()`, `dec()` and `hex()` slightly faster, but does not change `Printf`.
1 parent a4bfb9c commit e7872b3

File tree

2 files changed

+76
-35
lines changed

2 files changed

+76
-35
lines changed

base/intfuncs.jl

Lines changed: 73 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -615,75 +615,112 @@ ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, ba
615615

616616
## integer to string functions ##
617617

618-
function bin(x::Unsigned, pad::Integer, neg::Bool)
619-
i = neg + max(pad,sizeof(x)<<3-leading_zeros(x))
620-
a = StringVector(i)
618+
function bin(x::Unsigned, pad::Int, neg::Bool)
619+
m = 8 * sizeof(x) - leading_zeros(x)
620+
n = neg + max(pad, m)
621+
a = StringVector(n)
622+
# for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
623+
# @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1)
624+
# end
625+
i = n
626+
@inbounds while i >= 4
627+
b = UInt32((x % UInt8)::UInt8)
628+
d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101
629+
a[i-3] = (d >> 0x00) % UInt8
630+
a[i-2] = (d >> 0x08) % UInt8
631+
a[i-1] = (d >> 0x10) % UInt8
632+
a[i] = (d >> 0x18) % UInt8
633+
x >>= 0x4
634+
i -= 4
635+
end
621636
while i > neg
622-
@inbounds a[i] = 48+(x&0x1)
623-
x >>= 1
637+
@inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x1)
638+
x >>= 0x1
624639
i -= 1
625640
end
626641
if neg; @inbounds a[1]=0x2d; end
627642
String(a)
628643
end
629644

630-
function oct(x::Unsigned, pad::Integer, neg::Bool)
631-
i = neg + max(pad,div((sizeof(x)<<3)-leading_zeros(x)+2,3))
632-
a = StringVector(i)
645+
function oct(x::Unsigned, pad::Int, neg::Bool)
646+
m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3)
647+
n = neg + max(pad, m)
648+
a = StringVector(n)
649+
i = n
633650
while i > neg
634-
@inbounds a[i] = 48+(x&0x7)
635-
x >>= 3
651+
@inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7)
652+
x >>= 0x3
636653
i -= 1
637654
end
638655
if neg; @inbounds a[1]=0x2d; end
639656
String(a)
640657
end
641658

642-
function dec(x::Unsigned, pad::Integer, neg::Bool)
643-
i = neg + ndigits(x, base=10, pad=pad)
644-
a = StringVector(i)
645-
while i > neg
646-
@inbounds a[i] = 48+rem(x,10)
647-
x = oftype(x,div(x,10))
648-
i -= 1
659+
# 2-digit decimal characters ("00":"99")
660+
const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
661+
662+
function dec(x::Unsigned, pad::Int, neg::Bool)
663+
n = neg + ndigits(x, pad=pad)
664+
a = StringVector(n)
665+
i = n
666+
@inbounds while i >= 2
667+
d, r = divrem(x, 0x64)
668+
d100 = _dec_d100[(r % Int)::Int + 1]
669+
a[i-1] = d100 % UInt8
670+
a[i] = (d100 >> 0x8) % UInt8
671+
x = oftype(x, d)
672+
i -= 2
673+
end
674+
if i > neg
675+
@inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
649676
end
650677
if neg; @inbounds a[1]=0x2d; end
651678
String(a)
652679
end
653680

654-
function hex(x::Unsigned, pad::Integer, neg::Bool)
655-
i = neg + max(pad,(sizeof(x)<<1)-(leading_zeros(x)>>2))
656-
a = StringVector(i)
657-
while i > neg
658-
d = x & 0xf
659-
@inbounds a[i] = 48+d+39*(d>9)
660-
x >>= 4
661-
i -= 1
681+
function hex(x::Unsigned, pad::Int, neg::Bool)
682+
m = 2 * sizeof(x) - (leading_zeros(x) >> 2)
683+
n = neg + max(pad, m)
684+
a = StringVector(n)
685+
i = n
686+
while i >= 2
687+
b = (x % UInt8)::UInt8
688+
d1, d2 = b >> 0x4, b & 0xf
689+
@inbounds a[i-1] = d1 + ifelse(d1 > 0x9, 0x57, 0x30)
690+
@inbounds a[i] = d2 + ifelse(d2 > 0x9, 0x57, 0x30)
691+
x >>= 0x8
692+
i -= 2
693+
end
694+
if i > neg
695+
d = (x % UInt8)::UInt8 & 0xf
696+
@inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
662697
end
663698
if neg; @inbounds a[1]=0x2d; end
664699
String(a)
665700
end
666701

667-
const base36digits = ['0':'9';'a':'z']
668-
const base62digits = ['0':'9';'A':'Z';'a':'z']
702+
const base36digits = UInt8['0':'9';'a':'z']
703+
const base62digits = UInt8['0':'9';'A':'Z';'a':'z']
669704

670-
function _base(b::Integer, x::Integer, pad::Integer, neg::Bool)
671-
(x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `b` must be negative."))
672-
2 <= abs(b) <= 62 || throw(DomainError(b, "base must satisfy 2 ≤ abs(base) ≤ 62"))
705+
function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
706+
(x >= 0) | (base < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative."))
707+
2 <= abs(base) <= 62 || throw(DomainError(base, "base must satisfy 2 ≤ abs(base) ≤ 62"))
708+
b = (base % Int)::Int
673709
digits = abs(b) <= 36 ? base36digits : base62digits
674-
i = neg + ndigits(x, base=b, pad=pad)
675-
a = StringVector(i)
710+
n = neg + ndigits(x, base=b, pad=pad)
711+
a = StringVector(n)
712+
i = n
676713
@inbounds while i > neg
677714
if b > 0
678-
a[i] = digits[1+rem(x,b)]
715+
a[i] = digits[1 + (rem(x, b) % Int)::Int]
679716
x = div(x,b)
680717
else
681-
a[i] = digits[1+mod(x,-b)]
718+
a[i] = digits[1 + (mod(x, -b) % Int)::Int]
682719
x = cld(x,b)
683720
end
684721
i -= 1
685722
end
686-
if neg; a[1]='-'; end
723+
if neg; @inbounds a[1]=0x2d; end
687724
String(a)
688725
end
689726

@@ -705,6 +742,7 @@ julia> string(13, base = 5, pad = 4)
705742
```
706743
"""
707744
function string(n::Integer; base::Integer = 10, pad::Integer = 1)
745+
pad = (min(max(pad, typemin(Int)), typemax(Int)) % Int)::Int
708746
if base == 2
709747
(n_positive, neg) = split_sign(n)
710748
bin(n_positive, pad, neg)

test/intfuncs.jl

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -304,6 +304,7 @@ end
304304
@test string(3, base = 2) == "11"
305305
@test string(3, pad = 2, base = 2) == "11"
306306
@test string(3, pad = Int32(2), base = Int32(2)) == "11"
307+
@test string(3, pad = typemin(Int128) + 3, base = 0x2) == "11"
307308
@test string(3, pad = 3, base = 2) == "011"
308309
@test string(-3, base = 2) == "-11"
309310
@test string(-3, pad = 3, base = 2) == "-011"
@@ -338,6 +339,8 @@ end
338339
@test digits(-3, base = 2) == -[1, 1]
339340
@test digits(-42, base = 4) == -[2, 2, 2]
340341

342+
@test_throws DomainError string(5, base = typemin(Int128) + 10)
343+
341344
@testset "digits/base with bases powers of 2" begin
342345
@test digits(4, base = 2) == [0, 0, 1]
343346
@test digits(5, base = Int32(2), pad=Int32(3)) == [1, 0, 1]

0 commit comments

Comments
 (0)