Skip to content

Commit 69ff43a

Browse files
committed
Improve performance of convert to UTF16Str and UTF32Str from Vector of 32-bit values
1 parent 53ee23c commit 69ff43a

File tree

2 files changed

+15 -2 lines changed

2 files changed

+15 -2 lines changed

src/utf16.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -465,7 +465,7 @@ end
465 465
#convert(::Type{T}, str::MaybeSub{T}) where {T<:Str{<:Union{UCS2_CSEs, UTF32_CSEs}}} = str
466 466
convert(::Type{<:Str{UTF16CSE}}, str::MaybeSub{<:Str{<:UCS2_CSEs}}) = Str(UTF16CSE, str.data)
467 467

468-
function convert(::Type{<:Str{UTF16CSE}}, dat::AbstractArray{UInt16})
468+
function convert(::Type{<:Str{UTF16CSE}}, dat::AbstractArray{C}) where {C<:Union{UInt16,UInt32}}
469 469
is_empty(dat) && return empty_utf16
470 470
len, flags, num4byte = unsafe_check_string(dat, 1, lastindex(dat))
471 471
# Optimize case where no surrogate characters

src/utf32.jl

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,20 @@ end
216 216

217 217
function convert(::Type{<:Str{UTF32CSE}}, dat::Vector{<:Union{UInt32,Int32,Text4Chr}})
218 218
is_valid(UTF32Str, dat) || strerror(StrErrors.INVALID)
219-
@preserve dat Str(UTF32CSE, _copysub(pointer(dat), length(dat)))
219+
Str(UTF32CSE, _copysub(dat))
220+
end
221+
222+
function is_valid(::Type{<:Str{UTF32CSE}}, dat::Vector{<:Union{UInt32,Int32,Text4Chr}})
223+
@preserve dat begin
224+
pnt = pointer(dat)
225+
fin = pnt + sizeof(dat)
226+
while pnt < fin
227+
ch = get_codeunit(pnt)
228+
(!is_surrogate_codeunit(ch) && ch <= 0x10ffff) || return false
229+
pnt += 4
230+
end
231+
end
232+
true
220 233
end
221 234

222 235
# Not sure this is valid anymore, want to avoid type piracy

0 commit comments

Comments
 (0)