diff --git a/demos/abstract_analyze_versions.jl b/demos/abstract_analyze_versions.jl
new file mode 100644
index 0000000..e860553
--- /dev/null
+++ b/demos/abstract_analyze_versions.jl
@@ -0,0 +1,144 @@
+using PyPlot
+
+"""
+    parseline(line)
+
+Parse one `signature => count` record and return `(sig, count)`, where `sig` is the
+result of evaluating the signature text.
+
+Return `nothing` if `line` is not such a record, if the count is not an integer, or if the
+signature cannot be parsed and evaluated in this session.
+"""
+function parseline(line)
+    m = match(r"(.*) => (.*)", line)
+    # Blank or malformed lines are skipped instead of failing on `m.captures`
+    m === nothing && return nothing
+    sigstr = m.captures[1]
+    count = tryparse(Int, m.captures[2])
+    count === nothing && return nothing
+    # NOTE(review): `eval` executes whatever the log contains; only read trusted files
+    sig = try
+        eval(Meta.parse(sigstr))
+    catch
+        return nothing
+    end
+    return sig, count
+end
+
+"""
+    parsedata(filename)
+
+Read the records in `filename` into an `IdDict` that maps each signature to its count.
+Lines for which `parseline` returns `nothing` are ignored.
+"""
+function parsedata(filename)
+    sigcount = IdDict{Any,Int}()
+    for line in eachline(filename)
+        ret = parseline(line)
+        ret === nothing && continue
+        sig, count = ret
+        sigcount[sig] = count
+    end
+    return sigcount
+end
+
+"""
+    split_comparable(sigc1, sigc2)
+
+Align two signature-to-count maps. Return `(sigs, c1, c2)`, where `sigs` holds every
+signature found in either map and `c1[i]`, `c2[i]` are the counts of `sigs[i]` in `sigc1`
+and `sigc2` (`0` when the signature is absent from that map).
+"""
+function split_comparable(sigc1, sigc2)
+    c1, c2, sigs = Int[], Int[], Any[]
+    for (sig, c) in sigc1
+        push!(sigs, sig)
+        push!(c1, c)
+        push!(c2, get(sigc2, sig, 0))
+    end
+    # Signatures that occur only in the second map
+    for (sig, c) in sigc2
+        haskey(sigc1, sig) && continue
+        push!(sigs, sig)
+        push!(c1, 0)
+        push!(c2, c)
+    end
+    return sigs, c1, c2
+end
+
+sigc16 = parsedata("/tmp/methdata_$VERSION.log")
+sigc14 = parsedata("/tmp/methdata_1.4.3-pre.0.log")
+
+sigs, c1, c2 = split_comparable(sigc14, sigc16)
+mx1, mx2 = maximum(c1), maximum(c2)
+# Color a point by whether the method-table name of the signature's function is defined in `Main`
+isexported(sig) = (ft = Base.unwrap_unionall(sig).parameters[1]; isdefined(Main, ft.name.mt.name))
+colors = [isexported(sig) ? "magenta" : "green" for sig in sigs]
+
+"""
+    on_click(event)
+
+Mouse-click callback for the scatter plot: print the signature of the plotted point that is
+closest to the click. Clicks without data coordinates are ignored.
+"""
+function on_click(event)
+    x, y = event.xdata, event.ydata
+    # A click outside the axes has no data coordinates
+    (x === nothing || y === nothing) && return nothing
+    # Points are drawn at (c1 + 1, c2 + 1), so measure the distance to those positions
+    sx, sy = max(mx1, 1), max(mx2, 1)
+    normsqrdist(pr) = ((pr[1]+1-x)/sx)^2 + ((pr[2]+1-y)/sy)^2
+    idx = argmin(normsqrdist.(zip(c1, c2)))
+    println(sigs[idx])
+    return nothing
+end
+begin
+    fig, ax = plt.subplots()
+    ax.scatter(c1 .+ 1, c2 .+ 1, c=colors)  # + 1 for the log-scaling
+    ax.set_xlabel("# backedges + 1, 1.4")
+    ax.set_ylabel("# backedges + 1, 1.6")
+    ax.set_xscale("log")
+    ax.set_yscale("log")
+    fig.canvas.callbacks.connect("button_press_event", on_click)
+    fig
+end
+
+# Ones we've made progress on:
+# ==(::Any, ::Symbol)
+# ==(::Symbol, ::Any)
+# ==(::Any, ::Nothing)
+# ==(::UUID, ::Any)
+# ==(::AbstractString, ::String)
+# isequal(::Symbol, ::Any)
+# isequal(::Any, ::Symbol)
+# isequal(::Any, ::Nothing)
+# isequal(::UUID, ::Any)
+# cmp(::AbstractString, ::String)
+# convert(::Type{Int}, ::Integer)
+# convert(::Type{UInt}, ::Integer)
+# convert(::Type{Union{Nothing,Module}}, ::Any)
+# Base.to_index(::Integer)
+# iterate(::Base.OneTo, ::Any)
+# repr(::Any)
+# thisind(::AbstractString, ::Int)
+# getindex(::String, ::Any)
+# string(::String, ::Integer, ::String)
+# ^(::String, ::Integer)
+# repeat(::String, ::Integer)
+# Base.isidentifier(::AbstractString)
+# +(::Ptr{UInt8}, ::Integer)
+# Base._show_default(::Base.GenericIOBuffer{Array{UInt8,1}}, ::Any)
+
+# Ones that are better but I don't remember helping with
+# isconcretetype(::Any)
+# pointer(::String, ::Integer)
+
+# Regressions:
+# basename(::AbstractString)
+# splitdir(::AbstractString)
+# isfile(::Any)
+# joinpath(::AbstractString, ::String)
+# sizeof(::Unsigned)
+# +(::Int, ::Any, ::Any)
+# Base.split_sign(::Integer)
+# in(::Any, ::Tuple{Symbol})
diff --git a/demos/abstract_gen_data.jl b/demos/abstract_gen_data.jl
new file mode 100644
index 0000000..6acd9c9
--- /dev/null
+++ b/demos/abstract_gen_data.jl
@@ -0,0 +1,157 @@
+using MethodAnalysis
+
+"""
+    atrisktype(typ)
+
+Analyze a type (typically a MethodInstance signature) and return `true` if it seems at risk
+for being invalidated.
+
+Never at risk: `TypeVar`s, values that are not types, `Union{}`, `Type{T}`, `Function`, and
+concrete types. A `Union` is at risk if either member is. Call signatures (`Tuple` types) are
+also exempt for constructor calls, `convert(::Type{T}, ::S) where S<:T`, and
+`getindex`/`length`/`isempty`/`iterate` on tuples; `show` is judged by its arguments only.
+Any other type is at risk, unless it has parameters and none of them is at risk.
+"""
+function atrisktype(@nospecialize(typ))
+    # signatures like `convert(Vector, a)`, `foo(::Vararg{Symbol,N}) where N` do not seem to pose a problem
+    isa(typ, TypeVar) && return false
+    # isbits parameters are not a problem
+    isa(typ, Type) || return false
+    if isa(typ, UnionAll)
+        typ = Base.unwrap_unionall(typ)
+    end
+    # Exclude signatures with Union{}
+    typ === Union{} && return false
+    isa(typ, Union) && return atrisktype(typ.a) | atrisktype(typ.b)
+    # Type{T}: signatures like `convert(::Type{AbstractString}, ::String)` are not problematic, so mark Type as OK
+    typ <: Type && return false
+    if typ <: Tuple && length(typ.parameters) >= 1
+        params = typ.parameters
+        nparams = length(params)
+        p1 = params[1]
+        # Constructor calls are not themselves a problem (any `convert`s they trigger might be, but those are covered)
+        isa(p1, Type) && p1 <: Type && return false
+        # convert(::Type{T}, ::S) where S<:T is not problematic
+        if p1 === typeof(Base.convert) || p1 === typeof(Core.convert) || p1 === typeof(Core.Compiler.convert)
+            # Signatures lacking either argument fall through to the generic checks below
+            if nparams >= 3
+                p2, p3 = params[2], params[3]
+                if isa(p2, Type)
+                    p2 = Base.unwrap_unionall(p2)
+                    if isa(p2, DataType) && length(p2.parameters) === 1
+                        T = p2.parameters[1]
+                        isa(p3, Type) && isa(T, Type) && p3 <: T && return false
+                    end
+                end
+            end
+        # `getindex`, `length`, etc are OK for various Tuple{T1,T2,...}
+        elseif (p1 === typeof(Base.getindex) || p1 === typeof(Core.Compiler.getindex)) ||
+               (p1 === typeof(Base.length) || p1 === typeof(Core.Compiler.length)) ||
+               (p1 === typeof(Base.isempty) || p1 === typeof(Core.Compiler.isempty)) ||
+               (p1 === typeof(Base.iterate) || p1 === typeof(Core.iterate) || p1 === typeof(Core.Compiler.iterate))
+            if nparams >= 2
+                p2 = params[2]
+                if isa(p2, Type)
+                    p2 = Base.unwrap_unionall(p2)
+                    p2 <: Tuple && return false
+                end
+            end
+        # show(io::IO, x) is OK as long as typeof(x) is safe
+        elseif p1 === typeof(Base.show)
+            nparams >= 2 && atrisktype(params[2]) && return true
+            nparams == 3 && atrisktype(params[3]) && return true
+            return false
+        end
+    end
+    # Standard DataTypes
+    isconcretetype(typ) && return false
+    # ::Function args are excluded
+    typ === Function && return false
+    # A type with parameters is only at risk if at least one of its parameters is
+    !isempty(typ.parameters) && (any(atrisktype, typ.parameters) || return false)
+    return true
+end
+
+# A few tests
+@assert atrisktype(Tuple{typeof(==),Any,Any})
+@assert atrisktype(Tuple{typeof(==),Symbol,Any})
+@assert atrisktype(Tuple{typeof(==),Any,Symbol})
+@assert !atrisktype(Tuple{typeof(==),Symbol,Symbol})
+@assert !atrisktype(Tuple{typeof(convert),Type{Any},Any})
+@assert !atrisktype(Tuple{typeof(convert),Type{AbstractString},AbstractString})
+@assert !atrisktype(Tuple{typeof(convert),Type{AbstractString},String})
+@assert atrisktype(Tuple{typeof(convert),Type{String},AbstractString})
+@assert !atrisktype(Tuple{typeof(map),Function,Vector{Any}})
+@assert !atrisktype(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Union{String,Int}})
+@assert atrisktype(Tuple{typeof(getindex),Dict{Union{String,Int},Any},Any})
+@assert !atrisktype(Tuple{Type{BoundsError},Any,Any})
+@assert atrisktype(Tuple{typeof(sin),Any})
+@assert !atrisktype(Tuple{typeof(length),Tuple{Any,Any}})
+# Signatures that are too short for the special cases must not throw
+@assert !atrisktype(Tuple{typeof(convert),Type{Int}})
+@assert !atrisktype(Tuple{typeof(length)})
+
+"""
+    collect_mis(m::Method)
+
+Return a vector of the `Core.MethodInstance`s that `visit(m)` passes to its callback.
+"""
+function collect_mis(m::Method)
+    list = Core.MethodInstance[]
+    visit(m) do item
+        if isa(item, Core.MethodInstance)
+            push!(list, item)
+            return false
+        end
+        return true
+    end
+    return list
+end
+
+# The MethodInstances found for each Method
+const mis = Dict{Method,Vector{Core.MethodInstance}}()
+visit() do item
+    if item isa Method
+        m = item
+        mis[m] = collect_mis(m)
+        return false
+    end
+    return true
+end
+
+# Count # of backedges for MethodInstances with abstract types
+const becounter = Dict{Core.MethodInstance,Int}()
+visit() do item
+    if item isa Core.MethodInstance
+        if atrisktype(item.specTypes)
+            becounter[item] = length(all_backedges(item))
+        end
+        return false
+    end
+    return true
+end
+
+prs = sort!(collect(becounter); by=last)
+
+# Organize them by method instead
+
+const mcounter = Dict{Method,Int}()
+for (mi, c) in becounter
+    oc = get(mcounter, mi.def, 0)
+    mcounter[mi.def] = oc + c
+end
+
+mprs = sort!(collect(mcounter); by=last)
+
+open("/tmp/methinstdata_$VERSION.log", "w") do io
+    for (mi, c) in prs
+        c == 0 && continue
+        println(io, mi.specTypes=>c)
+    end
+end
+open("/tmp/methdata_$VERSION.log", "w") do io
+    for (m, c) in mprs
+        c == 0 && continue
+        println(io, m.sig=>c)
+    end
+end
diff --git a/demos/abstract_inference.jl b/demos/abstract_inference.jl
new file mode 100644
index 0000000..e2f1c93
--- /dev/null
+++ b/demos/abstract_inference.jl
@@ -0,0 +1,393 @@
+using MethodAnalysis
+using Base: _methods_by_ftype, get_world_counter, to_tuple_type, func_for_method_checked, remove_linenums!
+using Core: CodeInfo, SSAValue, SlotNumber, SimpleVector
+
+# `typeinf_code` is called with an interpreter when `Core.Compiler.NativeInterpreter` exists
+# and with a `Params` object otherwise; `getcode` hides that difference from its caller.
+if isdefined(Core.Compiler, :NativeInterpreter)
+    getcode(meth, x, world, optimize; interp=Core.Compiler.NativeInterpreter(world)) =
+        Core.Compiler.typeinf_code(interp, meth, x[1], x[2], optimize)
+else
+    getcode(meth, x, world, optimize; params=Core.Compiler.Params(world)) =
+        Core.Compiler.typeinf_code(meth, x[1], x[2], optimize, params)
+end
+
+"""
+    infer_with_sig(m::Method; optimize=true, debuginfo=:none, world=get_world_counter(), kwargs...)
+
+Run inference on `m` for the tuple type of its own signature, `to_tuple_type(m.sig)`.
+
+Return `(code, sparams) => rettype` for the match whose method is `m`. Line numbers are
+removed from `code` when `debuginfo === :none`; `kwargs` are forwarded to `getcode`.
+Throw an error if none of the matches is `m`.
+"""
+function infer_with_sig(m::Method; optimize=true, debuginfo=:none, world=get_world_counter(), kwargs...)
+    tt = to_tuple_type(m.sig)
+    meths = _methods_by_ftype(tt, -1, world)
+    for x in meths
+        x[3] == m || continue
+        meth = func_for_method_checked(x[3], tt, x[2])
+        (code, ty) = getcode(meth, x, world, optimize; kwargs...)
+        debuginfo === :none && code !== nothing && remove_linenums!(code)
+        return (code, x[2])=>ty
+    end
+    error("no match for ", m)
+end
+
+"""
+    BadCall(callee, argtyps, rettype)
+
+Record of a call to `callee` whose argument types `argtyps` and return type `rettype` were
+rejected by the check registered for that callee (see `bad_calls`).
+"""
+struct BadCall
+    # `bad_calls` examines callees that are either a `GlobalRef` or a `Type`
+    callee::Union{GlobalRef,Type}
+    argtyps
+    rettype
+end
+
+"""
+    peeltype(T)
+
+Strip inference annotations from `T`: return the type of the value of a `Const`, the `typ`
+field of a `PartialStruct` or `MaybeUndef`, and `T` itself otherwise.
+"""
+function peeltype(@nospecialize(T))
+    isa(T, Core.Compiler.Const) && return Core.Typeof(T.val)
+    isa(T, Core.Compiler.PartialStruct) && return T.typ
+    isa(T, Core.Compiler.MaybeUndef) && return T.typ
+    return T
+end
+
+# Turn a callee as it appears in a call statement into the object it names (`nothing` if unbound)
+resolve(g::GlobalRef) = isdefined(g.mod, g.name) ? getfield(g.mod, g.name) : nothing
+resolve(T::Type) = T
+
+"""
+    bad_calls(src::CodeInfo, sparams::SimpleVector, ty, tfuncs::AbstractDict)
+
+Find the calls in `src` whose inferred return type is not the expected one.
+
+`tfuncs` maps a callee to a predicate `tfunc(argtyps, rettype)` that returns `true` if
+`rettype` is the expected type. Only `:call` statements whose callee is a `GlobalRef` or a
+`Type` with an entry in `tfuncs` are checked. When the next statement is a
+`Core.typeassert` of the call's result, the return type is first intersected with the
+asserted type. `sparams` supplies the types of `:static_parameter` expressions; `ty` is
+not used.
+
+Return a vector of `statement index => BadCall` pairs.
+"""
+function bad_calls(src::CodeInfo, sparams::SimpleVector, @nospecialize(ty), tfuncs::AbstractDict)
+    # Type of a statement argument. Anything not handled explicitly is treated as a literal:
+    # its type is returned, or the literal itself when `typof=false`
+    function lookup(a; typof::Bool=true)
+        if isa(a, SSAValue)
+            return peeltype(src.ssavaluetypes[a.id])
+        elseif isa(a, SlotNumber)
+            return peeltype(src.slottypes[a.id])
+        elseif isdefined(Core.Compiler, :Argument) && isa(a, Core.Compiler.Argument)
+            return peeltype(src.slottypes[a.n])
+        elseif isa(a, GlobalRef) && isdefined(a.mod, a.name)
+            return Core.Typeof(getfield(a.mod, a.name))
+        elseif isa(a, Expr)
+            if a.head === :static_parameter
+                n = a.args[1]
+                t = Any
+                if 1 <= n <= length(sparams)
+                    t = sparams[n]
+                end
+                return t
+            else
+                error("unrecognized Expr head ", a.head)
+            end
+        end
+        return typof ? Core.Typeof(peeltype(a)) : peeltype(a)
+    end
+
+    badstmts = Pair{Int,BadCall}[]
+    for (i, stmt) in enumerate(src.code)
+        isa(stmt, Expr) && stmt.head === :call || continue
+        g = stmt.args[1]
+        isa(g, GlobalRef) || isa(g, Type) || continue
+        tfunc = get(tfuncs, resolve(g), nothing)
+        tfunc === nothing && continue
+        atyps = Any[lookup(stmt.args[j]) for j in 2:length(stmt.args)]
+        sttyp = peeltype(src.ssavaluetypes[i])
+        # Check to see if the next line has a typeassert
+        if i < length(src.code)
+            nextstmt = src.code[i+1]
+            if isa(nextstmt, Expr) && nextstmt.head === :call
+                c = nextstmt.args[1]
+                if isa(c, GlobalRef) && c.mod === Core && c.name === :typeassert && nextstmt.args[2] == SSAValue(i)
+                    tatyp = lookup(nextstmt.args[3]; typof=false)
+                    sttyp = typeintersect(sttyp, tatyp)
+                end
+            end
+        end
+        if !tfunc(atyps, sttyp)
+            push!(badstmts, i => BadCall(g, atyps, sttyp))
+        end
+    end
+    return badstmts
+end
+
+"""
+    tfunc_promote(atyps, rettyp)
+
+Return `true` if `rettyp` is identical to the `promote_type` of all of `atyps`. A leading
+`TypeVar` argument is accepted without checking.
+"""
+function tfunc_promote(atyps, @nospecialize(rettyp))
+    T = atyps[1]
+    isa(T, TypeVar) && return true
+    for i = 2:length(atyps)
+        T = promote_type(T, atyps[i])
+    end
+    return rettyp === T
+end
+
+"""
+    tfunc_promote_or_subtype(atyps, rettyp)
+
+Return `true` if `tfunc_promote` accepts the call or `rettyp` is a subtype of one of `atyps`.
+"""
+function tfunc_promote_or_subtype(atyps, @nospecialize(rettyp))
+    tfunc_promote(atyps, rettyp) && return true
+    for a in atyps
+        rettyp <: a && return true
+    end
+    return false
+end
+
+"""
+    tfunc_sub1(atyps, rettyp, U)
+
+For a call with exactly one argument, return `true` if both the argument type and `rettyp`
+are subtypes of `U`.
+"""
+function tfunc_sub1(atyps, @nospecialize(rettyp), @nospecialize(U))
+    T = only(atyps)
+    return T<:U && rettyp<:U
+end
+
+# Return `true` if `rettyp` is a subtype of `U`, whatever the argument types are
+tfunc_returns(atyps, @nospecialize(rettyp), @nospecialize(U)) = rettyp <: U
+
+"""
+    gettyp(T)
+
+Reduce `T` to the type it stands for: the upper bound of a `TypeVar`, the body of a
+`UnionAll`, and the parameter `P` of `Type{P}` (`Any` if the `Type` does not have exactly
+one parameter), applied recursively. Anything else is returned unchanged.
+"""
+function gettyp(T)
+    if isa(T, TypeVar)
+        return gettyp(T.ub)
+    elseif isa(T, UnionAll)
+        return gettyp(Base.unwrap_unionall(T))
+    elseif isa(T, DataType) && T<:Type
+        return length(T.parameters) == 1 ? gettyp(T.parameters[1]) : Any
+    else
+        return T
+    end
+end
+
+"""
+    tfunc_convert(atyps, rettyp)
+
+Return `true` if `rettyp` is a subtype of the type named by the first argument.
+"""
+function tfunc_convert(atyps, @nospecialize(rettyp))
+    T = gettyp(atyps[1])
+    return gettyp(rettyp) <: T
+end
+
+"""
+    tfunc_iterate(atyps, rettyp)
+
+Return `true` if `rettyp` is `Nothing` or a 2-tuple type (or a union of them). For an
+`AbstractString` container the tuple must be `(AbstractChar, Int)`; for an `AbstractArray`
+it must be `(eltype, Int or CartesianIndex)`.
+"""
+function tfunc_iterate(atyps, @nospecialize(rettyp))
+    atyps[1] <: AbstractString && return rettyp <: Union{Nothing,Tuple{AbstractChar,Int}}
+    if atyps[1] <: AbstractArray
+        T = eltype(atyps[1])
+        return rettyp <: Union{Nothing,Tuple{T,Union{Int,CartesianIndex}}}
+    end
+    return rettyp <: Union{Nothing,Tuple{Any,Any}}
+end
+
+"""
+    tfunc_getindex(atyps, rettyp)
+
+For indexing where every index type is an `Integer` subtype, return `true` if `rettyp` is a
+subtype of the container's element type. Any other kind of indexing is accepted.
+"""
+function tfunc_getindex(atyps, @nospecialize(rettyp))
+    Tel = gettyp(eltype(atyps[1]))
+    if all(T->(Tt = gettyp(T); isa(Tt,Type) ? Tt<:Integer : false), atyps[2:end])
+        return gettyp(rettyp) <: Tel
+    end
+    return true   # don't try to infer non-scalar indexing
+end
+
+# Expected-return-type checks, keyed by the function they apply to
+tfuncs = IdDict{Any,Function}(
+    Base.:(&) => tfunc_promote,
+    Base.:(|) => tfunc_promote,
+    Base.:(!) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:(+) => tfunc_promote_or_subtype,
+    Base.:(-) => tfunc_promote_or_subtype,
+    Base.:((==)) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:((<)) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:((<=)) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:((>)) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:((>=)) => (a,t)->tfunc_returns(a,t,Union{Bool,Missing}),
+    Base.:(cmp) => (a,t)->tfunc_returns(a,t,Int),
+    Base.:(convert) => tfunc_convert,
+    Base.:(cconvert) => tfunc_convert,
+    Base.:(unsafe_convert) => tfunc_convert,
+    Base.:(iterate) => tfunc_iterate,
+    Base.:(getindex) => tfunc_getindex,
+    Base.:(leading_zeros) => (a,t)->tfunc_sub1(a, t, Integer),
+    Base.:(thisind) => (a,t)->tfunc_returns(a,t,Int),
+    Base.:(prevind) => (a,t)->tfunc_returns(a,t,Int),
+    Base.:(nextind) => (a,t)->tfunc_returns(a,t,Int),
+    Base.:(ncodeunits) => (a,t)->tfunc_returns(a,t,Int),
+    # `codeunit(s)` returns the code unit type itself, `codeunit(s, i)` a value of that type
+    Base.:(codeunit) => (a,t)->tfunc_returns(a,t,length(a) == 1 ? Union{Type{UInt8},Type{UInt16},Type{UInt32}} : Union{UInt8,UInt16,UInt32}),
+    Base.:(eof) => (a,t)->tfunc_returns(a,t,Bool),
+    Base.:(readline) => (a,t)->tfunc_returns(a,t,AbstractString),
+    Base.:(displaysize) => (a,t)->tfunc_returns(a,t,Tuple{Int,Int}),
+    Base.:(sizeof) => (a,t)->tfunc_returns(a,t,Int),
+    Base.:(length) => (a,t)->tfunc_returns(a,t,Union{Int,UInt}),
+    Base.:(size) => (a,t)->tfunc_returns(a,t,length(a) == 1 ? Tuple{Vararg{Int}} : Int),
+    Base.:(axes) => (a,t)->tfunc_returns(a,t,length(a) == 1 ? Tuple{Vararg{<:AbstractUnitRange}} : AbstractUnitRange),
+    Base.:(resize!) => (a,t)->tfunc_returns(a,t,a[1]),
+    Base.:(copyto!) => (a,t)->tfunc_returns(a,t,a[1]),
+)
+# Predicates are expected to return `Bool` or `Missing`; names that `Base` does not define are skipped
+for sym in (
+    :isabspath,
+    :isapprox,
+    :isascii,
+    :isblockdev,
+    :ischardev,
+    :iscntrl,
+    :isdigit,
+    :isdir,
+    :isdirpath,
+    :isdisjoint,
+    :isempty,
+    :isequal,
+    :iseven,
+    :isfifo,
+    :isfile,
+    :isfinite,
+    :isinf,
+    :isinteger,
+    :isinteractive,
+    :isless,
+    :isletter,
+    :islink,
+    :islocked,
+    :islowercase,
+    :ismarked,
+    :ismissing,
+    :ismount,
+    :isnan,
+    :isnothing,
+    :isnumeric,
+    :isodd,
+    :isone,
+    :isopen,
+    :ispath,
+    :isperm,
+    :ispow2,
+    :isprint,
+    :ispunct,
+    :isreadable,
+    :isreadonly,
+    :isready,
+    :isreal,
+    :issetequal,
+    :issetgid,
+    :issetuid,
+    :issocket,
+    :issorted,
+    :isspace,
+    :issticky,
+    :issubnormal,
+    :issubset,
+    :istaskdone,
+    :istaskfailed,
+    :istaskstarted,
+    :istextmime,
+    :isuppercase,
+    :isvalid,
+    :iswritable,
+    :isxdigit,
+    :iszero,
+    )
+    f = resolve(GlobalRef(Base, sym))
+    f === nothing && continue
+    tfuncs[f] = (a,t)->tfunc_returns(a,t,Union{Bool,Missing})
+end
+
+"""
+    parcel_by_callee(badcalls::Dict{Method,Any})
+
+Invert `badcalls`, which maps a method to its `index => BadCall` pairs: return an `IdDict`
+from each resolved callee to the set of methods that contain a bad call to it.
+"""
+function parcel_by_callee(badcalls::Dict{Method,Any})
+    callers = IdDict{Any,Set{Method}}()
+    for (m, prs) in badcalls
+        for (idx, bc) in prs
+            g = resolve(bc.callee)
+            list = get!(callers, g, Set{Method}())
+            push!(list, m)
+        end
+    end
+    return callers
+end
+
+"""
+    print_sorted(callees)
+
+Print one `callee: number of entries` line per element of `callees`, sorted as strings.
+"""
+function print_sorted(callees)
+    strs = String[]
+    for (callee, list) in callees
+        push!(strs, string(callee, ": ", length(list)))
+    end
+    sort!(strs)
+    for str in strs
+        println(str)
+    end
+    return nothing
+end
+
+# Check each method reached by `visit(Base)` that has no generator; keep those with bad calls
+bfs = Dict{Method,Any}()
+visit(Base) do item
+    if isa(item, Method)
+        isdefined(item, :generator) && return false
+        try
+            (src, sparams), ty = infer_with_sig(item)
+            bs = bad_calls(src, sparams, ty, tfuncs)
+            isempty(bs) || (bfs[item] = bs)
+        catch
+            @show item
+            rethrow()   # keeps the backtrace of the original failure
+        end
+        return false
+    end
+    return true
+end
+
+callees = parcel_by_callee(bfs)
+print_sorted(callees)