Skip to content

Commit 969e7c9

Browse files
authored
switch to a new CFG selection logic (#116)
* switch to a new CFG selection logic This commit aims to port the new CFG selection logic implemented in aviatesk/JET.jl#654 to LCU, so that it can be shared between LCU and JET. The new algorithm is based on what was proposed in [Wei84][^Wei84]. If there is even one active block in the blocks reachable from a conditional branch up to its successors' nearest common post-dominator (referred to as "𝑰𝑵𝑭𝑳" in the paper), it is necessary to follow that conditional branch and execute the code. Otherwise, execution can be short-circuited[^short-circuit] from the conditional branch to the nearest common post-dominator. Regarding the `GotoNode`, it is now marked only for active blocks after all requirements have converged, rather than being marked inside `add_loops!` or similar functions. This approach eliminates the need to add unnecessary blocks inside the loop, and the need to use `add_loops!`, while allowing the required CFG to be executed safely. [^Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984. https://ieeexplore.ieee.org/document/5010248 [^short-circuit]: It is important to note that in Julia's IR (`CodeInfo`), "short-circuiting" a specific code region is not a simple task. Simply ignoring the path to the post-dominator does not guarantee fall-through to the post-dominator. Therefore, a more careful implementation is required for this aspect.
1 parent 75da0d8 commit 969e7c9

File tree

3 files changed

+158
-59
lines changed

3 files changed

+158
-59
lines changed

src/codeedges.jl

Lines changed: 128 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ function print_with_code(preprint, postprint, io::IO, src::CodeInfo)
108108
:displaysize=>displaysize(io),
109109
:SOURCE_SLOTNAMES => Base.sourceinfo_slotnames(src))
110110
used = BitSet()
111-
cfg = Core.Compiler.compute_basic_blocks(src.code)
111+
cfg = compute_basic_blocks(src.code)
112112
for stmt in src.code
113113
Core.Compiler.scan_ssa_use!(push!, used, stmt)
114114
end
@@ -629,8 +629,7 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
629629
objs = add_requests!(isrequired, objs, edges, norequire)
630630

631631
# Compute basic blocks, which we'll use to make sure we mark necessary control-flow
632-
cfg = Core.Compiler.compute_basic_blocks(src.code) # needed for control-flow analysis
633-
domtree = construct_domtree(cfg.blocks)
632+
cfg = compute_basic_blocks(src.code) # needed for control-flow analysis
634633
postdomtree = construct_postdomtree(cfg.blocks)
635634

636635
# We'll mostly use generic graph traversal to discover all the lines we need,
@@ -650,15 +649,18 @@ function lines_required!(isrequired::AbstractVector{Bool}, objs, src::CodeInfo,
650649
changed |= add_named_dependencies!(isrequired, edges, objs, norequire)
651650

652651
# Add control-flow
653-
changed |= add_loops!(isrequired, cfg)
654-
changed |= add_control_flow!(isrequired, cfg, domtree, postdomtree)
652+
changed |= add_control_flow!(isrequired, src, cfg, postdomtree)
655653

656654
# So far, everything is generic graph traversal. Now we add some domain-specific information
657655
changed |= add_typedefs!(isrequired, src, edges, typedefs, norequire)
658656
changed |= add_inplace!(isrequired, src, edges, norequire)
659657

660658
iter += 1 # just for diagnostics
661659
end
660+
661+
# now mark the active goto nodes
662+
add_active_gotos!(isrequired, src, cfg, postdomtree)
663+
662664
return isrequired
663665
end
664666

@@ -728,72 +730,140 @@ end
728730

729731
## Add control-flow
730732

731-
# Mark loops that contain evaluated statements
732-
function add_loops!(isrequired, cfg)
733+
using Core: CodeInfo
734+
using Core.Compiler: CFG, BasicBlock, compute_basic_blocks
735+
736+
# The goal of this function is to request concretization of the minimal necessary control
737+
# flow to evaluate statements whose concretization has already been requested.
738+
# The basic algorithm is based on what was proposed in [^Wei84]. If there is even one active
739+
# block in the blocks reachable from a conditional branch up to its successors' nearest
740+
# common post-dominator (referred to as 𝑰𝑵𝑭𝑳 in the paper), it is necessary to follow
741+
# that conditional branch and execute the code. Otherwise, execution can be short-circuited
742+
# from the conditional branch to the nearest common post-dominator.
743+
#
744+
# COMBAK: It is important to note that in Julia's intermediate code representation (`CodeInfo`),
745+
# "short-circuiting" a specific code region is not a simple task. Simply ignoring the path
746+
# to the post-dominator does not guarantee fall-through to the post-dominator. Therefore,
747+
# a more careful implementation is required for this aspect.
748+
#
749+
# [Wei84]: M. Weiser, "Program Slicing," IEEE Transactions on Software Engineering, 10, pages 352-357, July 1984.
750+
function add_control_flow!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
751+
local changed::Bool = false
752+
function mark_isrequired!(idx::Int)
753+
if !isrequired[idx]
754+
changed |= isrequired[idx] = true
755+
return true
756+
end
757+
return false
758+
end
759+
for bbidx = 1:length(cfg.blocks) # forward traversal
760+
bb = cfg.blocks[bbidx]
761+
nsuccs = length(bb.succs)
762+
if nsuccs == 0
763+
continue
764+
elseif nsuccs == 1
765+
continue # leave a fall-through terminator unmarked: `GotoNode`s are marked later
766+
elseif nsuccs == 2
767+
termidx = bb.stmts[end]
768+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
769+
if is_conditional_block_active(isrequired, bb, cfg, postdomtree)
770+
mark_isrequired!(termidx)
771+
else
772+
# fall-through to the post dominator block (by short-circuiting all statements between)
773+
end
774+
end
775+
end
776+
return changed
777+
end
778+
779+
is_conditional_terminator(@nospecialize stmt) = stmt isa GotoIfNot ||
780+
(@static @isdefined(EnterNode) ? stmt isa EnterNode : isexpr(stmt, :enter))
781+
782+
function is_conditional_block_active(isrequired, bb::BasicBlock, cfg::CFG, postdomtree)
783+
return visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
784+
for blk in 𝑰𝑵𝑭𝑳
785+
if blk == postdominator
786+
continue # skip the post-dominator block and move on to the next 𝑰𝑵𝑭𝑳 block
787+
end
788+
if any(@view isrequired[cfg.blocks[blk].stmts])
789+
return true
790+
end
791+
end
792+
return false
793+
end
794+
end
795+
796+
function visit_𝑰𝑵𝑭𝑳_blocks(func, bb::BasicBlock, cfg::CFG, postdomtree)
797+
succ1, succ2 = bb.succs
798+
postdominator = nearest_common_dominator(postdomtree, succ1, succ2)
799+
𝑰𝑵𝑭𝑳 = reachable_blocks(cfg, succ1, postdominator) ∪ reachable_blocks(cfg, succ2, postdominator)
800+
return func(postdominator, 𝑰𝑵𝑭𝑳)
801+
end
802+
803+
function reachable_blocks(cfg, from_bb::Int, to_bb::Int)
804+
worklist = Int[from_bb]
805+
visited = BitSet(from_bb)
806+
if to_bb == from_bb
807+
return visited
808+
end
809+
push!(visited, to_bb)
810+
function visit!(bb::Int)
811+
if bb ∉ visited
812+
push!(visited, bb)
813+
push!(worklist, bb)
814+
end
815+
end
816+
while !isempty(worklist)
817+
foreach(visit!, cfg.blocks[pop!(worklist)].succs)
818+
end
819+
return visited
820+
end
821+
822+
function add_active_gotos!(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
823+
dead_blocks = compute_dead_blocks(isrequired, src, cfg, postdomtree)
733824
changed = false
734-
for (ibb, bb) in enumerate(cfg.blocks)
735-
needed = false
736-
for ibbp in bb.preds
737-
# Is there a backwards-pointing predecessor, and if so are there any required statements between the two?
738-
ibbp > ibb || continue # not a loop-block predecessor
739-
r, rp = rng(bb), rng(cfg.blocks[ibbp])
740-
r = first(r):first(rp)-1
741-
needed |= any(view(isrequired, r))
742-
end
743-
if needed
744-
# Mark the final statement of all predecessors
745-
for ibbp in bb.preds
746-
rp = rng(cfg.blocks[ibbp])
747-
changed |= !isrequired[last(rp)]
748-
isrequired[last(rp)] = true
825+
for bbidx = 1:length(cfg.blocks)
826+
if bbidx ∉ dead_blocks
827+
bb = cfg.blocks[bbidx]
828+
nsuccs = length(bb.succs)
829+
if nsuccs == 1
830+
termidx = bb.stmts[end]
831+
if src.code[termidx] isa GotoNode
832+
changed |= isrequired[termidx] = true
833+
end
749834
end
750835
end
751836
end
752837
return changed
753838
end
754839

755-
function add_control_flow!(isrequired, cfg, domtree, postdomtree)
756-
changed, _changed = false, true
757-
blocks = cfg.blocks
758-
nblocks = length(blocks)
759-
while _changed
760-
_changed = false
761-
for (ibb, bb) in enumerate(blocks)
762-
r = rng(bb)
763-
if any(view(isrequired, r))
764-
# Walk up the dominators
765-
jbb = ibb
766-
while jbb != 1
767-
jdbb = domtree.idoms_bb[jbb]
768-
dbb = blocks[jdbb]
769-
# Check the successors; if jbb doesn't post-dominate, mark the last statement
770-
for s in dbb.succs
771-
if !postdominates(postdomtree, jbb, s)
772-
idxlast = rng(dbb)[end]
773-
_changed |= !isrequired[idxlast]
774-
isrequired[idxlast] = true
775-
break
776-
end
840+
# find dead blocks using the same approach as `add_control_flow!`, for the converged `isrequired`
841+
function compute_dead_blocks(isrequired, src::CodeInfo, cfg::CFG, postdomtree)
842+
dead_blocks = BitSet()
843+
for bbidx = 1:length(cfg.blocks)
844+
bb = cfg.blocks[bbidx]
845+
nsuccs = length(bb.succs)
846+
if nsuccs == 2
847+
termidx = bb.stmts[end]
848+
@assert is_conditional_terminator(src.code[termidx]) "invalid IR"
849+
visit_𝑰𝑵𝑭𝑳_blocks(bb, cfg, postdomtree) do postdominator::Int, 𝑰𝑵𝑭𝑳::BitSet
850+
is_𝑰𝑵𝑭𝑳_active = false
851+
for blk in 𝑰𝑵𝑭𝑳
852+
if blk == postdominator
853+
continue # skip the post-dominator block and move on to the next 𝑰𝑵𝑭𝑳 block
777854
end
778-
jbb = jdbb
779-
end
780-
# Walk down the post-dominators, including self
781-
jbb = ibb
782-
while jbb != 0 && jbb < nblocks
783-
pdbb = blocks[jbb]
784-
# Check if the exit of this block is a GotoNode or `return`
785-
if length(pdbb.succs) < 2
786-
idxlast = rng(pdbb)[end]
787-
_changed |= !isrequired[idxlast]
788-
isrequired[idxlast] = true
855+
if any(@view isrequired[cfg.blocks[blk].stmts])
856+
is_𝑰𝑵𝑭𝑳_active |= true
857+
break
789858
end
790-
jbb = postdomtree.idoms_bb[jbb]
859+
end
860+
if !is_𝑰𝑵𝑭𝑳_active
861+
union!(dead_blocks, delete!(𝑰𝑵𝑭𝑳, postdominator))
791862
end
792863
end
793864
end
794-
changed |= _changed
795865
end
796-
return changed
866+
return dead_blocks
797867
end
798868

799869
# Do a traversal of "numbered" predecessors and find statement ranges and names of type definitions

src/domtree.jl

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,3 +412,30 @@ function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber)
412412
end
413413
return bb1 == bb2
414414
end
415+
416+
"""
417+
nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
418+
419+
Compute the nearest common (post-)dominator of `a` and `b`.
420+
"""
421+
function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
422+
a == 0 && return a
423+
b == 0 && return b
424+
alevel = domtree.nodes[a].level
425+
blevel = domtree.nodes[b].level
426+
# W.l.o.g. assume blevel <= alevel
427+
if alevel < blevel
428+
a, b = b, a
429+
alevel, blevel = blevel, alevel
430+
end
431+
while alevel > blevel
432+
a = domtree.idoms_bb[a]
433+
alevel -= 1
434+
end
435+
while a != b && a != 0
436+
a = domtree.idoms_bb[a]
437+
b = domtree.idoms_bb[b]
438+
end
439+
@assert a == b
440+
return a
441+
end

src/packagedef.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ if isdefined(Base, :Experimental) && isdefined(Base.Experimental, Symbol("@optle
22
@eval Base.Experimental.@optlevel 1
33
end
44

5-
using Core: SimpleVector, CodeInfo, NewvarNode, GotoNode
5+
using Core: SimpleVector
6+
using Core.IR
67
using Base.Meta: isexpr
78

89
const SSAValues = Union{Core.Compiler.SSAValue, JuliaInterpreter.SSAValue}
@@ -22,6 +23,7 @@ else
2223
const construct_domtree = Core.Compiler.construct_domtree
2324
const construct_postdomtree = Core.Compiler.construct_postdomtree
2425
const postdominates = Core.Compiler.postdominates
26+
const nearest_common_dominator = Core.Compiler.nearest_common_dominator
2527
end
2628

2729
# precompilation

0 commit comments

Comments
 (0)