From 88fa433bdf8330f5670c99c9d67629cd2ef25a7d Mon Sep 17 00:00:00 2001
From: Jarrett Revels <jarrettrevels@gmail.com>
Date: Mon, 15 May 2017 14:13:07 -0400
Subject: [PATCH] redesign type inference outer loop (PR #21677)

removes the global work queue, which allows increasing the precision of cycle detection and resolution, and decreases the need for the threading synchronization lock surrounding inference

updates `inInference` flag usage to be merely a hint for `jl_type_infer` to not bother trying to infer a method (helps avoid accidental infinite recursion over inferring type inference), and enables inferring inference

(cherry picked from commit 5847317e5e3cc4d353bbfa2225fcc4b36c0e23cd)
---
 base/inference.jl  | 513 +++++++++++++++++++--------------------------
 base/precompile.jl |   7 +-
 base/sysimg.jl     |   5 +-
 src/gf.c           |  65 +++---
 src/interpreter.c  |   2 +-
 test/staged.jl     |   3 +-
 6 files changed, 254 insertions(+), 341 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index 17220ed7cd4d6..997d66a743df6 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -131,14 +131,11 @@ mutable struct InferenceState
     # ssavalue sparsity and restart info
     ssavalue_uses::Vector{IntSet}
     ssavalue_init::Vector{Any}
-    # call-graph edges connecting from a caller to a callee (and back)
-    # we shouldn't need to iterate edges very often, so we use it to optimize the lookup from edge -> linenum
-    # whereas backedges is optimized for iteration
-    edges::ObjectIdDict # a Dict{InferenceState, Vector{LineNum}}
-    backedges::Vector{Tuple{InferenceState, Vector{LineNum}}}
-    # iteration fixed-point detection
-    fixedpoint::Bool
-    inworkq::Bool
+
+    backedges::Vector{Tuple{InferenceState, LineNum}} # call-graph backedges connecting from callee to caller
+    callers_in_cycle::Vector{InferenceState}
+    parent::Union{Void, InferenceState}
+
     const_api::Bool
     const_ret::Bool
 
@@ -148,6 +145,8 @@ mutable struct InferenceState
 
     inferred::Bool
 
+    dont_work_on_me::Bool
+
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
     function InferenceState(linfo::MethodInstance, src::CodeInfo,
                             optimize::Bool, cached::Bool, params::InferenceParams)
@@ -268,41 +267,43 @@ mutable struct InferenceState
             Union{}, W, 1, n,
             cur_hand, handler_at, n_handlers,
             ssavalue_uses, ssavalue_init,
-            ObjectIdDict(), # Dict{InferenceState, Vector{LineNum}}(),
-            Vector{Tuple{InferenceState, Vector{LineNum}}}(),
-            false, false, false, false, optimize, cached, false)
-        push!(active, frame)
-        nactive[] += 1
+            Vector{Tuple{InferenceState,LineNum}}(), # backedges
+            Vector{InferenceState}(), # callers_in_cycle
+            #=parent=#nothing,
+            false, false, optimize, cached, false, false)
         return frame
     end
 end
 
-# create copies of the CodeInfo definition, and any fields that type-inference might modify
-# TODO: post-inference see if we can swap back to the original arrays
-function get_source(li::MethodInstance)
-    if isa(li.def.source, Array{UInt8,1})
-        src = ccall(:jl_uncompress_ast, Any, (Any, Any), li.def, li.def.source)
+function InferenceState(linfo::MethodInstance,
+                        optimize::Bool, cached::Bool, params::InferenceParams)
+    # prepare an InferenceState object for inferring lambda
+    # create copies of the CodeInfo definition, and any fields that type-inference might modify
+    if linfo.def.isstaged
+        try
+            # user code might throw errors – ignore them
+            src = get_staged(linfo)
+        catch
+            return nothing
+        end
     else
-        src = ccall(:jl_copy_code_info, Ref{CodeInfo}, (Any,), li.def.source)
-        src.code = copy_exprargs(src.code)
-        src.slotnames = copy(src.slotnames)
-        src.slotflags = copy(src.slotflags)
+        # TODO: post-inference see if we can swap back to the original arrays?
+        if isa(linfo.def.source, Array{UInt8,1})
+            src = ccall(:jl_uncompress_ast, Any, (Any, Any), linfo.def, linfo.def.source)
+        else
+            src = ccall(:jl_copy_code_info, Ref{CodeInfo}, (Any,), linfo.def.source)
+            src.code = copy_exprargs(src.code)
+            src.slotnames = copy(src.slotnames)
+            src.slotflags = copy(src.slotflags)
+        end
     end
-    return src
+    return InferenceState(linfo, src, optimize, cached, params)
 end
 
 function get_staged(li::MethodInstance)
     return ccall(:jl_code_for_staged, Any, (Any,), li)::CodeInfo
 end
 
-
-#### current global inference state ####
-
-const active = Vector{Any}() # set of all InferenceState objects being processed
-const nactive = Array{Int,0}()
-nactive[] = 0
-const workq = Vector{InferenceState}() # set of InferenceState objects that can make immediate progress
-
 #### helper functions ####
 
 @inline slot_id(s) = isa(s, SlotNumber) ? (s::SlotNumber).id : (s::TypedSlot).id # using a function to ensure we can infer this
@@ -1092,7 +1093,9 @@ function invoke_tfunc(f::ANY, types::ANY, argtype::ANY, sv::InferenceState)
     meth = entry.func
     (ti, env) = ccall(:jl_match_method, Ref{SimpleVector}, (Any, Any),
                       argtype, meth.sig)
-    return typeinf_edge(meth::Method, ti, env, sv)
+    rt, edge = typeinf_edge(meth::Method, ti, env, sv)
+    edge !== nothing && add_backedge!(edge::MethodInstance, sv)
+    return rt
 end
 
 function tuple_tfunc(argtype::ANY)
@@ -1287,43 +1290,31 @@ function abstract_call_gf_by_type(f::ANY, atype::ANY, sv::InferenceState)
         # this means too many methods matched
         return Any
     end
-    x::Array{Any,1} = applicable
+    applicable = applicable::Array{Any,1}
     fullmatch = false
-    for (m::SimpleVector) in x
+    for (m::SimpleVector) in applicable
         sig = m[1]
         sigtuple = unwrap_unionall(sig)::DataType
         method = m[3]::Method
         sparams = m[2]::SimpleVector
         recomputesvec = false
-        if !fullmatch && typeseq(sig, argtype)
+        if !fullmatch && (argtype <: method.sig)
             fullmatch = true
         end
 
         # limit argument type tuple growth
-        msig = unwrap_unionall(m[3].sig)
+        msig = unwrap_unionall(method.sig)
         lsig = length(msig.parameters)
         ls = length(sigtuple.parameters)
         td = type_depth(sig)
-        # look at the existing edges to detect growing argument lists
         mightlimitlength = ls > lsig + 1
         mightlimitdepth = td > 2
-
         limitlength = false
-        if mightlimitlength
-            for (callee, _) in sv.edges
-                callee = callee::InferenceState
-                if method === callee.linfo.def && ls > length(unwrap_unionall(callee.linfo.specTypes).parameters)
-                    limitlength = true
-                    break
-                end
-            end
-        end
-
-        # limit argument type size growth
         if mightlimitlength || mightlimitdepth
             # TODO: FIXME: this heuristic depends on non-local state making type-inference unpredictable
-            for infstate in active
-                infstate === nothing && continue
+            cyclei = 0
+            infstate = sv
+            while infstate !== nothing
                 infstate = infstate::InferenceState
                 if isdefined(infstate.linfo, :def) && method === infstate.linfo.def
                     if mightlimitlength && ls > length(unwrap_unionall(infstate.linfo.specTypes).parameters)
@@ -1363,6 +1354,14 @@ function abstract_call_gf_by_type(f::ANY, atype::ANY, sv::InferenceState)
                         end
                     end
                 end
+                # iterate through the cycle before walking to the parent
+                if cyclei < length(infstate.callers_in_cycle)
+                    cyclei += 1
+                    infstate = infstate.callers_in_cycle[cyclei]
+                else
+                    cyclei = 0
+                    infstate = infstate.parent
+                end
             end
         end
 
@@ -1400,7 +1399,8 @@ function abstract_call_gf_by_type(f::ANY, atype::ANY, sv::InferenceState)
             end
             sparams = recomputed[2]::SimpleVector
         end
-        rt = typeinf_edge(method, sig, sparams, sv)
+        rt, edge = typeinf_edge(method, sig, sparams, sv)
+        edge !== nothing && add_backedge!(edge::MethodInstance, sv)
         rettype = tmerge(rettype, rt)
         if rettype === Any
             break
@@ -1412,7 +1412,7 @@ function abstract_call_gf_by_type(f::ANY, atype::ANY, sv::InferenceState)
         add_mt_backedge(ftname.mt, argtype, sv)
         update_valid_age!(min_valid[1], max_valid[1], sv)
     end
-    if isempty(x)
+    if isempty(applicable)
         # TODO: this is needed because type intersection is wrong in some cases
         return Any
     end
@@ -2333,31 +2333,6 @@ end
 inlining_enabled() = (JLOptions().can_inline == 1)
 coverage_enabled() = (JLOptions().code_coverage != 0)
 
-# TODO: track the worlds for which this InferenceState
-# is being used, and split it if the WIP requires it?
-function converge_valid_age!(sv::InferenceState)
-    # push the validity range of sv into its fixedpoint callers
-    # recursing as needed to cover the graph
-    for (i, _) in sv.backedges
-        if i.fixedpoint
-            updated = false
-            if i.min_valid < sv.min_valid
-                i.min_valid = sv.min_valid
-                updated = true
-            end
-            if i.max_valid > sv.max_valid
-                i.max_valid = sv.max_valid
-                updated = true
-            end
-            @assert !isdefined(i.linfo, :def) || !i.cached || i.min_valid <= i.params.world <= i.max_valid "invalid age range update"
-            if updated
-                converge_valid_age!(i)
-            end
-        end
-    end
-    nothing
-end
-
 # work towards converging the valid age range for sv
 function update_valid_age!(min_valid::UInt, max_valid::UInt, sv::InferenceState)
     sv.min_valid = max(sv.min_valid, min_valid)
@@ -2369,7 +2344,7 @@ update_valid_age!(edge::InferenceState, sv::InferenceState) = update_valid_age!(
 update_valid_age!(li::MethodInstance, sv::InferenceState) = update_valid_age!(min_world(li), max_world(li), sv)
 
 # temporarily accumulate our edges to later add as backedges in the callee
-function add_backedge(li::MethodInstance, caller::InferenceState)
+function add_backedge!(li::MethodInstance, caller::InferenceState)
     isdefined(caller.linfo, :def) || return # don't add backedges to toplevel exprs
     if caller.stmt_edges[caller.currpc] === ()
         caller.stmt_edges[caller.currpc] = []
@@ -2435,72 +2410,92 @@ function code_for_method(method::Method, atypes::ANY, sparams::SimpleVector, wor
     return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any, UInt), method, atypes, sparams, world)
 end
 
-function typeinf_active(linfo::MethodInstance)
-    for infstate in active
-        infstate === nothing && continue
-        infstate = infstate::InferenceState
-        if linfo === infstate.linfo && infstate.cached
-            return infstate
-        end
+function typeinf_active(linfo::MethodInstance, sv::InferenceState)
+    for infstate in sv.callers_in_cycle
+        linfo === infstate.linfo && return infstate
     end
     return nothing
 end
 
-function add_backedge(frame::InferenceState, caller::InferenceState, currpc::Int)
+function add_backedge!(frame::InferenceState, caller::InferenceState, currpc::Int)
     update_valid_age!(frame, caller)
-    if haskey(caller.edges, frame)
-        Ws = caller.edges[frame]::Vector{Int}
-        if !(currpc in Ws)
-            push!(Ws, currpc)
+    backedge = (caller, currpc)
+    contains_is(frame.backedges, backedge) || push!(frame.backedges, backedge)
+    return frame
+end
+
+# at the end, all items in b's cycle
+# will now be added to a's cycle
+function union_caller_cycle!(a::InferenceState, b::InferenceState)
+    callers_in_cycle = b.callers_in_cycle
+    b.parent = a.parent
+    b.callers_in_cycle = a.callers_in_cycle
+    contains_is(a.callers_in_cycle, b) || push!(a.callers_in_cycle, b)
+    if callers_in_cycle !== a.callers_in_cycle
+        for caller in callers_in_cycle
+            if caller !== b
+                caller.parent = a.parent
+                caller.callers_in_cycle = a.callers_in_cycle
+                push!(a.callers_in_cycle, caller)
+            end
         end
-    else
-        Ws = Int[currpc]
-        caller.edges[frame] = Ws
-        push!(frame.backedges, (caller, Ws))
     end
+    return
 end
 
-# build (and start inferring) the inference frame for the linfo
-function typeinf_frame(linfo::MethodInstance, caller, optimize::Bool, cached::Bool,
-                       params::InferenceParams)
-    # println(params.world, ' ', linfo)
-    if cached && linfo.inInference
-        # inference on this signature may be in progress,
-        # find the corresponding frame in the active list
-        frame = typeinf_active(linfo)
-        # TODO: this assertion seems iffy
-        assert(frame !== nothing)
-    else
-        # inference not started yet, make a new frame for a new lambda
-        if linfo.def.isstaged
-            try
-                # user code might throw errors – ignore them
-                src = get_staged(linfo)
-            catch
-                return nothing
+function merge_call_chain!(parent::InferenceState, ancestor::InferenceState, child::InferenceState)
+    # add backedge of parent <- child
+    # then add all backedges of parent <- parent.parent
+    # and merge all of the callers into ancestor.callers_in_cycle
+    # and ensure that walking the parent list will get the same result (DAG) from everywhere
+    while true
+        add_backedge!(child, parent, parent.currpc)
+        union_caller_cycle!(ancestor, child)
+        child = parent
+        parent = child.parent
+        child === ancestor && break
+    end
+end
+
+# Walk through `linfo`'s upstream call chain, starting at `parent`. If a parent
+# frame matching `linfo` is encountered, then there is a cycle in the call graph
+# (i.e. `linfo` is a descendant callee of itself). Upon encountering this cycle,
+# we "resolve" it by merging the call chain, which entails unioning each intermediary
+# frame's `callers_in_cycle` field and adding the appropriate backedges. Finally,
+# we return `linfo`'s pre-existing frame. If no cycles are found, `nothing` is
+# returned instead.
+function resolve_call_cycle!(linfo::MethodInstance, parent::InferenceState)
+    frame = parent
+    while isa(frame, InferenceState)
+        if frame.linfo === linfo
+            merge_call_chain!(parent, frame, frame)
+            return frame
+        end
+        for caller in frame.callers_in_cycle
+            if caller.linfo === linfo
+                merge_call_chain!(parent, frame, caller)
+                return caller
             end
-        else
-            src = get_source(linfo)
         end
-        cached && (linfo.inInference = true)
-        frame = InferenceState(linfo, src, optimize, cached, params)
+        frame = frame.parent
     end
-    frame = frame::InferenceState
+    return nothing
+end
 
-    if isa(caller, InferenceState)
-        # if we were called from inside inference, the caller will be the InferenceState object
-        # for which the edge was required
-        @assert caller.currpc > 0
-        add_backedge(frame, caller, caller.currpc)
-    end
-    typeinf_loop(frame)
+# build (and start inferring) the inference frame for the linfo
+function typeinf_frame(linfo::MethodInstance,
+                       optimize::Bool, cached::Bool, params::InferenceParams)
+    frame = InferenceState(linfo, optimize, cached, params)
+    frame === nothing && return nothing
+    cached && (linfo.inInference = true)
+    typeinf(frame)
     return frame
 end
 
 # compute (and cache) an inferred AST and return the current best estimate of the result type
 function typeinf_edge(method::Method, atypes::ANY, sparams::SimpleVector, caller::InferenceState)
     code = code_for_method(method, atypes, sparams, caller.params.world)
-    code === nothing && return Any
+    code === nothing && return Any, nothing
     code = code::MethodInstance
     if isdefined(code, :inferred)
         # return rettype if the code is already inferred
@@ -2508,18 +2503,27 @@ function typeinf_edge(method::Method, atypes::ANY, sparams::SimpleVector, caller
         # so need to check whether the code itself is also inferred
         inf = code.inferred
         if !isa(inf, CodeInfo) || (inf::CodeInfo).inferred
-            add_backedge(code, caller)
             if isdefined(code, :inferred_const)
-                return abstract_eval_constant(code.inferred_const)
+                return abstract_eval_constant(code.inferred_const), code
             else
-                return code.rettype
+                return code.rettype, code
             end
         end
     end
-    frame = typeinf_frame(code, caller, true, true, caller.params)
-    frame === nothing && return Any
+    frame = resolve_call_cycle!(code, caller)
+    if frame === nothing
+        code.inInference = true
+        frame = InferenceState(code, true, true, caller.params) # always optimize and cache edge targets
+        if frame === nothing
+            code.inInference = false
+            return Any, nothing
+        end
+        frame.parent = caller
+        typeinf(frame)
+        return frame.bestguess, frame.inferred ? frame.linfo : nothing
+    end
     frame = frame::InferenceState
-    return frame.bestguess
+    return frame.bestguess, nothing
 end
 
 #### entry points for inferring a MethodInstance given a type signature ####
@@ -2563,7 +2567,7 @@ function typeinf_code(linfo::MethodInstance, optimize::Bool, cached::Bool,
             end
         end
     end
-    frame = typeinf_frame(linfo, nothing, optimize, cached, params)
+    frame = typeinf_frame(linfo, optimize, cached, params)
     ccall(:jl_typeinf_end, Void, ())
     frame === nothing && return svec(nothing, nothing, Any)
     frame = frame::InferenceState
@@ -2591,7 +2595,7 @@ function typeinf_type(method::Method, atypes::ANY, sparams::SimpleVector,
             end
         end
     end
-    frame = typeinf_frame(code, nothing, cached, cached, params)
+    frame = typeinf_frame(code, cached, cached, params)
     ccall(:jl_typeinf_end, Void, ())
     frame === nothing && return nothing
     frame = frame::InferenceState
@@ -2605,11 +2609,10 @@ function typeinf_ext(linfo::MethodInstance, world::UInt)
         return typeinf_code(linfo, true, true, InferenceParams(world))
     else
         # toplevel lambda - infer directly
-        linfo.inInference = true
         ccall(:jl_typeinf_begin, Void, ())
         frame = InferenceState(linfo, linfo.inferred::CodeInfo,
                                true, true, InferenceParams(world))
-        typeinf_loop(frame)
+        typeinf(frame)
         ccall(:jl_typeinf_end, Void, ())
         @assert frame.inferred # TODO: deal with this better
         @assert frame.linfo === linfo
@@ -2619,83 +2622,9 @@ end
 
 #### do the work of inference ####
 
-in_typeinf_loop = false
-function typeinf_loop(frame)
-    global in_typeinf_loop
-    if in_typeinf_loop
-        frame.inworkq || typeinf_frame(frame)
-        return
-    end
-    try
-        in_typeinf_loop = true
-        # the core type-inference algorithm
-        # processes everything in workq,
-        # and returns when there is nothing left
-        while nactive[] > 0
-            while active[end] === nothing
-                pop!(active)
-            end
-            if isempty(workq)
-                frame = active[end]::InferenceState
-            else
-                frame = pop!(workq)
-            end
-            typeinf_frame(frame)
-            if isempty(workq) && nactive[] > 0
-                # nothing in active has an edge that hasn't reached a fixed-point
-                # so all of them can be considered finished now
-                fplist = Any[]
-                for i in active
-                    i === nothing && continue
-                    i = i::InferenceState
-                    if i.fixedpoint
-                        push!(fplist, i)
-                        i.inworkq = true
-                    end
-                end
-                for i in length(fplist):-1:1
-                    # optimize and record the results
-                    # the reverse order makes it more likely to inline a callee into its caller
-                    optimize(fplist[i]::InferenceState) # this may add incomplete work to active
-                end
-                for i in fplist
-                    # push valid ages from each node across the graph cycle
-                    converge_valid_age!(i::InferenceState)
-                end
-                for i in fplist
-                    # record the results
-                    finish(i::InferenceState)
-                end
-                for i in fplist
-                    # update and record all of the back edges for the finished world
-                    finalize_backedges(i::InferenceState)
-                end
-            end
-        end
-        # cleanup the active queue
-        empty!(active)
-    #    while active[end] === nothing
-    #        # this pops everything, but with exaggerated care just in case
-    #        # something managed to add something to the queue at the same time
-    #        # (or someone decides to use an alternative termination condition)
-    #        pop!(active)
-    #    end
-        in_typeinf_loop = false
-    catch ex
-        println("WARNING: An error occurred during inference. Type inference is now partially disabled.")
-        println(ex)
-        ccall(:jlbacktrace, Void, ())
-    end
-    nothing
-end
-
-global_sv = nothing
-function typeinf_frame(frame)
-    global global_sv # TODO: actually pass this to all functions that need it
-    last_global_sv = global_sv
-    global_sv = frame
+function typeinf_work(frame::InferenceState)
     @assert !frame.inferred
-    frame.inworkq = true
+    frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
     s = frame.stmt_types
     n = frame.nstmts
@@ -2781,18 +2710,15 @@ function typeinf_frame(frame)
                     if tchanged(rt, frame.bestguess)
                         # new (wider) return type for frame
                         frame.bestguess = tmerge(frame.bestguess, rt)
-                        for (caller, callerW) in frame.backedges
+                        for (caller, caller_pc) in frame.backedges
                             # notify backedges of updated type information
-                            for caller_pc in callerW
-                                if caller.stmt_types[caller_pc] !== ()
-                                    if caller_pc < caller.pc´´
-                                        caller.pc´´ = caller_pc
-                                    end
-                                    push!(caller.ip, caller_pc)
+                            if caller.stmt_types[caller_pc] !== ()
+                                if caller_pc < caller.pc´´
+                                    caller.pc´´ = caller_pc
                                 end
+                                push!(caller.ip, caller_pc)
                             end
                         end
-                        unmark_fixedpoint(frame)
                     end
                 elseif hd === :enter
                     l = stmt.args[1]::Int
@@ -2840,53 +2766,72 @@ function typeinf_frame(frame)
             end
         end
     end
+    frame.dont_work_on_me = false
+end
 
-    # with no active ip's, type inference on frame is done if there are no outstanding (unfinished) edges
-    #@assert isempty(W)
-    @assert !frame.inferred
-    finished = isempty(frame.edges)
-    if isempty(workq)
-        # oops, there's a cycle somewhere in the `edges` graph
-        # so we've run out off the tree and will need to start work on the loop
-        frame.fixedpoint = true
-    end
-
-    if finished || frame.fixedpoint
-        if finished
-            optimize(frame)
-            finish(frame)
-            finalize_backedges(frame)
-        else # fixedpoint propagation
-            for (i, _) in frame.edges
-                i = i::InferenceState
-                if !i.fixedpoint
-                    update_valid_age!(i, frame) # work towards converging age at the same time
-                    if !i.inworkq
-                        push!(workq, i)
-                        i.inworkq = true
-                    end
-                    i.fixedpoint = true
-                end
+function typeinf(frame::InferenceState)
+
+    typeinf_work(frame)
+
+    # If the current frame is part of a cycle, solve the cycle before finishing
+    no_active_ips_in_callers = false
+    while !no_active_ips_in_callers
+        no_active_ips_in_callers = true
+        for caller in frame.callers_in_cycle
+            caller.dont_work_on_me && return
+            if caller.pc´´ <= caller.nstmts # equivalent to `!isempty(caller.ip)`
+                # Note that `typeinf_work(caller)` can potentially modify the other frames
+                # in `frame.callers_in_cycle`, which is why making incremental progress
+                # requires the outer while loop.
+                typeinf_work(caller)
+                no_active_ips_in_callers = false
+            end
+            if caller.min_valid < frame.min_valid
+                caller.min_valid = frame.min_valid
+            end
+            if caller.max_valid > frame.max_valid
+                caller.max_valid = frame.max_valid
             end
         end
     end
-    frame.inworkq = false
-    global_sv = last_global_sv
-    nothing
-end
 
-function unmark_fixedpoint(frame::InferenceState)
-    # type information changed for frame, so its edges are no longer stuck
-    # recursively unmark any nodes that had previously been thought to be at a fixedpoint
-    # based upon (recursively) assuming that frame was stuck
-    if frame.fixedpoint
-        frame.fixedpoint = false
-        for (i, _) in frame.backedges
-            unmark_fixedpoint(i)
+    # with no active ip's, type inference on frame is done
+
+    if isempty(frame.callers_in_cycle)
+        @assert !(frame.dont_work_on_me)
+        frame.dont_work_on_me = true
+        optimize(frame)
+        finish(frame)
+        finalize_backedges(frame)
+    else # frame is in frame.callers_in_cycle
+        for caller in frame.callers_in_cycle
+            @assert !(caller.dont_work_on_me)
+            caller.dont_work_on_me = true
+        end
+        for caller in frame.callers_in_cycle
+            optimize(caller)
+            if frame.min_valid < caller.min_valid
+                frame.min_valid = caller.min_valid
+            end
+            if frame.max_valid > caller.max_valid
+                frame.max_valid = caller.max_valid
+            end
+        end
+        for caller in frame.callers_in_cycle
+            caller.min_valid = frame.min_valid
+        end
+        for caller in frame.callers_in_cycle
+            finish(caller)
+        end
+        for caller in frame.callers_in_cycle
+            finalize_backedges(caller)
         end
     end
+
+    nothing
 end
 
+
 function record_ssa_assign(ssa_id::Int, new::ANY, frame::InferenceState)
     old = frame.src.ssavaluetypes[ssa_id]
     if old === NF || !(new ⊑ old)
@@ -2932,16 +2877,6 @@ end
 # inference completed on `me`
 # now converge the optimization work
 function optimize(me::InferenceState)
-    for (i, _) in me.edges
-        i = i::InferenceState
-        @assert i.fixedpoint
-    end
-    # below may call back into inference and
-    # see this InferenceState is in an incomplete state
-    # set `inworkq` to prevent it from trying to look
-    # at the object in any detail
-    @assert me.inworkq
-
     # annotate fulltree with type information
     type_annotate!(me)
 
@@ -3034,7 +2969,7 @@ function finish(me::InferenceState)
 
         # check if the existing me.linfo metadata is also sufficient to describe the current inference result
         # to decide if it is worth caching it again (which would also clear any generated code)
-        already_inferred = false
+        already_inferred = !me.linfo.inInference
         if isdefined(me.linfo, :inferred)
             inf = me.linfo.inferred
             if !isa(inf, CodeInfo) || (inf::CodeInfo).inferred
@@ -3085,20 +3020,9 @@ function finish(me::InferenceState)
         end
     end
 
-    # lazy-delete the item from active for several reasons:
-    # efficiency, correctness, and recursion-safety
-    nactive[] -= 1
-    active[findlast(active, me)] = nothing
-
-    # update all of the callers by traversing the backedges
+    # update all of the callers with real backedges by traversing the temporary list of backedges
     for (i, _) in me.backedges
-        if !me.fixedpoint || !i.fixedpoint
-            # wake up each backedge, unless both me and it already reached a fixed-point (cycle resolution stage)
-            delete!(i.edges, me)
-            i.inworkq || push!(workq, i)
-            i.inworkq = true
-        end
-        add_backedge(me.linfo, i)
+        add_backedge!(me.linfo, i)
     end
 
     # finalize and record the linfo result
@@ -3611,7 +3535,7 @@ function invoke_NF(argexprs, etype::ANY, atypes, sv, atype_unlimited::ANY,
                 local sig = argtypes_to_type(atypes)
                 local li = get_spec_lambda(sig, sv, invoke_data)
                 li === nothing && return false
-                add_backedge(li, sv)
+                add_backedge!(li, sv)
                 local stmt = []
                 push!(stmt, Expr(:(=), linfo_var, li))
                 spec_hit === nothing && (spec_hit = genlabel(sv))
@@ -3682,7 +3606,7 @@ function invoke_NF(argexprs, etype::ANY, atypes, sv, atype_unlimited::ANY,
     else
         local cache_linfo = get_spec_lambda(atype_unlimited, sv, invoke_data)
         cache_linfo === nothing && return NF
-        add_backedge(cache_linfo, sv)
+        add_backedge!(cache_linfo, sv)
         unshift!(argexprs, cache_linfo)
         ex = Expr(:invoke)
         ex.args = argexprs
@@ -3920,7 +3844,7 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
     linfo = linfo::MethodInstance
     if linfo.jlcall_api == 2
         # in this case function can be inlined to a constant
-        add_backedge(linfo, sv)
+        add_backedge!(linfo, sv)
         return inline_as_constant(linfo.inferred_const, argexprs, sv, invoke_data)
     end
 
@@ -3937,22 +3861,19 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
         # as we'll be able to fix that up at the end of inlinable when we verify the return type.
         # But `next` and `indexed_next` make tuples which would end up burying some of that information in the AST
         # where we can't easily correct it afterwards.
-        frame = InferenceState(linfo, get_source(linfo), #=optimize=#true, #=cache=#false, sv.params)
+        frame = InferenceState(linfo, #=optimize=#true, #=cache=#false, sv.params)
         frame.stmt_types[1][3] = VarState(atypes[3], false)
-        typeinf_loop(frame)
+        typeinf(frame)
     else
         if isdefined(linfo, :inferred) && linfo.inferred !== nothing
             # use cache
             inferred = linfo.inferred
-        elseif linfo.inInference
-            # use WIP
-            frame = typeinf_active(linfo)
         elseif force_infer
             # create inferred code on-demand
             # but if we decided in the past not to try to infer this particular signature
             # (due to signature coarsening in abstract_call_gf_by_type)
             # don't infer it now, as attempting to force it now would be a bad idea (non terminating)
-            frame = typeinf_frame(linfo, nothing, #=optimize=#true, #=cache=#true, sv.params)
+            frame = typeinf_frame(linfo, #=optimize=#true, #=cache=#true, sv.params)
         end
     end
 
@@ -3963,9 +3884,9 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
         inferred = frame.src
         if frame.const_api # handle like jlcall_api == 2
             if frame.inferred || !frame.cached
-                add_backedge(frame.linfo, sv)
+                add_backedge!(frame.linfo, sv)
             else
-                add_backedge(frame, sv, 0)
+                add_backedge!(frame, sv, 0)
             end
             if isa(frame.bestguess, Const)
                 inferred_const = (frame.bestguess::Const).val
@@ -4026,9 +3947,9 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
     if isa(frame, InferenceState) && !frame.inferred && frame.cached
         # in this case, the actual backedge linfo hasn't been computed
         # yet, but will be when inference on the frame finishes
-        add_backedge(frame, sv, 0)
+        add_backedge!(frame, sv, 0)
     else
-        add_backedge(linfo, sv)
+        add_backedge!(linfo, sv)
     end
 
     spvals = Any[]
@@ -5481,7 +5402,7 @@ end
 # especially try to make sure any recursive and leaf functions have concrete signatures,
 # since we won't be able to specialize & infer them at runtime
 
-let fs = Any[typeinf_ext, typeinf_loop, typeinf_edge, occurs_outside_getfield, pure_eval_call],
+let fs = Any[typeinf_ext, typeinf, typeinf_edge, occurs_outside_getfield, pure_eval_call],
     world = ccall(:jl_get_world_counter, UInt, ())
     for x in t_ffunc_val
         push!(fs, x[3])
diff --git a/base/precompile.jl b/base/precompile.jl
index 8bc84db37f499..9bd0f90f30572 100644
--- a/base/precompile.jl
+++ b/base/precompile.jl
@@ -822,9 +822,8 @@ precompile(Tuple{getfield(Base.Cartesian, Symbol("#@nexprs")), Int64, Expr})
 precompile(Tuple{typeof(Base.Cartesian._nexprs), Int64, Expr})
 precompile(Tuple{typeof(Core.Inference.findnext), Array{Function, 1}, typeof(===), Int64})
 precompile(Tuple{typeof(Core.Inference.builtin_tfunction), typeof(===), Array{Any, 1}, Core.Inference.InferenceState, Core.Inference.InferenceParams})
-precompile(Tuple{typeof(Core.Inference.typeinf_frame), Core.MethodInstance, Void, Bool, Bool, Core.Inference.InferenceParams})
-precompile(Tuple{typeof(Core.Inference.typeinf_loop), Core.Inference.InferenceState})
-precompile(Tuple{typeof(Core.Inference.typeinf_frame), Core.Inference.InferenceState})
+precompile(Tuple{typeof(Core.Inference.typeinf_frame), Core.MethodInstance, Bool, Bool, Core.Inference.InferenceParams})
+precompile(Tuple{typeof(Core.Inference.typeinf), Core.Inference.InferenceState})
 precompile(Tuple{typeof(Base.Cartesian.inlineanonymous), Expr, Int64})
 precompile(Tuple{typeof(Base.Cartesian.lreplace), Expr, Symbol, Int64})
 precompile(Tuple{typeof(Base.copy), Expr})
@@ -869,7 +868,6 @@ precompile(Tuple{typeof(Core.Inference._widen_all_consts!), Expr, Array{Bool, 1}
 precompile(Tuple{typeof(Core.Inference._delete!), Core.Inference.IntSet, Int64})
 precompile(Tuple{typeof(Core.Inference.promote_type), Type{Float16}, Type{Int64}})
 precompile(Tuple{typeof(Core.Inference.mk_tuplecall), Array{Any, 1}, Core.Inference.InferenceState})
-precompile(Tuple{typeof(Core.Inference.get_source), Core.MethodInstance})
 precompile(Tuple{typeof(Core.Inference.inlining_pass), Expr, Core.Inference.InferenceState, Array{Any, 1}, Int64})
 precompile(Tuple{typeof(Core.Inference.annotate_slot_load!), Expr, Array{Any, 1}, Core.Inference.InferenceState, Array{Bool, 1}})
 precompile(Tuple{typeof(Core.Inference.record_slot_assign!), Core.Inference.InferenceState})
@@ -886,7 +884,6 @@ precompile(Tuple{typeof(Core.Inference.return_type_tfunc), Array{Any, 1}, Array{
 precompile(Tuple{typeof(Core.Inference.abstract_call), typeof(===), Tuple{}, Array{Any, 1}, Array{Any, 1}, Core.Inference.InferenceState})
 precompile(Tuple{typeof(Core.Inference.abstract_call), typeof(===), Array{Any, 1}, Array{Any, 1}, Array{Any, 1}, Core.Inference.InferenceState})
 precompile(Tuple{typeof(Core.Inference.type_too_complex), TypeVar, Int64})
-precompile(Tuple{typeof(Core.Inference.typeinf_frame), Core.MethodInstance, Core.Inference.InferenceState, Bool, Bool, Core.Inference.InferenceParams})
 precompile(Tuple{typeof(Core.Inference.abstract_eval), Expr, Array{Any, 1}, Core.Inference.InferenceState})
 precompile(Tuple{typeof(Core.Inference._setint!), Core.Inference.IntSet, Int64, Bool})
 precompile(Tuple{typeof(Core.Inference.stupdate1!), Array{Any, 1}, Core.Inference.StateUpdate})
diff --git a/base/sysimg.jl b/base/sysimg.jl
index d9851dbf30a0d..acfb4c7b68ee4 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -21,7 +21,6 @@ include("coreio.jl")
 
 eval(x) = Core.eval(Base, x)
 eval(m, x) = Core.eval(m, x)
-(::Type{T})(arg) where {T} = convert(T, arg)::T # Hidden from the REPL.
 VecElement{T}(arg) where {T} = VecElement{T}(convert(T, arg))
 convert(::Type{T}, arg)  where {T<:VecElement} = T(arg)
 convert(::Type{T}, arg::T) where {T<:VecElement} = arg
@@ -73,6 +72,10 @@ include("refpointer.jl")
 include("checked.jl")
 importall .Checked
 
+# buggy handling of ispure in type-inference means this definition should come
+# after re-defining the basic operations that it might try to call
+(::Type{T})(arg) where {T} = convert(T, arg)::T # Hidden from the REPL.
+
 # vararg Symbol constructor
 Symbol(x...) = Symbol(string(x...))
 
diff --git a/src/gf.c b/src/gf.c
index 44dbb4ad28005..822417e2e0897 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -238,46 +238,39 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t **pli, size_t world, int forc
     JL_TIMING(INFERENCE);
     if (jl_typeinf_func == NULL)
         return NULL;
     jl_code_info_t *src = NULL;
 #ifdef ENABLE_INFERENCE
     jl_method_instance_t *li = *pli;
-    jl_module_t *mod = NULL;
-    if (li->def != NULL)
-        mod = li->def->module;
-    static int inInference = 0;
-    int lastIn = inInference;
-    size_t last_age = jl_get_ptls_states()->world_age;
-    inInference = 1;
-    if (force ||
-        (last_age != jl_typeinf_world &&
-         mod != jl_gf_mtable(jl_typeinf_func)->module &&
-         (mod != jl_core_module || !lastIn))) { // avoid any potential recursion in calling jl_typeinf_func on itself
-        assert(li->inInference == 0 && "unexpectedly asked to infer a method that is already being inferred");
-        jl_value_t **fargs;
-        JL_GC_PUSHARGS(fargs, 3);
-        fargs[0] = (jl_value_t*)jl_typeinf_func;
-        fargs[1] = (jl_value_t*)li;
-        fargs[2] = jl_box_ulong(world);
+    if (li->inInference && !force)
+        return NULL;
+
+    jl_value_t **fargs;
+    JL_GC_PUSHARGS(fargs, 3);
+    fargs[0] = (jl_value_t*)jl_typeinf_func;
+    fargs[1] = (jl_value_t*)li;
+    fargs[2] = jl_box_ulong(world);
 #ifdef TRACE_INFERENCE
-        if (li->specTypes != (jl_value_t*)jl_emptytuple_type) {
-            jl_printf(JL_STDERR,"inference on ");
-            jl_static_show_func_sig(JL_STDERR, (jl_value_t*)li->specTypes);
-            jl_printf(JL_STDERR, "\n");
-        }
+    if (li->specTypes != (jl_value_t*)jl_emptytuple_type) {
+        jl_printf(JL_STDERR,"inference on ");
+        jl_static_show_func_sig(JL_STDERR, (jl_value_t*)li->specTypes);
+        jl_printf(JL_STDERR, "\n");
+    }
 #endif
-        jl_get_ptls_states()->world_age = jl_typeinf_world;
-        jl_svec_t *linfo_src_rettype = (jl_svec_t*)jl_apply_with_saved_exception_state(fargs, 3, 0);
-        jl_get_ptls_states()->world_age = last_age;
-        assert((li->def || li->inInference == 0) && "inference failed on a toplevel expr");
-        if (jl_is_svec(linfo_src_rettype) && jl_svec_len(linfo_src_rettype) == 3 &&
-            jl_is_method_instance(jl_svecref(linfo_src_rettype, 0)) &&
-            jl_is_code_info(jl_svecref(linfo_src_rettype, 1))) {
-            *pli = (jl_method_instance_t*)jl_svecref(linfo_src_rettype, 0);
-            src = (jl_code_info_t*)jl_svecref(linfo_src_rettype, 1);
-        }
-        JL_GC_POP();
+    jl_ptls_t ptls = jl_get_ptls_states();
+    size_t last_age = ptls->world_age;
+    ptls->world_age = jl_typeinf_world;
+    li->inInference = 1;
+    jl_svec_t *linfo_src_rettype = (jl_svec_t*)jl_apply_with_saved_exception_state(fargs, 3, 0);
+    ptls->world_age = last_age;
+    assert((li->def || li->inInference == 0) && "inference failed on a toplevel expr");
+    // `src` stays NULL unless inference returns a well-formed 3-element svec
+    if (jl_is_svec(linfo_src_rettype) && jl_svec_len(linfo_src_rettype) == 3 &&
+        jl_is_method_instance(jl_svecref(linfo_src_rettype, 0)) &&
+        jl_is_code_info(jl_svecref(linfo_src_rettype, 1))) {
+        *pli = (jl_method_instance_t*)jl_svecref(linfo_src_rettype, 0);
+        src = (jl_code_info_t*)jl_svecref(linfo_src_rettype, 1);
     }
-    inInference = lastIn;
+    JL_GC_POP();
 #endif
     return src;
 }
@@ -1661,7 +1654,7 @@ jl_llvm_functions_t jl_compile_for_dispatch(jl_method_instance_t **pli, size_t w
         return decls;
 
     jl_code_info_t *src = NULL;
-    if (li->def && !jl_is_rettype_inferred(li) && !li->inInference &&
+    if (li->def && !jl_is_rettype_inferred(li) &&
              jl_symbol_name(li->def->name)[0] != '@') {
         // don't bother with typeinf on macros or toplevel thunks
         // but try to infer everything else
diff --git a/src/interpreter.c b/src/interpreter.c
index 4e78fea57383a..99fc01c7c7bb0 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -85,7 +85,7 @@ static jl_value_t *do_invoke(jl_value_t **args, size_t nargs, interpreter_state
     for (i = 1; i < nargs; i++)
         argv[i - 1] = eval(args[i], s);
     jl_method_instance_t *meth = (jl_method_instance_t*)args[0];
-    assert(jl_is_method_instance(meth) && !meth->inInference);
+    assert(jl_is_method_instance(meth));
     jl_value_t *result = jl_call_method_internal(meth, argv, nargs - 1);
     JL_GC_POP();
     return result;
diff --git a/test/staged.jl b/test/staged.jl
index 34fc90d552a29..ba62fc2d91816 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -147,7 +147,6 @@ module TestGeneratedThrow
     foo() = (bar(rand() > 0.5 ? 1 : 1.0); error("foo"))
     function __init__()
         code_typed(foo,(); optimize = false)
-        @test Core.Inference.isempty(Core.Inference.active) && Core.Inference.isempty(Core.Inference.workq)
         cfunction(foo,Void,())
     end
 end
@@ -175,7 +174,7 @@ let gf_err, tsk = @async nothing # create a Task for yield to try to run
     end
     @test_throws ErrorException gf_err()
     @test_throws ErrorException gf_err()
-    @test gf_err_ref[] == 4
+    @test gf_err_ref[] == 3
 end
 
 gf_err_ref[] = 0