1515# function iterate()
1616
1717# end
18- function isnopidentity (op:: Operation , u₁loop:: Symbol , u₂loop:: Symbol , suffix)
18+
"""
    dependent_outer_reducts(ls::LoopSet, op)

Return `true` if, for at least one index `i` in `ls.outer_reductions`,
`search_tree(parents(operations(ls)[i]), name(op))` is `true`; return `false`
otherwise (including when `ls.outer_reductions` is empty).

NOTE(review): presumably this answers "does some outer reduction depend on `op`?",
i.e. `search_tree` looks for `name(op)` among the ancestors of the reduction
operation — confirm against the definition of `search_tree`.
"""
function dependent_outer_reducts(ls::LoopSet, op)
    # `any` over a generator short-circuits on the first match, exactly like an
    # explicit loop with an early `return true`.
    return any(
        search_tree(parents(operations(ls)[reduct_id]), name(op))
        for reduct_id in ls.outer_reductions
    )
end
25+
26+ function isnopidentity (ls:: LoopSet , op:: Operation , u₁loop:: Symbol , u₂loop:: Symbol , vectorized:: Symbol , suffix)
1927 parents_op = parents (op)
2028 if iscompute (op) && instruction (op). instr === :identity && name (first (parents_op)) === name (op) && isone (length (parents_op))
2129 mvar, u₁unrolledsym, u₂unrolledsym = variable_name_and_unrolled (op, u₁loop, u₂loop, suffix)
2230 parents_u₁syms, parents_u₂syms = parent_unroll_status (op, u₁loop, u₂loop, suffix)
2331 if (u₁unrolledsym == first (parents_u₁syms)) && ((! isnothing (suffix)) == parents_u₂syms[1 ])
24- true
32+ # TODO: identifier(first(parents_op)) ∉ ls.outer_reductions is going to miss a lot of cases
33+ # Should probably replace that with `DVec` (demoting Vec) types, that demote to scalar.
34+ if (vectorized ∈ loopdependencies (first (parents_op)) && vectorized ∉ loopdependencies (op)) && ! dependent_outer_reducts (ls, op)
35+ op. instruction = reduction_to_scalar (instruction (first (parents_op)))
36+ op. mangledvariable = gensym (op. mangledvariable)
37+ false
38+ else
39+ true
40+ end
41+ else
42+ false
2543 end
2644 else
2745 false
3048
3149function set_upstream_family! (adal:: Vector{T} , op:: Operation , val:: T , ld:: Vector{Symbol} , id:: Int ) where {T}
3250 adal[identifier (op)] == val && return # must already have been set
33- # @show op
3451 if ld != loopdependencies (op) || id == identifier (op)
3552 (adal[identifier (op)] = val)
3653 end
@@ -41,26 +58,27 @@ function set_upstream_family!(adal::Vector{T}, op::Operation, val::T, ld::Vector
4158end
4259
4360function addoptoorder! (
44- lo:: LoopOrder , included_vars:: Vector{Bool} , place_after_loop:: Vector{Bool} , op:: Operation , loopsym:: Symbol , _n:: Int , u₁loop:: Symbol , u₂loop:: Symbol , loopistiled:: Bool
61+ ls:: LoopSet , included_vars:: Vector{Bool} , place_after_loop:: Vector{Bool} , op:: Operation ,
62+ loopsym:: Symbol , _n:: Int , u₁loop:: Symbol , u₂loop:: Symbol , vectorized:: Symbol , loopistiled:: Bool
4563)
64+ lo = ls. loop_order
4665 id = identifier (op)
4766 included_vars[id] && return nothing
4867 loopsym ∈ loopdependencies (op) || return nothing
4968 for opp ∈ parents (op) # ensure parents are added first
50- addoptoorder! (lo , included_vars, place_after_loop, opp, loopsym, _n, u₁loop, u₂loop, loopistiled)
69+ addoptoorder! (ls , included_vars, place_after_loop, opp, loopsym, _n, u₁loop, u₂loop, vectorized , loopistiled)
5170 end
5271 included_vars[id] && return nothing
5372 included_vars[id] = true
5473 isunrolled = (u₁loop ∈ loopdependencies (op)) + 1
5574 istiled = u₂loop ∈ loopdependencies (op)
5675 # optype = Int(op.node_type) + 1
5776 after_loop = place_after_loop[id] + 1
58- # @show place_after_loop[id], op
5977 if ! isloopvalue (op)
6078 if istiled
61- isnopidentity (op, u₁loop, u₂loop, 0 ) || push! (lo[isunrolled,2 ,after_loop,_n], op)
79+ isnopidentity (ls, op, u₁loop, u₂loop, vectorized , 0 ) || push! (lo[isunrolled,2 ,after_loop,_n], op)
6280 else
63- isnopidentity (op, u₁loop, u₂loop, nothing ) || push! (lo[isunrolled,1 ,after_loop,_n], op)
81+ isnopidentity (ls, op, u₁loop, u₂loop, vectorized , nothing ) || push! (lo[isunrolled,1 ,after_loop,_n], op)
6482 end
6583 end
6684 # isloopvalue(op) || push!(lo[isunrolled,istiled,after_loop,_n], op)
@@ -69,7 +87,7 @@ function addoptoorder!(
6987 nothing
7088end
7189
72- function fillorder! (ls:: LoopSet , order:: Vector{Symbol} , unrolled :: Symbol , tiled :: Symbol , loopistiled:: Bool )
90+ function fillorder! (ls:: LoopSet , order:: Vector{Symbol} , u₁loop :: Symbol , u₂loop :: Symbol , loopistiled:: Bool , vectorized :: Symbol )
7391 lo = ls. loop_order
7492 resize! (lo, length (ls. loopsymbols))
7593 ro = lo. loopnames # reverse order; will have same order as lo
@@ -85,7 +103,7 @@ function fillorder!(ls::LoopSet, order::Vector{Symbol}, unrolled::Symbol, tiled:
85103 ro[_n] = loopsym = order[n]
86104 # loopsym = order[n]
87105 for op ∈ ops
88- addoptoorder! ( lo , included_vars, place_after_loop, op, loopsym, _n, unrolled, tiled , loopistiled )
106+ addoptoorder! ( ls , included_vars, place_after_loop, op, loopsym, _n, u₁loop, u₂loop, vectorized , loopistiled )
89107 end
90108 end
91109end
0 commit comments