@@ -408,41 +408,55 @@ function maybedemotesize(uβ::Int, N::Int, U::Int, Uloop::Loop, maxuβbase::In
408408 end
409409 uβ
410410end
411+
"""
    solve_unroll(ls, u₁loopsym, u₂loopsym, cost_vec, reg_pressure, W, vectorized)

Look up the loops named `u₁loopsym` and `u₂loopsym` in `ls` with `getloop` and forward
to the `solve_unroll` method that takes the two `Loop` objects directly.

# Arguments
- `ls::LoopSet`: loop set the two loop symbols are resolved against.
- `u₁loopsym`, `u₂loopsym`: symbols of the first and second unrolled loops.
- `cost_vec`, `reg_pressure`: passed through unchanged.
- `W`, `vectorized`: passed through unchanged.

# Returns
Whatever the forwarded `solve_unroll` call returns.
"""
function solve_unroll(
    ls::LoopSet, u₁loopsym::Symbol, u₂loopsym::Symbol,
    cost_vec::AbstractVector{Float64},
    reg_pressure::AbstractVector{Float64},
    W::Int, vectorized::Symbol
)
    # The two loops must be bound to distinct names; they are passed on separately.
    u₁loop = getloop(ls, u₁loopsym)
    u₂loop = getloop(ls, u₂loopsym)
    return solve_unroll(
        u₁loopsym, u₂loopsym, cost_vec, reg_pressure, W, vectorized, u₁loop, u₂loop
    )
end
424+
"""
    solve_unroll(u₁loopsym, u₂loopsym, cost_vec, reg_pressure, W, vectorized, u₁loop, u₂loop)

Choose unroll factors `u₁` and `u₂` for the loops `u₁loop` and `u₂loop`.

# Arguments
- `u₁loopsym`, `u₂loopsym`: symbols naming the two loops; compared against `vectorized`.
- `cost_vec`, `reg_pressure`: forwarded to the cost solvers unchanged.
- `W::Int`: divisor applied (via `cld`) to the static length of whichever loop is the
  vectorized one — presumably the vector width; confirm against the caller.
- `vectorized`: symbol of the vectorized loop.
- `u₁loop`, `u₂loop`: the `Loop` objects corresponding to the two symbols.

# Returns
A tuple `(u₁, u₂, cost)`.
"""
function solve_unroll(
    u₁loopsym::Symbol, u₂loopsym::Symbol,
    cost_vec::AbstractVector{Float64},
    reg_pressure::AbstractVector{Float64},
    W::Int, vectorized::Symbol,
    u₁loop::Loop, u₂loop::Loop
)
    # Base cap for each unroll factor: 6 when 32 registers are reported, otherwise 4.
    maxu₂base = maxu₁base = VectorizationBase.REGISTER_COUNT == 32 ? 6 : 4 # 8
    maxu₂ = maxu₂base # 8
    maxu₁ = maxu₁base # 8
    if isstaticloop(u₂loop)
        u₂L = length(u₂loop)
        if u₂loopsym !== vectorized && u₂L ≤ 4
            # Short, non-vectorized u₂ loop: use its full length as u₂ and solve for u₁ only.
            u₁ = max(1, solve_unroll_constT(cost_vec, reg_pressure, u₂L))
            return u₁, u₂L, unroll_cost(cost_vec, u₁, u₂L, length(u₁loop), u₂L)
        end
        # For the vectorized loop the length is first divided by W, rounding up.
        u₂L = u₂loopsym === vectorized ? cld(u₂L, W) : u₂L
        maxu₂ = min(4 * maxu₂, u₂L)
    end
    if isstaticloop(u₁loop)
        u₁L = length(u₁loop)
        if u₁loopsym !== vectorized && u₁L ≤ 4
            # Short, non-vectorized u₁ loop: use its full length as u₁ and solve for u₂ only.
            u₂ = max(1, solve_unroll_constU(cost_vec, reg_pressure, u₁L))
            return u₁L, u₂, unroll_cost(cost_vec, u₁L, u₂, u₁L, length(u₂loop))
        end
        u₁L = u₁loopsym === vectorized ? cld(u₁L, W) : u₁L
        maxu₁ = min(4 * maxu₁, u₁L)
    end
    u₁, u₂, cost = solve_unroll(
        cost_vec, reg_pressure, maxu₁, maxu₂, length(u₁loop), length(u₂loop)
    )
    # heuristic to more evenly divide small numbers of iterations
    if isstaticloop(u₂loop)
        u₂ = maybedemotesize(u₂, length(u₂loop), u₁, u₁loop, maxu₂base)
    end
    if isstaticloop(u₁loop)
        u₁ = maybedemotesize(u₁, length(u₁loop), u₂, u₂loop, maxu₁base)
    end
    return u₁, u₂, cost
end
@@ -641,8 +655,8 @@ function evaluate_cost_tile(
641655 size_T = biggest_type_size (ls)
642656 W, Wshift = VectorizationBase. pick_vector_width_shift (length (ls, vectorized), size_T):: Tuple{Int,Int}
643657 # costs =
644- # cost_mat[1] / ( unrolled * tiled )
645- # cost_mat[2] / ( tiled )
658+ # cost_mat[1] / ( unrolled * uβloopsym )
659+ # cost_mat[2] / ( uβloopsym )
646660 # cost_mat[3] / ( unrolled)
647661 # cost_mat[4]
648662 # @show order
@@ -681,7 +695,7 @@ function evaluate_cost_tile(
681695 # cost is reduced by unrolling uβ if it is interior to uβloop (true if either uβreached, or if depends on uβ [or uβ]) and doesn't depend on uβ
682696 reduced_by_unrolling[1 ,id] = (uβreached | depends_on_uβ) & ! depends_on_uβ
683697 reduced_by_unrolling[2 ,id] = (uβreached | depends_on_uβ) & ! depends_on_uβ
684- # @show op iter, unrolledtiled [:,id]
698+ # @show op iter, unrolleduβloopsym [:,id]
685699 iters[id] = iter
686700 innerloop β loopdependencies (op) && set_upstream_family! (descendentsininnerloop, op, true )
687701 end
0 commit comments