@@ -277,41 +277,46 @@ function lower_no_unroll(ls::LoopSet, us::UnrollSpecification, n::Int, inclmask:
277277 tc = terminatecondition (ls, us, n, inclmask, 1 )
278278 body = lower_block (ls, us, n, inclmask, 1 )
279279 # align_loop = isone(n) & (ls.align_loops[] > 0)
280- isstatic = isstaticloop (loop) # & (!align_loop )
281- if ! isstatic && (usorig. u₁ == us. u₁) && (usorig. u₂ == us. u₂) && ! inclmask
280+ loopisstatic = isstaticloop (loop)
281+ if ! loopisstatic && (usorig. u₁ == us. u₁) && (usorig. u₂ == us. u₂) && ! inclmask
282282 tc = expect (tc)
283283 end
284+ W = nisvectorized ? ls. vector_width[] : 1
285+ loopisstatic &= (! iszero (W))
284286 # q = if align_loop
285287 # Expr(:block, align_inner_loop_expr(ls, us, loop), Expr(:while, tc, body))
286288 # elseif nisvectorized
287- q = if nisvectorized
289+ if loopisstatic && length (loop) ≤ 8 W
290+ q = Expr (:block )
291+ foreach (_ -> push! (q. args, body), 1 : (length (loop) ÷ W))
292+ elseif nisvectorized
288293 # Expr(:block, loopiteratesatleastonce(loop, true), Expr(:while, expect(tc), body))
289- Expr (:block , Expr (:while , tc, body))
290- elseif isstatic && length (loop) ≤ 8
291- bodyq = Expr (:block )
292- foreach (_ -> push! (bodyq. args, body), 1 : length (loop))
293- bodyq
294+ q = Expr (:block , Expr (:while , tc, body))
294295 else
295296 termcond = gensym (:maybeterm )
296297 push! (body. args, Expr (:(= ), termcond, tc))
297- Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
298+ q = Expr (:block , Expr (:(= ), termcond, true ), Expr (:while , termcond, body))
298299 # Expr(:block, Expr(:while, expect(tc), body))
299300 # Expr(:block, assume(tc), Expr(:while, tc, body))
300301 # push!(body.args, Expr(:&&, expect(Expr(:call, :!, tc)), Expr(:break)))
301302 # Expr(:block, assume(tc), Expr(:while, true, body))
302303 # push!(body.args, Expr(:||, expect(tc), Expr(:break)))
303304 # Expr(:block, Expr(:while, true, body))
304305 end
305- if nisvectorized
306+ if nisvectorized && ! (loopisstatic && iszero ( length (loop) & (W - 1 )))
306307 # tc = terminatecondition(loop, us, n, loopsym, true, 1)
307- tc = terminatecondition (ls, us, n, true , 1 )
308308 body = lower_block (ls, us, n, true , 1 )
309309 if isone (num_loops (ls))
310310 pushfirst! (body. args, definemask (loop))
311311 # elseif align_loop
312312 # pushfirst!(body.args, definemask_for_alignment_cleanup(loop))
313313 end
314- push! (q. args, Expr (:if , tc, body))
314+ if loopisstatic
315+ push! (q. args, body)
316+ else
317+ tc = terminatecondition (ls, us, n, true , 1 )
318+ push! (q. args, Expr (:if , tc, body))
319+ end
315320 end
316321 Expr (:block , Expr (:let , sl, q))
317322end
@@ -353,6 +358,7 @@ function lower_unrolled_dynamic(ls::LoopSet, us::UnrollSpecification, n::Int, in
353358 remblock = init_remblock (loop, ls. lssm[], n)# loopsym)
354359 q = Expr (:while , tc, body)
355360 end
361+ # @show loopsym, loopisstatic, UFW
356362 q = if unsigned (Ureduct) < unsigned (UF) # unsigned(-1) == typemax(UInt); is logic relying on twos-complement bad?
357363 UF_cleanup = UF - Ureduct
358364 us_cleanup = nisunrolled ? UnrollSpecification (us, UF_cleanup, u₂) : UnrollSpecification (us, u₁, UF_cleanup)
0 commit comments