@@ -245,6 +245,19 @@ function tile_cost(X, U, T, UL, TL)
245245 # X[1]*Tfactor*Ufactor + X[4] + X[2] * Tfactor + X[3] * Ufactor
246246 X[1 ] + X[4 ] + X[2 ] * Tfactor + X[3 ] * Ufactor
247247end
248+ # function itertilesize(X, UL, TL)
249+ # cb = Inf
250+ # Ub = 1; Tb = 1
251+ # for U ∈ 1:4, T ∈ 1:4
252+ # c = tile_cost(X, U, T, UL, TL)
253+ # @show U, T, c
254+ # if cb > c
255+ # cb = c
256+ # Ub = U; Tb = T
257+ # end
258+ # end
259+ # Ub, Tb, cb
260+ # end
248261function solve_tilesize (X, R, UL, TL)
249262 # @inbounds any(iszero, (R[1],R[2],R[3])) && return -1,-1,Inf #solve_smalltilesize(X, R, Umax, Tmax)
250263 first (iszero (R)) && return - 1 ,- 1 ,Inf # solve_smalltilesize(X, R, Umax, Tmax)
@@ -253,14 +266,17 @@ function solve_tilesize(X, R, UL, TL)
253266 # first solving for U via quadratic formula
254267 # X is vector of costs, and R is of register pressures
255268 RR = REGISTER_COUNT - R[3 ] - R[4 ] # RR ≡ RemainingRegisters
269+ R[1 ] + R[2 ] > 0.5 RR && return 1 ,1 , tile_cost (X, 1 , 1 , UL, TL)
256270 a = (R[1 ])^ 2 * X[2 ] - (R[2 ])^ 2 * R[1 ]* X[3 ]/ RR
257271 b = 2 * R[1 ]* R[2 ]* X[3 ]
258272 c = - RR* R[1 ]* X[3 ]
259- Ufloat = (sqrt (b^ 2 - 4 a* c) - b) / (2 a)
260- Tfloat = (RR - Ufloat* R[2 ])/ (Ufloat* R[1 ])
261- # @show Ufloat, Tfloat
273+ discriminant = b^ 2 - 4 a* c
274+ discriminant < 0 && return - 1 ,- 1 ,Inf
275+ Ufloat = (sqrt (discriminant) - b) / (2 a)
276+ Tfloat = (RR - max (1.0 ,Ufloat)* R[2 ])/ (max (1.0 ,Ufloat)* R[1 ])
262277 if ! (isfinite (Tfloat) && isfinite (Ufloat))
263278 return 4 , 4 , tile_cost (X, 4 , 4 , UL, TL)
279+ # return itertilesize(X, UL, TL)
264280 end
265281 Ulow = max (1 , floor (Int, Ufloat)) # must be at least 1
266282 Tlow = max (1 , floor (Int, Tfloat)) # must be at least 1
0 commit comments