Skip to content

Commit 36588c5

Browse files
committed
luajit: experimentally add sink optimization
1 parent f5648c9 commit 36588c5

File tree

5 files changed

+201
-146
lines changed

5 files changed

+201
-146
lines changed

luajit/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Included changes:
77
\- [+] Added block debug functionality for functions to prevent someone from getting functions they shouldn't have.<br>
88
\- [+] Added read-only table functionality to prevent someone from modifying a table while it's being used by another thread<br>
99
\- [+] Exposed `luaopen_jit_profile` & `lua_index2adr` for HolyLib.<br>
10-
\- [+] Implemented fix for FFI Sandwich/LUA VM re-entry through JIT trace (See https://github.com/LuaJIT/LuaJIT/pull/1165)<br>
10+
\- [+] Implemented fix for FFI Sandwich/LUA VM re-entry through JIT trace (See https://github.com/LuaJIT/LuaJIT/pull/1165)<br>
11+
\- [+] Experimentally implemented Sink optimization (See https://github.com/LuaJIT/LuaJIT/pull/652)<br>
1112
\- [#] Made `cdata` return the type as `LUA_TUSERDATA` so that we can more easily allow FFI -> C calls without needing to hook 10 functions (& the TypeID also conflicted with gmod)<br>
1213
\- [#] Improved `GMODLUA_GetUserType` to directly do its stuff without using the Lua stack<br>

luajit/src/jit/dump.lua

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -396,11 +396,15 @@ local function dump_snap(tr)
396396
end
397397

398398
-- Return a register name or stack slot for a rid/sp location.
399-
local function ridsp_name(ridsp, ins)
399+
local function ridsp_name(ridsp, ins, op)
400400
if not disass then disass = require("jit.dis_"..jit.arch) end
401401
local rid, slot = band(ridsp, 0xff), shr(ridsp, 8)
402402
if rid == 253 or rid == 254 then
403-
return (slot == 0 or slot == 255) and " {sink" or format(" {%04d", ins-slot)
403+
if op == "TNEW " or op == "TDUP " or op == "CNEW " then
404+
return (slot == 0) and " {sink" or format(" {ri%02d", slot)
405+
else
406+
return (slot == 0 or slot == 255) and " {sink" or format(" {%04d", ins-slot)
407+
end
404408
end
405409
if ridsp > 255 then return format("[%x]", slot*4) end
406410
if rid < 128 then return disass.regname(rid) end
@@ -485,7 +489,7 @@ local function dump_ir(tr, dumpsnap, dumpreg)
485489
(dumpreg or op ~= "RENAME") then
486490
local rid = band(ridsp, 255)
487491
if dumpreg then
488-
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins)))
492+
out:write(format("%04d %-6s", ins, ridsp_name(ridsp, ins, op)))
489493
else
490494
out:write(format("%04d ", ins))
491495
end

luajit/src/lj_asm.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ static void ra_dprintf(ASMState *as, const char *fmt, ...)
348348
#define ra_weak(as, r) rset_set(as->weakset, (r))
349349
#define ra_noweak(as, r) rset_clear(as->weakset, (r))
350350

351-
#define ra_used(ir) (ra_hasreg((ir)->r) || ra_hasspill((ir)->s))
351+
#define ra_used(ir) (ra_hasreg((ir)->r) || ((ir)->r != RID_SUNK && (ir)->r != RID_SINK && ra_hasspill((ir)->s))
352352

353353
/* Setup register allocator. */
354354
static void ra_setup(ASMState *as)
@@ -941,7 +941,8 @@ static int asm_sunk_store(ASMState *as, IRIns *ira, IRIns *irs)
941941
}
942942
return 0;
943943
} else {
944-
return (ira + irs->s == irs); /* Quick check. */
944+
if (ira + irs->s != irs) return 0;
945+
return irs->o == IR_ASTORE || irs->o == IR_HSTORE || irs->o == IR_FSTORE || irs->o == IR_XSTORE;
945946
}
946947
}
947948

luajit/src/lj_opt_sink.c

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "lj_jit.h"
1515
#include "lj_iropt.h"
1616
#include "lj_target.h"
17+
#include "lj_dispatch.h"
1718

1819
/* Some local macros to save typing. Undef'd at the end. */
1920
#define IR(ref) (&J->cur.ir[(ref)])
@@ -54,6 +55,9 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
5455
IRIns *ir = IR(ref);
5556
if (irt_isphi(ir->t) || (ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT &&
5657
irt_isphi(IR(ir->op1)->t))) {
58+
if ((ira->prev & 0x1FFF) == 0x1FFF)
59+
return 0; /* The 13-bit PHI value counter in prev is already saturated; incrementing it would overflow, so refuse to sink this allocation. */
60+
5761
ira->prev++;
5862
return 1; /* Sinkable PHI. */
5963
}
@@ -69,6 +73,14 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
6973
return 1; /* Constant (non-PHI). */
7074
}
7175

76+
/* Reset prev to 0 for every instruction from the last one down to (but not including) IR_BASE. */
77+
static void sink_prepare(jit_State *J) {
78+
IRIns *ir, *irlast = IR(J->cur.nins-1);
79+
for (ir = irlast ; ir->o != IR_BASE; ir--) {
80+
ir->prev = 0;
81+
}
82+
}
83+
7284
/* Mark non-sinkable allocations using single-pass backward propagation.
7385
**
7486
** Roots for the marking process are:
@@ -79,7 +91,7 @@ static int sink_checkphi(jit_State *J, IRIns *ira, IRRef ref)
7991
** - Stores with non-constant keys.
8092
** - All stored values.
8193
*/
82-
static void sink_mark_ins(jit_State *J)
94+
static int sink_mark_ins(jit_State *J, int lightsink)
8395
{
8496
IRIns *ir, *irlast = IR(J->cur.nins-1);
8597
for (ir = irlast ; ; ir--) {
@@ -152,26 +164,34 @@ static void sink_mark_snap(jit_State *J, SnapShot *snap)
152164
}
153165

154166
/* Iteratively remark PHI refs with differing marks or PHI value counts. */
155-
static void sink_remark_phi(jit_State *J)
167+
static int sink_remark_phi(jit_State *J)
156168
{
157169
IRIns *ir;
158170
int remark;
171+
int require_remark = 0;
159172
do {
160173
remark = 0;
161174
for (ir = IR(J->cur.nins-1); ir->o == IR_PHI; ir--) {
162175
IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
163-
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && irl->prev == irr->prev)
176+
if (!((irl->t.irt ^ irr->t.irt) & IRT_MARK) && (irl->prev & 0x1FFF) == (irr->prev & 0x1FFF))
164177
continue;
165178
remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
179+
if ((IR(ir->op1)->prev & 0x2000) || (IR(ir->op2)->prev & 0x2000)) {
180+
IR(ir->op1)->prev &= ~0x2000;
181+
IR(ir->op2)->prev &= ~0x2000;
182+
require_remark |= (~(irl->t.irt & irr->t.irt) & IRT_MARK);
183+
}
166184
irt_setmark(IR(ir->op1)->t);
167185
irt_setmark(IR(ir->op2)->t);
168186
}
169187
} while (remark);
188+
return require_remark;
170189
}
171190

172191
/* Sweep instructions and tag sunken allocations and stores. */
173192
static void sink_sweep_ins(jit_State *J)
174193
{
194+
int index = 0;
175195
IRIns *ir, *irbase = IR(REF_BASE);
176196
for (ir = IR(J->cur.nins-1) ; ir >= irbase; ir--) {
177197
switch (ir->o) {
@@ -198,8 +218,14 @@ static void sink_sweep_ins(jit_State *J)
198218
#endif
199219
case IR_TNEW: case IR_TDUP:
200220
if (!irt_ismarked(ir->t)) {
221+
if (ir->prev & 0x8000) {
222+
index++; /* A sunken store requires this for unsinking. */
223+
lj_assertJ(index <= 0xFF, "Too many heavy sinks");
224+
ir->prev = REGSP(RID_SINK, index);
225+
} else {
226+
ir->prev = REGSP(RID_SINK, 0);
227+
}
201228
ir->t.irt &= ~IRT_GUARD;
202-
ir->prev = REGSP(RID_SINK, 0);
203229
J->cur.sinktags = 1; /* Signal present SINK tags to assembler. */
204230
} else {
205231
irt_clearmark(ir->t);
@@ -244,15 +270,19 @@ void lj_opt_sink(jit_State *J)
244270
if ((J->flags & need) == need &&
245271
(J->chain[IR_TNEW] || J->chain[IR_TDUP] ||
246272
(LJ_HASFFI && (J->chain[IR_CNEW] || J->chain[IR_CNEWI])))) {
273+
sink_prepare(J);
247274
if (!J->loopref)
248275
sink_mark_snap(J, &J->cur.snap[J->cur.nsnap-1]);
249-
sink_mark_ins(J);
250-
if (J->loopref)
251-
sink_remark_phi(J);
276+
int heavysinks;
277+
int dolightsink = 0;
278+
do {
279+
heavysinks = sink_mark_ins(J, dolightsink);
280+
dolightsink |= heavysinks >= 0xFF;
281+
} while ((J->loopref && sink_remark_phi(J)) || heavysinks >= 0xFF);
252282
sink_sweep_ins(J);
253283
}
254284
}
255285

256286
#undef IR
257287

258-
#endif
288+
#endif

0 commit comments

Comments
 (0)