Skip to content

Commit 04360ab

Browse files
committed
runtime (gc_blocks.go): make sweep branchless
Instead of looping over each block, we can use bit hacks to operate on an entire state byte. This deinterleaves the state bits in order to enable these tricks.
1 parent 20e22d4 commit 04360ab

File tree

2 files changed

+79
-71
lines changed

2 files changed

+79
-71
lines changed

builder/sizes_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) {
4242
// This is a small number of very diverse targets that we want to test.
4343
tests := []sizeTest{
4444
// microcontrollers
45-
{"hifive1b", "examples/echo", 3896, 280, 0, 2268},
46-
{"microbit", "examples/serial", 2860, 360, 8, 2272},
47-
{"wioterminal", "examples/pininterrupt", 7361, 1491, 116, 6912},
45+
{"hifive1b", "examples/echo", 3848, 296, 0, 2268},
46+
{"microbit", "examples/serial", 2844, 376, 8, 2272},
47+
{"wioterminal", "examples/pininterrupt", 7301, 1507, 116, 6912},
4848

4949
// TODO: also check wasm. Right now this is difficult, because
5050
// wasm binaries are run through wasm-opt and therefore the

src/runtime/gc_blocks.go

Lines changed: 76 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -71,19 +71,20 @@ var zeroSizedAlloc uint8
7171
type blockState uint8
7272

7373
const (
74-
blockStateFree blockState = 0 // 00
75-
blockStateHead blockState = 1 // 01
76-
blockStateTail blockState = 2 // 10
77-
blockStateMark blockState = 3 // 11
78-
blockStateMask blockState = 3 // 11
74+
blockStateLow blockState = 1
75+
blockStateHigh blockState = 1 << blocksPerStateByte
76+
77+
blockStateFree blockState = 0
78+
blockStateHead blockState = blockStateLow
79+
blockStateTail blockState = blockStateHigh
80+
blockStateMark blockState = blockStateLow | blockStateHigh
81+
blockStateMask blockState = blockStateLow | blockStateHigh
7982
)
8083

84+
const blockStateEach = 1<<blocksPerStateByte - 1
85+
8186
// The value of a state byte in which every block is a 'tail' block.
82-
const blockStateByteAllTails = 0 |
83-
uint8(blockStateTail<<(stateBits*3)) |
84-
uint8(blockStateTail<<(stateBits*2)) |
85-
uint8(blockStateTail<<(stateBits*1)) |
86-
uint8(blockStateTail<<(stateBits*0))
87+
const blockStateByteAllTails = byte(blockStateTail) * blockStateEach
8788

8889
// String returns a human-readable version of the block state, for debugging.
8990
func (s blockState) String() string {
@@ -180,7 +181,7 @@ func (b gcBlock) stateByte() byte {
180181
// Return the block state given a state byte. The state byte must have been
181182
// obtained using b.stateByte(), otherwise the result is incorrect.
182183
func (b gcBlock) stateFromByte(stateByte byte) blockState {
183-
return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
184+
return blockState(stateByte>>(b%blocksPerStateByte)) & blockStateMask
184185
}
185186

186187
// State returns the current block state.
@@ -193,38 +194,12 @@ func (b gcBlock) state() blockState {
193194
// from head to mark.
194195
func (b gcBlock) setState(newState blockState) {
195196
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
196-
*stateBytePtr |= uint8(newState << ((b % blocksPerStateByte) * stateBits))
197+
*stateBytePtr |= uint8(newState << (b % blocksPerStateByte))
197198
if gcAsserts && b.state() != newState {
198199
runtimePanic("gc: setState() was not successful")
199200
}
200201
}
201202

202-
// markFree sets the block state to free, no matter what state it was in before.
203-
func (b gcBlock) markFree() {
204-
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
205-
*stateBytePtr &^= uint8(blockStateMask << ((b % blocksPerStateByte) * stateBits))
206-
if gcAsserts && b.state() != blockStateFree {
207-
runtimePanic("gc: markFree() was not successful")
208-
}
209-
if gcAsserts {
210-
*(*[wordsPerBlock]uintptr)(unsafe.Pointer(b.address())) = [wordsPerBlock]uintptr{}
211-
}
212-
}
213-
214-
// unmark changes the state of the block from mark to head. It must be marked
215-
// before calling this function.
216-
func (b gcBlock) unmark() {
217-
if gcAsserts && b.state() != blockStateMark {
218-
runtimePanic("gc: unmark() on a block that is not marked")
219-
}
220-
clearMask := blockStateMask ^ blockStateHead // the bits to clear from the state
221-
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
222-
*stateBytePtr &^= uint8(clearMask << ((b % blocksPerStateByte) * stateBits))
223-
if gcAsserts && b.state() != blockStateHead {
224-
runtimePanic("gc: unmark() was not successful")
225-
}
226-
}
227-
228203
func isOnHeap(ptr uintptr) bool {
229204
return ptr >= heapStart && ptr < uintptr(metadataStart)
230205
}
@@ -685,36 +660,69 @@ func markRoot(addr, root uintptr) {
685660
// Sweep goes through all memory and frees unmarked memory.
686661
// It returns how many bytes are free in the heap after the sweep.
687662
func sweep() (freeBytes uintptr) {
688-
freeCurrentObject := false
689-
var freed uint64
690-
for block := gcBlock(0); block < endBlock; block++ {
691-
switch block.state() {
692-
case blockStateHead:
693-
// Unmarked head. Free it, including all tail blocks following it.
694-
block.markFree()
695-
freeCurrentObject = true
696-
gcFrees++
697-
freed++
698-
case blockStateTail:
699-
if freeCurrentObject {
700-
// This is a tail object following an unmarked head.
701-
// Free it now.
702-
block.markFree()
703-
freed++
704-
}
705-
case blockStateMark:
706-
// This is a marked object. The next tail blocks must not be freed,
707-
// but the mark bit must be removed so the next GC cycle will
708-
// collect this object if it is unreferenced then.
709-
block.unmark()
710-
freeCurrentObject = false
711-
case blockStateFree:
712-
freeBytes += bytesPerBlock
713-
}
714-
}
715-
gcFreedBlocks += freed
716-
freeBytes += uintptr(freed) * bytesPerBlock
717-
return
663+
endBlock := endBlock
664+
metadataEnd := unsafe.Add(metadataStart, (endBlock+(blocksPerStateByte-1))/blocksPerStateByte)
665+
var oldFreeBlocks, freedHeads, freedTails uintptr
666+
var carry byte
667+
// Pre-subtract the blocks that do not actually exist from oldFreeBlocks.
668+
oldFreeBlocks -= (blocksPerStateByte - 1) - uintptr(endBlock+(blocksPerStateByte-1))%blocksPerStateByte
669+
for meta := metadataStart; meta != metadataEnd; meta = unsafe.Add(meta, 1) {
670+
// Fetch the state byte.
671+
stateBytePtr := (*byte)(unsafe.Pointer(meta))
672+
stateByte := *stateBytePtr
673+
674+
// Count existing free blocks in the state byte.
675+
lowState := stateByte & blockStateEach
676+
highState := stateByte >> blocksPerStateByte
677+
freeBlocks := lowState | highState
678+
oldFreeBlocks += uintptr(count4LUT[freeBlocks])
679+
680+
// Count unmarked heads in the state byte.
681+
unmarkedHeads := lowState &^ highState
682+
freedHeads += uintptr(count4LUT[unmarkedHeads])
683+
684+
// Identify and separate live and free tails.
685+
// Adding 1 to a run of bits will clear the run.
686+
// We can use this to clear tails after a freed head.
687+
tails := highState &^ lowState
688+
tailClear := tails + (unmarkedHeads << 1) + carry
689+
carry = tailClear >> blocksPerStateByte
690+
freedTails += uintptr(count4LUT[tails&^tailClear])
691+
tails &= tailClear
692+
693+
// Construct the new state byte.
694+
markedHeads := highState & lowState
695+
*stateBytePtr = markedHeads | (tails << blocksPerStateByte)
696+
}
697+
698+
// Update the GC metrics.
699+
gcFrees += uint64(freedHeads)
700+
freedBlocks := freedHeads + freedTails
701+
gcFreedBlocks += uint64(freedBlocks)
702+
freeBlocks := oldFreeBlocks + freedBlocks
703+
704+
return freeBlocks * bytesPerBlock
705+
}
706+
707+
// count4LUT is a lookup table used to count set bits in a 4-bit mask.
708+
// TODO: replace with popcnt when available
709+
var count4LUT = [16]uint8{
710+
0b0000: 0,
711+
0b0001: 1,
712+
0b0010: 1,
713+
0b0011: 2,
714+
0b0100: 1,
715+
0b0101: 2,
716+
0b0110: 2,
717+
0b0111: 3,
718+
0b1000: 1,
719+
0b1001: 2,
720+
0b1010: 2,
721+
0b1011: 3,
722+
0b1100: 2,
723+
0b1101: 3,
724+
0b1110: 3,
725+
0b1111: 4,
718726
}
719727

720728
// dumpHeap can be used for debugging purposes. It dumps the state of each heap

0 commit comments

Comments
 (0)