
Commit b058b63

Minimize the blast radius of lock-counter bugs
Recycling objects with a sync.Pool risks amplifying the impact of bugs. If an object is not reset to be functionally indistinguishable from a newly constructed one before it is returned to the pool, the unlucky recipient of the recycled object could misbehave through no fault of its own. And what's worse, such bugs would likely manifest as Heisenbugs: they would only happen when objects returned to the pool are actually reused, i.e. in a program with lots of concurrency and lots of lock churn.

Make the pool optimization functionally transparent so the impact of counter bugs does not extend beyond the scope it would have had without the pool optimization. Defensively reset the lock counters before returning them to the pool and after taking them from the pool. And add testing-only code which asserts that the invariants are upheld, to ensure that the defensive coding is not papering over real bugs.

Signed-off-by: Cory Snider <[email protected]>
1 parent 4817df9 commit b058b63
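The failure mode the commit message describes is not specific to this package; any type recycled through a sync.Pool without being reset can leak state into its next user. A minimal, self-contained sketch of the problem (using bytes.Buffer as a stand-in, not this package's lockCtr):

```go
package main

import (
	"bytes"
	"fmt"
	"sync"
)

var bufPool = sync.Pool{New: func() any { return new(bytes.Buffer) }}

func main() {
	// First user writes to the buffer and returns it to the pool
	// WITHOUT resetting it -- the kind of bug this commit defends against.
	b := bufPool.Get().(*bytes.Buffer)
	b.WriteString("stale data")
	bufPool.Put(b) // should have called b.Reset() first

	// A later user may receive the same buffer and, through no fault of
	// its own, observe the leftover state.
	b2 := bufPool.Get().(*bytes.Buffer)
	b2.WriteString("hello")
	fmt.Println(b2.String()) // may print "stale datahello" when b is reused
}
```

Whether the second Get actually returns the recycled buffer is up to the runtime, which is exactly why such bugs surface as Heisenbugs under heavy churn rather than in simple tests.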

File tree

- istesting.go
- istesting_go120.go
- mutexmap.go
- rwmutexmap.go

4 files changed: 87 additions & 8 deletions

istesting.go

Lines changed: 9 additions & 0 deletions
@@ -0,0 +1,9 @@
+//go:build go1.21
+
+package locker
+
+import "testing"
+
+func isTesting() bool {
+	return testing.Testing()
+}

istesting_go120.go

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+//go:build !go1.21
+
+package locker
+
+func isTesting() bool {
+	return false
+}
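For context: the split into two files exists because testing.Testing, which reports whether the running binary was built by `go test`, was only added in Go 1.21, so older toolchains get a constant-false fallback. A rough sketch of how such a gate is typically consumed (the assertInvariant helper below is illustrative only, not part of this change):

```go
//go:build go1.21

package example

import "testing"

// assertInvariant panics only inside test binaries. In production builds the
// caller is expected to fall back to defensive resetting rather than crashing.
func assertInvariant(ok bool, msg string) {
	if testing.Testing() && !ok {
		panic(msg)
	}
}
```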

mutexmap.go

Lines changed: 37 additions & 5 deletions
@@ -5,6 +5,7 @@ free up more global locks to handle other tasks.
 package locker
 
 import (
+	"strconv"
 	"sync"
 	"sync/atomic"
 )
@@ -37,13 +38,29 @@ func (l *MutexMap[T]) Lock(key T) {
 	if !exists {
 		nameLock = lockCtrPool.Get().(*lockCtr)
 		l.locks[key] = nameLock
+
+		if isTesting() {
+			if !nameLock.TryLock() {
+				panic("MutexMap: lock taken from pool is already locked")
+			}
+			nameLock.Unlock()
+			if w := nameLock.waiters.Load(); w != 0 {
+				panic("MutexMap: lock taken from pool has nonzero waiters counter" +
+					" (waiters=" + strconv.Itoa(int(w)) + ")")
+			}
+		}
+		// Defensive coding: force the waiters counter to a known value
+		// just in case the lockCtr we got from the pool was recycled
+		// and not properly reset.
+		nameLock.waiters.Store(1)
+	} else {
+		// Increment the nameLock waiters while inside the main mutex.
+		// This makes sure that the lock isn't deleted if `Lock` and
+		// `Unlock` are called concurrently.
+		nameLock.waiters.Add(1)
 	}
 
-	// Increment the nameLock waiters while inside the main mutex.
-	// This makes sure that the lock isn't deleted if `Lock` and `Unlock` are called concurrently.
-	nameLock.waiters.Add(1)
 	l.mu.Unlock()
-
 	// Lock the nameLock outside the main mutex so we don't block other operations.
 	// Once locked then we can decrement the number of waiters for this lock.
 	nameLock.Lock()
@@ -62,8 +79,23 @@ func (l *MutexMap[T]) Unlock(key T) {
 		(&sync.Mutex{}).Unlock()
 	}
 
-	if nameLock.waiters.Load() <= 0 {
+	if w := nameLock.waiters.Load(); w <= 0 {
 		delete(l.locks, key)
+
+		// Putting a lock back into the pool when the waiters counter is
+		// nonzero would result in some really nasty, subtle
+		// misbehaviour -- which would only manifest when pool items get
+		// reused, i.e. in situations with lots of lock churn. The
+		// counters should be 0 when the lock is deleted from the map
+		// and returned to the pool.
+		if isTesting() && w < 0 {
+			panic("MutexMap: lock unlocked with negative counter" +
+				" (waiters=" + strconv.Itoa(int(w)) + ")")
+		}
+		// But given the consequences of that invariant being violated,
+		// zero out the counter just in case to minimize the impact of
+		// such bugs.
+		nameLock.waiters.Store(0)
 		defer lockCtrPool.Put(nameLock)
 	}
 	nameLock.Unlock()
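Stripped of the map bookkeeping, the treatment mutexmap.go now applies at both ends of the pool boils down to the pattern below. This is a stand-alone sketch with illustrative names (counter, and isTesting as a plain variable), not the package's actual code:

```go
package main

import (
	"sync"
	"sync/atomic"
)

// counter stands in for lockCtr: pooled state that must look freshly
// constructed every time it leaves the pool.
type counter struct {
	waiters atomic.Int32
}

var pool = sync.Pool{New: func() any { return new(counter) }}

// isTesting stands in for the build-tagged helper added by this commit.
var isTesting = false

// get takes a counter from the pool. Under test it fails loudly if the
// recycled object was not reset; in production it defensively resets the
// object so a reset bug elsewhere stays invisible to the new user.
func get() *counter {
	c := pool.Get().(*counter)
	if isTesting && c.waiters.Load() != 0 {
		panic("counter taken from pool has nonzero waiters")
	}
	c.waiters.Store(0)
	return c
}

// put applies the same assert-under-test, reset-defensively-otherwise
// treatment before returning the counter to the pool.
func put(c *counter) {
	if isTesting && c.waiters.Load() != 0 {
		panic("counter returned to pool with nonzero waiters")
	}
	c.waiters.Store(0)
	pool.Put(c)
}

func main() {
	c := get()
	c.waiters.Add(1)
	c.waiters.Add(-1)
	put(c)
}
```

The real code differs in one detail: a freshly taken lockCtr immediately has one waiter (the caller that just pulled it from the pool), so Lock stores 1 rather than 0 after the checks.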

rwmutexmap.go

Lines changed: 34 additions & 3 deletions
@@ -1,6 +1,7 @@
 package locker
 
 import (
+	"strconv"
 	"sync"
 	"sync/atomic"
 )
@@ -21,6 +22,36 @@ type rwlockCtr struct {
 	readers atomic.Int32 // Number of readers currently holding the lock
 }
 
+// assertReset asserts that l is zeroed out. Under testing it panics if the
+// assertions are false. Otherwise it zeroes out the struct to minimize the
+// impact of whichever bugs caused the invariant to be violated.
+//
+// This function returns l.
+func (l *rwlockCtr) assertReset() *rwlockCtr {
+	// Putting a lock back into the pool when the counters are nonzero would
+	// result in some really nasty, subtle misbehaviour -- which would only
+	// manifest when pool items get reused, i.e. in situations with lots of
+	// lock churn. The counters should be 0 when the lock is deleted from
+	// the map and returned to the pool.
+	if isTesting() {
+		if !l.TryLock() {
+			panic("RWMutexMap: lock is unexpectedly locked")
+		}
+		l.Unlock()
+		r, w := l.readers.Load(), l.waiters.Load()
+		if r != 0 || w != 0 {
+			panic("RWMutexMap: lock has nonzero counters " +
+				"(readers=" + strconv.Itoa(int(r)) +
+				", waiters=" + strconv.Itoa(int(w)) + ")")
+		}
+	}
+	// But given the consequences of that invariant being violated, zero out
+	// the counters just in case to minimize the impact of such bugs.
+	l.waiters.Store(0)
+	l.readers.Store(0)
+	return l
+}
+
 var rwlockCtrPool = sync.Pool{New: func() any { return new(rwlockCtr) }}
 
 func (l *RWMutexMap[T]) get(key T) *rwlockCtr {
@@ -31,7 +62,7 @@ func (l *RWMutexMap[T]) get(key T) *rwlockCtr {
 	nameLock, exists := l.locks[key]
 	if !exists {
 		nameLock = rwlockCtrPool.Get().(*rwlockCtr)
-		l.locks[key] = nameLock
+		l.locks[key] = nameLock.assertReset()
 	}
 	return nameLock
 }
@@ -80,7 +111,7 @@ func (l *RWMutexMap[T]) Unlock(key T) {
 
 	if nameLock.waiters.Load() <= 0 && nameLock.readers.Load() <= 0 {
 		delete(l.locks, key)
-		defer rwlockCtrPool.Put(nameLock)
+		defer rwlockCtrPool.Put(nameLock.assertReset())
 	}
 	nameLock.Unlock()
 }
@@ -96,7 +127,7 @@ func (l *RWMutexMap[T]) RUnlock(key T) {
 
 	if nameLock.waiters.Load() <= 0 && nameLock.readers.Load() <= 0 {
 		delete(l.locks, key)
-		defer rwlockCtrPool.Put(nameLock)
+		defer rwlockCtrPool.Put(nameLock.assertReset())
 	}
 	nameLock.RUnlock()
 }
