Skip to content

Commit 18e33d6

Browse files
authored
Various improvements/fixes on long-running stability (#176)
Core Stability & Bug Fixes
Fix stability issues and race conditions
- Prevent NPE when landscape lacks local descriptor
- Fix config change failure by forcing WAL compaction on FSM apply
- Send EnsureRequest only from leader during config changes
- Quit zombie replicas only when not in current config
- Fix race causing pipeline retargeting to stall
- Correctly handle duplicate matchinfo in inbox ingestion
- Fix RedundantRangeRemovalBalancer mistakenly removing valid ranges
- Fix ReplicaCntBalancer unbalanced state in edge cases

Balancing & Resource Management
Improve balancer stability and efficiency
- Enhance RangeSplitBalancer and ReplicaCntBalancer for edge cases
- Support partial load rules in BalancerController
- Reduce HostMemberList and AGENT_HOST_MAP sync overhead
- Exclude terminated ranges from effective routing
- Optimize balancer logging output
- Improve built-in balancer efficiency

CRDT & Anti-Entropy
CRDT and anti-entropy improvements
- Correct log context and support MDCLogger with lambdas
- Expose refute signal to speed up CRDT convergence
- Improve housekeeping logic in CRDT-based metadata service
- Correctly meter delta send rate and throughput
- Improve stale member cleanup logic
- Anti-entropy refinements:
  * Reset resendCount on ACK to avoid spurious resets
  * Continue syncing after ACK to drain deltas
  * Leverage late/unmatched ACKs when possible

Performance & Reliability
Optimize performance and backpressure handling
- Reduce memory overhead in argument formatter
- Improve backpressure when downstream stalls
- Optimize bootstrap and config change workflow

Miscellaneous
Chores and maintenance
- Remove deprecated proto fields
- Correct code format in Settings file
- Enable manually triggered Coverity build
1 parent d2ea0a0 commit 18e33d6

File tree

116 files changed

+3576
-1256
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+3576
-1256
lines changed

.github/workflows/build-cov.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
name: Cov-Build
22

33
on:
4+
workflow_dispatch:
45
pull_request:
56
branches:
67
- 'main'

base-cluster/pom.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
<groupId>org.apache.bifromq</groupId>
3434
<artifactId>base-env-provider</artifactId>
3535
</dependency>
36+
<dependency>
37+
<groupId>org.apache.bifromq</groupId>
38+
<artifactId>base-util</artifactId>
39+
</dependency>
3640
<dependency>
3741
<groupId>org.apache.bifromq</groupId>
3842
<artifactId>base-hlc</artifactId>

base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,31 @@
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1515
* KIND, either express or implied. See the License for the
1616
* specific language governing permissions and limitations
17-
* under the License.
17+
* under the License.
1818
*/
1919

2020
package org.apache.bifromq.basecluster;
2121

22-
import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI;
2322
import static com.google.common.base.Preconditions.checkArgument;
23+
import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI;
2424

25+
import com.google.common.base.Preconditions;
26+
import com.google.common.base.Strings;
27+
import com.google.protobuf.ByteString;
28+
import io.micrometer.core.instrument.Metrics;
29+
import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics;
30+
import io.reactivex.rxjava3.core.Observable;
31+
import io.reactivex.rxjava3.core.Scheduler;
32+
import io.reactivex.rxjava3.disposables.CompositeDisposable;
33+
import io.reactivex.rxjava3.schedulers.Schedulers;
34+
import java.net.InetSocketAddress;
35+
import java.time.Duration;
36+
import java.util.Map;
37+
import java.util.Set;
38+
import java.util.concurrent.CompletableFuture;
39+
import java.util.concurrent.ScheduledThreadPoolExecutor;
40+
import java.util.concurrent.atomic.AtomicReference;
41+
import lombok.extern.slf4j.Slf4j;
2542
import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint;
2643
import org.apache.bifromq.basecluster.fd.FailureDetector;
2744
import org.apache.bifromq.basecluster.fd.IFailureDetector;
@@ -43,23 +60,6 @@
4360
import org.apache.bifromq.basecrdt.store.ICRDTStore;
4461
import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage;
4562
import org.apache.bifromq.baseenv.EnvProvider;
46-
import com.google.common.base.Preconditions;
47-
import com.google.common.base.Strings;
48-
import com.google.protobuf.ByteString;
49-
import io.micrometer.core.instrument.Metrics;
50-
import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics;
51-
import io.reactivex.rxjava3.core.Observable;
52-
import io.reactivex.rxjava3.core.Scheduler;
53-
import io.reactivex.rxjava3.disposables.CompositeDisposable;
54-
import io.reactivex.rxjava3.schedulers.Schedulers;
55-
import java.net.InetSocketAddress;
56-
import java.time.Duration;
57-
import java.util.Map;
58-
import java.util.Set;
59-
import java.util.concurrent.CompletableFuture;
60-
import java.util.concurrent.ScheduledThreadPoolExecutor;
61-
import java.util.concurrent.atomic.AtomicReference;
62-
import lombok.extern.slf4j.Slf4j;
6363

6464
@Slf4j
6565
final class AgentHost implements IAgentHost {
@@ -173,6 +173,11 @@ public Observable<Map<HostEndpoint, Set<String>>> landscape() {
173173
return memberList.landscape();
174174
}
175175

176+
@Override
177+
public Observable<Long> refuteSignal() {
178+
return memberList.refuteSignal();
179+
}
180+
176181
@Override
177182
public void close() {
178183
if (state.compareAndSet(State.STARTED, State.STOPPING)) {

base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,23 +14,23 @@
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1515
* KIND, either express or implied. See the License for the
1616
* specific language governing permissions and limitations
17-
* under the License.
17+
* under the License.
1818
*/
1919

2020
package org.apache.bifromq.basecluster;
2121

22+
import io.reactivex.rxjava3.core.Observable;
23+
import java.net.InetSocketAddress;
24+
import java.util.Map;
25+
import java.util.Set;
26+
import java.util.concurrent.CompletableFuture;
2227
import org.apache.bifromq.basecluster.memberlist.HostAddressResolver;
2328
import org.apache.bifromq.basecluster.memberlist.IHostAddressResolver;
2429
import org.apache.bifromq.basecluster.memberlist.agent.IAgent;
2530
import org.apache.bifromq.basecluster.membership.proto.HostEndpoint;
2631
import org.apache.bifromq.basecluster.transport.ITransport;
2732
import org.apache.bifromq.basecluster.transport.TCPTransport;
2833
import org.apache.bifromq.basecluster.transport.Transport;
29-
import io.reactivex.rxjava3.core.Observable;
30-
import java.net.InetSocketAddress;
31-
import java.util.Map;
32-
import java.util.Set;
33-
import java.util.concurrent.CompletableFuture;
3434

3535
/**
3636
* Agent host defines the interface for hosting agents and joining the cluster.
@@ -101,6 +101,14 @@ static IAgentHost newInstance(AgentHostOptions options) {
101101
*/
102102
Observable<Map<HostEndpoint, Set<String>>> landscape();
103103

104+
/**
105+
* Emits a signal whenever the local host actively refutes a suspicion of being dead.
106+
* Each emission carries the timestamp (in millis) when the refutation occurred.
107+
*
108+
* @return an observable stream of refutation timestamps
109+
*/
110+
Observable<Long> refuteSignal();
111+
104112
/**
105113
* Shutdown the agent host.
106114
*/

base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java

Lines changed: 73 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1515
* KIND, either express or implied. See the License for the
1616
* specific language governing permissions and limitations
17-
* under the License.
17+
* under the License.
1818
*/
1919

2020
package org.apache.bifromq.basecluster.memberlist;
@@ -25,28 +25,8 @@
2525
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg;
2626
import static org.apache.bifromq.basecrdt.store.ReplicaIdGenerator.generate;
2727

28-
import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint;
29-
import org.apache.bifromq.basecluster.memberlist.agent.Agent;
30-
import org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider;
31-
import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger;
32-
import org.apache.bifromq.basecluster.memberlist.agent.IAgent;
33-
import org.apache.bifromq.basecluster.membership.proto.Doubt;
34-
import org.apache.bifromq.basecluster.membership.proto.Fail;
35-
import org.apache.bifromq.basecluster.membership.proto.HostEndpoint;
36-
import org.apache.bifromq.basecluster.membership.proto.HostMember;
37-
import org.apache.bifromq.basecluster.membership.proto.Join;
38-
import org.apache.bifromq.basecluster.membership.proto.Quit;
39-
import org.apache.bifromq.basecluster.messenger.IMessenger;
40-
import org.apache.bifromq.basecluster.proto.ClusterMessage;
41-
import org.apache.bifromq.basecrdt.core.api.IORMap;
42-
import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
43-
import org.apache.bifromq.basecrdt.core.api.ORMapOperation;
44-
import org.apache.bifromq.basecrdt.proto.Replica;
45-
import org.apache.bifromq.basecrdt.store.ICRDTStore;
46-
import org.apache.bifromq.basehlc.HLC;
4728
import com.google.common.base.Preconditions;
4829
import com.google.common.collect.Maps;
49-
import com.google.common.collect.Sets;
5030
import com.google.protobuf.AbstractMessageLite;
5131
import com.google.protobuf.ByteString;
5232
import io.micrometer.core.instrument.Gauge;
@@ -57,6 +37,7 @@
5737
import io.reactivex.rxjava3.core.Scheduler;
5838
import io.reactivex.rxjava3.disposables.CompositeDisposable;
5939
import io.reactivex.rxjava3.subjects.BehaviorSubject;
40+
import io.reactivex.rxjava3.subjects.PublishSubject;
6041
import java.net.InetSocketAddress;
6142
import java.util.HashSet;
6243
import java.util.Iterator;
@@ -69,7 +50,30 @@
6950
import java.util.concurrent.atomic.AtomicReference;
7051
import java.util.stream.Collectors;
7152
import lombok.extern.slf4j.Slf4j;
53+
import org.apache.bifromq.base.util.RendezvousHash;
54+
import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint;
55+
import org.apache.bifromq.basecluster.memberlist.agent.Agent;
56+
import org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider;
57+
import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger;
58+
import org.apache.bifromq.basecluster.memberlist.agent.IAgent;
59+
import org.apache.bifromq.basecluster.membership.proto.Doubt;
60+
import org.apache.bifromq.basecluster.membership.proto.Fail;
61+
import org.apache.bifromq.basecluster.membership.proto.HostEndpoint;
62+
import org.apache.bifromq.basecluster.membership.proto.HostMember;
63+
import org.apache.bifromq.basecluster.membership.proto.Join;
64+
import org.apache.bifromq.basecluster.membership.proto.Quit;
65+
import org.apache.bifromq.basecluster.messenger.IMessenger;
66+
import org.apache.bifromq.basecluster.proto.ClusterMessage;
67+
import org.apache.bifromq.basecrdt.core.api.IORMap;
68+
import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
69+
import org.apache.bifromq.basecrdt.core.api.ORMapOperation;
70+
import org.apache.bifromq.basecrdt.proto.Replica;
71+
import org.apache.bifromq.basecrdt.store.ICRDTStore;
72+
import org.apache.bifromq.basehlc.HLC;
7273

74+
/**
75+
* HostMemberList implementation using CRDT for achieving a consistent view of the host members in the cluster.
76+
*/
7377
@Slf4j
7478
public class HostMemberList implements IHostMemberList {
7579
private final AtomicReference<State> state = new AtomicReference<>(State.JOINED);
@@ -79,12 +83,25 @@ public class HostMemberList implements IHostMemberList {
7983
private final IHostAddressResolver addressResolver;
8084
private final BehaviorSubject<Map<HostEndpoint, HostMember>> membershipSubject = BehaviorSubject.createDefault(
8185
new ConcurrentHashMap<>());
86+
private final PublishSubject<Long> refuteSubject = PublishSubject.create();
8287
private final Map<String, Agent> agentMap = new ConcurrentHashMap<>();
8388
private final IORMap hostListCRDT;
8489
private final CompositeDisposable disposables = new CompositeDisposable();
8590
private final MetricManager metricManager;
8691
private final String[] tags;
8792
private volatile HostMember local;
93+
94+
/**
95+
* Constructor of HostMemberList.
96+
*
97+
* @param bindAddr the address to bind the host member
98+
* @param port the port to bind the host member
99+
* @param messenger the messenger to use for communication
100+
* @param scheduler the scheduler to use for scheduling tasks
101+
* @param store the CRDT store to use for storing internal OR-Map
102+
* @param addressResolver the address resolver to resolve host endpoints to addresses
103+
* @param tags the tags to be used for metrics
104+
*/
88105
public HostMemberList(String bindAddr,
89106
int port,
90107
IMessenger messenger,
@@ -134,10 +151,13 @@ private boolean join(HostMember member) {
134151
if (joined) {
135152
// add it into crdt
136153
log.debug("Member[{}] joins the cluster: local={}", member, local);
137-
Optional<HostMember> memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint());
138-
if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) {
139-
hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString())
140-
.with(MVRegOperation.write(member.toByteString())));
154+
if (member == local) {
155+
// only update crdt if it's local member
156+
Optional<HostMember> memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint());
157+
if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) {
158+
hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString())
159+
.with(MVRegOperation.write(member.toByteString())));
160+
}
141161
}
142162
// update crdt landscape
143163
store.join(hostListCRDT.id(), currentMembers().keySet().stream()
@@ -148,12 +168,11 @@ private boolean join(HostMember member) {
148168
}
149169
}
150170

151-
private void drop(HostEndpoint memberEndpoint, int incarnation) {
171+
private void drop(HostEndpoint memberEndpoint, int incarnation, boolean fromQuit) {
152172
synchronized (this) {
153173
boolean removed = removeMember(memberEndpoint, incarnation);
154174
Optional<HostMember> memberInCRDT = getHostMember(hostListCRDT, memberEndpoint);
155-
if (memberInCRDT.isPresent()) {
156-
// remove it from crdt if any
175+
if (!fromQuit && memberInCRDT.isPresent() && shouldReportFailure(memberInCRDT.get().getEndpoint())) {
157176
hostListCRDT.execute(ORMapOperation.remove(memberEndpoint.toByteString()).of(mvreg));
158177
}
159178
if (removed) {
@@ -165,6 +184,17 @@ private void drop(HostEndpoint memberEndpoint, int incarnation) {
165184
}
166185
}
167186

187+
private boolean shouldReportFailure(HostEndpoint failedMemberEndpoint) {
188+
// if local member is responsible for removing the failed member from CRDT
189+
RendezvousHash<HostEndpoint, HostEndpoint> hash = RendezvousHash.<HostEndpoint, HostEndpoint>builder()
190+
.keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer()))
191+
.nodeFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer()))
192+
.nodes(currentMembers().keySet())
193+
.build();
194+
HostEndpoint reporter = hash.get(failedMemberEndpoint);
195+
return reporter.getId().equals(local.getEndpoint().getId());
196+
}
197+
168198
@Override
169199
public boolean isZombie(HostEndpoint endpoint) {
170200
return !endpoint.getId().equals(local.getEndpoint().getId())
@@ -207,6 +237,7 @@ public CompletableFuture<Void> stop() {
207237
.thenCompose(v -> store.stopHosting(hostListCRDT.id()))
208238
.whenComplete((v, e) -> {
209239
membershipSubject.onComplete();
240+
refuteSubject.onComplete();
210241
metricManager.close();
211242
state.set(State.QUITED);
212243
});
@@ -226,6 +257,8 @@ private void renew(int atLeastIncarnation) {
226257
synchronized (this) {
227258
local = local.toBuilder().setIncarnation(Math.max(local.getIncarnation(), atLeastIncarnation) + 1).build();
228259
join(local);
260+
agentMap.values().forEach(Agent::refreshRegistration);
261+
refuteSubject.onNext(HLC.INST.get());
229262
}
230263
}
231264

@@ -247,7 +280,6 @@ public IAgent host(String agentId) {
247280
tags));
248281
local = local.toBuilder()
249282
.setIncarnation(local.getIncarnation() + 1)
250-
.addAgentId(agentId) // deprecate since 3.3.3
251283
.putAgent(agentId, agentEndpoint.getIncarnation())
252284
.build();
253285
join(local);
@@ -265,8 +297,6 @@ public CompletableFuture<Void> stopHosting(String agentId) {
265297
synchronized (this) {
266298
local = local.toBuilder()
267299
.setIncarnation(local.getIncarnation() + 1)
268-
.clearAgentId()
269-
.addAllAgentId(agentMap.keySet()) // deprecate since 3.3.3
270300
.clearAgent()
271301
.putAllAgent(Maps.transformValues(agentMap, a -> a.local().getIncarnation()))
272302
.build();
@@ -279,7 +309,12 @@ public CompletableFuture<Void> stopHosting(String agentId) {
279309

280310
@Override
281311
public Observable<Map<HostEndpoint, Set<String>>> landscape() {
282-
return membershipSubject.map(m -> Maps.transformValues(m, v -> Sets.newHashSet(v.getAgentIdList())));
312+
return membershipSubject.map(m -> Maps.transformValues(m, v -> v.getAgentMap().keySet()));
313+
}
314+
315+
@Override
316+
public Observable<Long> refuteSignal() {
317+
return refuteSubject;
283318
}
284319

285320
private Map<HostEndpoint, HostMember> currentMembers() {
@@ -327,6 +362,9 @@ private void handleMessage(ClusterMessage message) {
327362
case QUIT -> handleQuit(message.getQuit());
328363
case FAIL -> handleFail(message.getFail());
329364
case DOUBT -> handleDoubt(message.getDoubt());
365+
default -> {
366+
// never happen
367+
}
330368
}
331369
}
332370

@@ -363,15 +401,15 @@ private void handleFail(Fail fail) {
363401
} else if (isZombie(failedEndpoint)) {
364402
clearZombie(failedEndpoint);
365403
} else {
366-
drop(failedEndpoint, fail.getIncarnation());
404+
drop(failedEndpoint, fail.getIncarnation(), false);
367405
}
368406
}
369407

370408
private void handleQuit(Quit quit) {
371409
HostEndpoint quitEndpoint = quit.getEndpoint();
372410
log.debug("Member[{}] quits the cluster", quitEndpoint);
373411
if (!quitEndpoint.equals(local.getEndpoint()) && !isZombie(quitEndpoint)) {
374-
drop(quitEndpoint, quit.getIncarnation());
412+
drop(quitEndpoint, quit.getIncarnation(), true);
375413
}
376414
}
377415

@@ -388,7 +426,7 @@ private void handleDoubt(Doubt doubt) {
388426

389427
private void clearZombie(HostEndpoint zombieEndpoint) {
390428
// drop zombie if any, and broadcast a quit on behalf of it
391-
drop(zombieEndpoint, Integer.MAX_VALUE);
429+
drop(zombieEndpoint, Integer.MAX_VALUE, false);
392430
messenger.spread(ClusterMessage.newBuilder()
393431
.setQuit(Quit.newBuilder().setEndpoint(zombieEndpoint).setIncarnation(Integer.MAX_VALUE).build())
394432
.build());

0 commit comments

Comments
 (0)