From 7f639aa2cd7ca1814376731a686a060dfb0f1f97 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Sun, 17 Aug 2025 12:56:26 +0800 Subject: [PATCH 01/20] add shortcut to prevent NPE when landscape doesn't contain local descriptor --- .../UnreachableReplicaRemovalBalancer.java | 23 ++++---- ...UnreachableReplicaRemovalBalancerTest.java | 53 ++++++++++++++----- 2 files changed, 53 insertions(+), 23 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java index b3ea01c5b..abf7b204f 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java @@ -14,13 +14,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.balance.impl; import static org.apache.bifromq.basekv.proto.State.StateType.Normal; +import com.google.common.collect.Sets; +import java.time.Duration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; @@ -34,14 +42,6 @@ import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import com.google.common.collect.Sets; -import java.time.Duration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Supplier; /** * The UnreachableReplicaRemovalBalancer is a specialized balancer responsible for managing and removing unreachable @@ -99,7 +99,10 @@ public UnreachableReplicaRemovalBalancer(String clusterId, String localStoreId, public void update(Set landscape) { Map> descriptorMap = build(landscape); latestDescriptorMap = descriptorMap; - + if (!descriptorMap.containsKey(localStoreId)) { + replicaSuspicionTimeMap.clear(); + return; // No need to process if local store is not present in the landscape + } // Track the current leaders Set currentLeaders = new HashSet<>(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java index 752ebbcff..dc2ccb358 100644 --- 
a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java @@ -25,6 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertSame; +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -36,22 +42,16 @@ import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; -import java.time.Duration; -import java.util.Arrays; -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import java.util.function.Supplier; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class UnreachableReplicaRemovalBalancerTest { - private UnreachableReplicaRemovalBalancer balancer; - private Supplier mockTimeSource; private final String localStoreId = "localStore"; private final String peerStoreId = "peerStore"; private final KVRangeId rangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + private UnreachableReplicaRemovalBalancer balancer; + private Supplier mockTimeSource; @BeforeMethod public void setUp() { @@ -61,6 +61,36 @@ public void setUp() { new UnreachableReplicaRemovalBalancer("clusterId", localStoreId, Duration.ofSeconds(15), mockTimeSource); } + @Test + public void noChangeWhenLocalStoreMissingInitially() { + KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId); + balancer.update(Set.of(peerStoreDescriptor)); + 
assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void noNPEWhenLocalStoreDisappearsAfterBeingLeader() { + KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor( + localStoreId, + createRangeDescriptor( + rangeId, + RaftNodeStatus.Leader, + Map.of(localStoreId, RaftNodeSyncState.Replicating, peerStoreId, RaftNodeSyncState.Probing), + Set.of(localStoreId, peerStoreId), + Set.of() + ) + ); + KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId); + + when(mockTimeSource.get()).thenReturn(System.currentTimeMillis()); + balancer.update(Set.of(localStoreDescriptor, peerStoreDescriptor)); + + when(mockTimeSource.get()).thenReturn(System.currentTimeMillis() + 16000); + balancer.update(Set.of(peerStoreDescriptor)); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + @Test public void noChangeWhenAllReplicasAreReachable() { KVRangeStoreDescriptor storeDescriptor = createStoreDescriptor( @@ -73,7 +103,6 @@ public void noChangeWhenAllReplicasAreReachable() { balancer.update(Set.of(storeDescriptor)); - assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); } @@ -101,7 +130,7 @@ public void removesUnreachableVoterReplicaAfterTimeout() { // Verify that the unhealthy replica is scheduled for removal assertEquals(localStoreId, command.getToStore()); assertEquals(rangeId, command.getKvRangeId()); - assertEquals(5, command.getExpectedVer()); + assertEquals(command.getExpectedVer(), 5); assertFalse(command.getVoters().contains(peerStoreId)); } @@ -129,11 +158,10 @@ public void removesUnreachableLearnerReplicaAfterTimeout() { // Verify that the unhealthy replica is scheduled for removal assertEquals(localStoreId, command.getToStore()); assertEquals(rangeId, command.getKvRangeId()); - assertEquals(5, command.getExpectedVer()); + assertEquals(command.getExpectedVer(), 5); assertFalse(command.getLearners().contains(peerStoreId)); } - @Test public void 
noCommandIfReplicaReachableAgain() { KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor( @@ -177,7 +205,6 @@ public void removesReplicaIfLeaderChanged() { Set.of(localStoreId), Set.of(peerStoreId)) ); - balancer.update(Set.of(storeDescriptor, peerStoreDescriptor)); // Simulate a leader change From e1e628b2f58a9f3e60aa37cdbd2774fb8b148d09 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Sun, 17 Aug 2025 14:04:52 +0800 Subject: [PATCH 02/20] 1. fixed an identified config change failure process by: 1)make wal compaction whenever fsm applied config entry, 2. send EnsureRequest only from leader during applying RequestConfigChange 2. quit zombie only when local replica is not in current config --- .../basekv/store/range/KVRangeFSM.java | 235 ++++++++++-------- 1 file changed, 127 insertions(+), 108 deletions(-) diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java index af807d3f1..f58dc2b99 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java @@ -336,7 +336,7 @@ public void tick() { statsCollector.tick(); dumpSessions.values().forEach(KVRangeDumpSession::tick); shrinkWAL(); - checkZombieState(); + checkAndRepairFromZombieState(); estimateSplitHint(); } @@ -372,12 +372,10 @@ private CompletableFuture doClose() { .thenCompose(v -> statsCollector.stop()) .thenCompose(v -> mgmtTaskRunner.awaitDone()) .thenCompose(v -> wal.close()) - .thenCompose(v -> { + .thenCompose(v -> awaitShutdown(fsmExecutor)) + .whenComplete((v, e) -> { kvRange.close(); metricManager.close(); - return awaitShutdown(fsmExecutor); - }) - .whenComplete((v, e) -> { cmdFutures.values() .forEach(f -> f.completeExceptionally(new KVRangeException.TryLater("Range closed"))); 
queryRunner.close(); @@ -646,29 +644,27 @@ private CompletableFuture apply(LogEntry entry) { switch (entry.getTypeCase()) { case CONFIG -> { IKVRangeWriter rangeWriter = kvRange.toWriter(); - applyConfigChange(entry.getTerm(), entry.getIndex(), entry.getConfig(), rangeWriter) - .whenComplete((callback, e) -> { - if (onDone.isCancelled()) { - rangeWriter.abort(); - } else { - try { - if (e != null) { - rangeWriter.abort(); - onDone.completeExceptionally(e); - } else { - rangeWriter.lastAppliedIndex(entry.getIndex()); - rangeWriter.done(); - callback.run(); - linearizer.afterLogApplied(entry.getIndex()); - metricManager.reportLastAppliedIndex(entry.getIndex()); - onDone.complete(null); - } - } catch (Throwable t) { - log.error("Failed to apply log", t); - onDone.completeExceptionally(t); + try { + Supplier> afterLogApplied = applyConfigChange(entry.getTerm(), + entry.getIndex(), entry.getConfig(), rangeWriter); + rangeWriter.lastAppliedIndex(entry.getIndex()); + rangeWriter.done(); + afterLogApplied.get() + .whenComplete((v, e) -> { + if (e != null) { + log.error("Failed to apply config change", e); + onDone.completeExceptionally(e); + } else { + linearizer.afterLogApplied(entry.getIndex()); + metricManager.reportLastAppliedIndex(entry.getIndex()); + onDone.complete(null); } - } - }); + }); + } catch (Throwable t) { + rangeWriter.abort(); + log.error("Failed to apply command", t); + onDone.completeExceptionally(t); + } } case DATA -> { try { @@ -723,18 +719,17 @@ private CompletableFuture apply(LogEntry entry) { return onDone; } - private CompletableFuture applyConfigChange(long term, long index, - ClusterConfig config, - IKVRangeWritable rangeWriter) { - CompletableFuture onDone = new CompletableFuture<>(); + private Supplier> applyConfigChange(long term, + long index, + ClusterConfig config, + IKVRangeWritable rangeWriter) { State state = rangeWriter.state(); log.info("Apply new config[term={}, index={}]: state={}, leader={}\n{}", term, index, state, 
wal.isLeader(), config); rangeWriter.clusterConfig(config); if (config.getNextVotersCount() != 0 || config.getNextLearnersCount() != 0) { // skip joint-config - onDone.complete(NOOP); - return onDone; + return () -> CompletableFuture.completedFuture(null); } Set members = newHashSet(); members.addAll(config.getVotersList()); @@ -753,16 +748,17 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { quitSignal.complete(null); finishCommand(taskId); - }); + return CompletableFuture.completedFuture(null); + }; } else { rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> compactWAL().thenRun(() -> { finishCommand(taskId); }); } @@ -774,16 +770,17 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { quitSignal.complete(null); finishCommand(taskId); - }); + return CompletableFuture.completedFuture(null); + }; } else { rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> finishCommand(taskId)); + return () -> compactWAL().thenRun(() -> finishCommand(taskId)); } } } @@ -802,12 +799,13 @@ private CompletableFuture applyConfigChange(long term, long index, .build()); } rangeWriter.bumpVer(false); - onDone.complete(() -> { + return () -> { finishCommand(taskId); if (remove) { quitSignal.complete(null); } - }); + return CompletableFuture.completedFuture(null); + }; } case ToBePurged -> { String taskId = state.getTaskId(); @@ -817,25 +815,28 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { finishCommand(taskId); quitSignal.complete(null); - }); + return CompletableFuture.completedFuture(null); + }; } else { 
rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> compactWAL().thenRun(() -> { + // purge failed due to leader change, reset back to normal + log.debug("Purge failed due to leader change[newConfig={}]", config); finishCommand(taskId); }); } } - default -> + default -> { // skip internal config change triggered by leadership change - onDone.complete(NOOP); + return this::compactWAL; + } } - return onDone; } private CompletableFuture applyCommand(long ver, @@ -905,67 +906,84 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(), ? newHashSet(clusterConfig.getVotersList()) : newHashSet(newConfig.getVotersList()); Set nextLearners = toBePurged ? emptySet() : newHashSet(newConfig.getLearnersList()); - List> onceFutures = newHostingStoreIds.stream() - .map(storeId -> messenger - .once(m -> { - if (m.hasEnsureRangeReply()) { - EnsureRangeReply reply = m.getEnsureRangeReply(); - return reply.getResult() == EnsureRangeReply.Result.OK; - } - return false; - }) - .orTimeout(5, TimeUnit.SECONDS) - ) - .collect(Collectors.toList()); - CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new)) - .whenCompleteAsync((v1, t) -> { - if (t != null) { - String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s", - taskId, t.getMessage()); - log.warn(errorMessage); - finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage)); - wal.stepDown(); - return; - } - wal.changeClusterConfig(taskId, nextVoters, nextLearners) - .whenCompleteAsync((v2, e2) -> { - if (e2 != null) { - String errorMessage = - String.format("ConfigChange aborted[taskId=%s] due to %s", - taskId, e2.getMessage()); - log.debug(errorMessage); - finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage)); - wal.stepDown(); + if (wal.isLeader()) { + List> onceFutures = newHostingStoreIds.stream() + .map(storeId -> messenger + .once(m -> { + if 
(m.hasEnsureRangeReply()) { + EnsureRangeReply reply = m.getEnsureRangeReply(); + return reply.getResult() == EnsureRangeReply.Result.OK; } - // postpone finishing command when config entry is applied - }, fsmExecutor); - }, fsmExecutor); - newHostingStoreIds.forEach(storeId -> { - log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId); - ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance(); - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(id) - .setHostStoreId(storeId) - .setEnsureRange(EnsureRange.newBuilder() - .setVer(ver) // ensure the new kvrange is compatible in target store - .setBoundary(boundary) - .setInitSnapshot(Snapshot.newBuilder() - .setTerm(0) - .setIndex(0) - .setClusterConfig(ensuredClusterConfig) // empty voter set - .setData(KVRangeSnapshot.newBuilder() - .setVer(ver) - .setId(id) - // no checkpoint specified - .setLastAppliedIndex(0) - .setBoundary(boundary) - .setState(state) - .setClusterConfig(ensuredClusterConfig) - .build().toByteString()) + return false; + }) + .orTimeout(5, TimeUnit.SECONDS) + ) + .collect(Collectors.toList()); + CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new)) + .whenCompleteAsync((v1, t) -> { + if (t != null) { + String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, t.getMessage()); + log.warn(errorMessage); + finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + return; + } + wal.changeClusterConfig(taskId, nextVoters, nextLearners) + .whenCompleteAsync((v2, e2) -> { + if (e2 != null) { + String errorMessage = + String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, e2.getMessage()); + log.debug(errorMessage); + finishCommandWithError(taskId, + new KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + } + // postpone finishing command when config entry is applied + }, fsmExecutor); + }, fsmExecutor); + newHostingStoreIds.forEach(storeId -> { + 
log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId); + ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance(); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(id) + .setHostStoreId(storeId) + .setEnsureRange(EnsureRange.newBuilder() + .setVer(ver) // ensure the new kvrange is compatible in target store + .setBoundary(boundary) + .setInitSnapshot(Snapshot.newBuilder() + .setTerm(0) + .setIndex(0) + .setClusterConfig(ensuredClusterConfig) // empty voter set + .setData(KVRangeSnapshot.newBuilder() + .setVer(ver) + .setId(id) + // no checkpoint specified + .setLastAppliedIndex(0) + .setBoundary(boundary) + .setState(state) + .setClusterConfig(ensuredClusterConfig) + .build().toByteString()) + .build()) .build()) - .build()) - .build()); - }); + .build()); + }); + } else { + wal.changeClusterConfig(taskId, nextVoters, nextLearners) + .whenCompleteAsync((v2, e2) -> { + if (e2 != null) { + String errorMessage = + String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, e2.getMessage()); + log.debug(errorMessage); + finishCommandWithError(taskId, + new KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + } + // postpone finishing command when config entry is applied + }, fsmExecutor); + } if (state.getType() == Normal) { if (toBePurged) { rangeWriter.state(State.newBuilder() @@ -1592,7 +1610,7 @@ private void detectZombieState(KVRangeDescriptor descriptor) { } } - private void checkZombieState() { + private void checkAndRepairFromZombieState() { if (zombieAt > 0 && Duration.ofMillis(HLC.INST.getPhysical() - zombieAt).toSeconds() > opts.getZombieTimeoutSec()) { ClusterConfig clusterConfig = wal.latestClusterConfig(); @@ -1604,7 +1622,8 @@ private void checkZombieState() { clusterConfig); wal.recover().whenComplete((v, e) -> recovering.set(false)); } - } else { + } else if (!clusterConfig.getVotersList().contains(hostStoreId) + && !clusterConfig.getLearnersList().contains(hostStoreId)) { 
log.info("Zombie state detected, send quit signal."); quitSignal.complete(null); } From 22f9f4cae99d4918aac8cf689c477c2c4f5c328c Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Sun, 17 Aug 2025 14:19:24 +0800 Subject: [PATCH 03/20] Reduce memory overhead caused by inefficient argument formatter --- .../bifromq/basecrdt/util/Formatter.java | 48 +++++++++++-------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java index 4fc6aa7b4..35f3d678c 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java @@ -41,30 +41,36 @@ public static Supplier toPrintable(Replica replica) { return () -> replica.getUri() + "-" + BaseEncoding.base32().encode(replica.getId().toByteArray()); } - public static String toPrintable(DeltaMessage delta) { - try { - return JsonFormat.printer().print(delta); - } catch (Exception e) { - // ignore - return delta.toString(); - } + public static Supplier toPrintable(DeltaMessage delta) { + return () -> { + try { + return JsonFormat.printer().print(delta); + } catch (Exception e) { + // ignore + return delta.toString(); + } + }; } - public static String toPrintable(AckMessage ack) { - try { - return JsonFormat.printer().print(ack); - } catch (Exception e) { - // ignore - return ack.toString(); - } + public static Supplier toPrintable(AckMessage ack) { + return () -> { + try { + return JsonFormat.printer().print(ack); + } catch (Exception e) { + // ignore + return ack.toString(); + } + }; } - public static String toPrintable(CRDTStoreMessage ack) { - try { - return JsonFormat.printer().print(ack); - } catch (Exception e) { - // ignore - return ack.toString(); - } + public static Supplier toPrintable(CRDTStoreMessage ack) { + return () -> 
{ + try { + return JsonFormat.printer().print(ack); + } catch (Exception e) { + // ignore + return ack.toString(); + } + }; } } From 2138a7e096a68367271dd0a39b946fcc280cb566 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Sun, 17 Aug 2025 14:23:36 +0800 Subject: [PATCH 04/20] Improve the backpressure mechanism when the downstream is stalled --- .../apache/bifromq/basescheduler/Batcher.java | 79 +++++++++++-------- .../apache/bifromq/basescheduler/EMALong.java | 49 ++++++++---- .../basescheduler/BatchCallSchedulerTest.java | 22 ++++-- .../bifromq/basescheduler/EMALongTest.java | 8 +- 4 files changed, 101 insertions(+), 57 deletions(-) diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java index d443430a8..754f46284 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java @@ -14,32 +14,33 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basescheduler; -import org.apache.bifromq.basescheduler.exception.BackPressureException; -import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.DistributionSummary; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.Timer; -import java.util.ArrayDeque; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basescheduler.exception.BackPressureException; +import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; @Slf4j final class Batcher { @@ -72,7 +73,7 @@ final class Batcher { this.batchCallBuilder = batchCallBuilder; this.capacityEstimator = capacityEstimator; this.maxBurstLatency = maxBurstLatency; - this.batchPool = new ArrayDeque<>(); + this.batchPool = new ConcurrentLinkedDeque<>(); this.emaQueueingTime = new EMALong(System::nanoTime, 0.1, 0.9, maxBurstLatency); Tags tags = Tags.of("name", name, "key", Integer.toUnsignedString(System.identityHashCode(this))); maxPipelineDepthGauge = Gauge.builder("batcher.pipeline.max", capacityEstimator::maxPipelineDepth) @@ -96,7 +97,7 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque return CompletableFuture.failedFuture( new RejectedExecutionException("Batcher has been shut down")); } - if 
(emaQueueingTime.get() < maxBurstLatency) { + if (Math.max(emaQueueingTime.get(), headCallWaitingNanos()) < maxBurstLatency) { ICallTask callTask = new CallTask<>(batcherKey, request); boolean offered = callTaskBuffers.offer(callTask); assert offered; @@ -104,10 +105,18 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque return callTask.resultPromise(); } else { dropCounter.increment(); - return CompletableFuture.failedFuture(new BackPressureException("Too high average latency")); + return CompletableFuture.failedFuture(new BackPressureException("Batch call busy")); } } + private long headCallWaitingNanos() { + ICallTask head = callTaskBuffers.peek(); + if (head != null) { + return System.nanoTime() - head.ts(); + } + return 0; + } + public CompletableFuture close() { if (state.compareAndSet(State.RUNNING, State.SHUTTING_DOWN)) { checkShutdownCompletion(); @@ -137,6 +146,7 @@ private void cleanupMetrics() { while ((batchCall = batchPool.poll()) != null) { batchCall.destroy(); } + batchCallBuilder.close(); } private void trigger() { @@ -178,30 +188,37 @@ private void batchAndEmit() { int finalBatchSize = batchSize; CompletableFuture future = batchCall.execute(); runningBatchCalls.add(future); - future.whenComplete((v, e) -> { - runningBatchCalls.remove(future); - long execEnd = System.nanoTime(); - if (e != null) { - batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); - } else { - long batchCallLatency = execEnd - execBegin; - capacityEstimator.record(finalBatchSize, batchCallLatency); - batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS); - batchedTasks.forEach(t -> { - long callLatency = execEnd - t.ts(); - batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS); - }); - } - returnBatchCall(batchCall); - pipelineDepth.getAndDecrement(); - // After each completion, check for shutdown - if (state.get() == State.SHUTTING_DOWN) { - checkShutdownCompletion(); - } - if (!callTaskBuffers.isEmpty()) { - trigger(); - } - }); 
+ future + .orTimeout(maxBurstLatency, TimeUnit.NANOSECONDS) // Ensure we don't block indefinitely + .whenComplete((v, e) -> { + runningBatchCalls.remove(future); + long execEnd = System.nanoTime(); + if (e != null) { + if (e instanceof TimeoutException) { + batchedTasks.forEach(t -> t.resultPromise() + .completeExceptionally(new BackPressureException("Batch Call timeout", e))); + } else { + batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); + } + } else { + long batchCallLatency = execEnd - execBegin; + capacityEstimator.record(finalBatchSize, batchCallLatency); + batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS); + batchedTasks.forEach(t -> { + long callLatency = execEnd - t.ts(); + batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS); + }); + } + returnBatchCall(batchCall); + pipelineDepth.getAndDecrement(); + // After each completion, check for shutdown + if (state.get() == State.SHUTTING_DOWN) { + checkShutdownCompletion(); + } + if (!callTaskBuffers.isEmpty()) { + trigger(); + } + }); } catch (Throwable e) { log.error("Batch call failed unexpectedly", e); batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java index 0f8d898c9..f412a559e 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java @@ -19,43 +19,58 @@ package org.apache.bifromq.basescheduler; -import java.util.concurrent.atomic.AtomicLong; +import com.google.common.base.Preconditions; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; class EMALong { private static final double NANOS_PER_SECOND = 1_000_000_000.0; private final Supplier nowSupplier; - private final double alpha; - private final double decay; + private final double alpha; // 
(0,1] + private final double decay; // (0,1] private final long decayDelayNanos; - private final AtomicLong value = new AtomicLong(0); - private final AtomicLong lastUpdateTime = new AtomicLong(0); + private final AtomicReference state; public EMALong(Supplier nowSupplier, double alpha, double decay, long decayDelayNanos) { + Preconditions.checkArgument(alpha > 0.0 && alpha <= 1.0, "alpha must be in (0,1]"); + Preconditions.checkArgument(decay > 0.0 && decay <= 1.0, "decay must be in (0,1]"); + Preconditions.checkArgument(decayDelayNanos >= 0, "decayDelayNanos must be non-negative"); this.nowSupplier = nowSupplier; this.alpha = alpha; this.decay = decay; this.decayDelayNanos = decayDelayNanos; + this.state = new AtomicReference<>(new State(0L, 0L)); } public void update(long newValue) { - value.updateAndGet(v -> { - lastUpdateTime.set(nowSupplier.get()); - if (v == 0) { - return newValue; - } else { - return (long) Math.ceil(v * (1 - alpha) + newValue * alpha); + long now = nowSupplier.get(); + while (true) { + State prev = state.get(); + long newEma = (prev.ema == 0L) ? newValue : (long) Math.ceil(prev.ema * (1 - alpha) + newValue * alpha); + State next = new State(newEma, now); + if (state.compareAndSet(prev, next)) { + return; } - }); + } } public long get() { long now = nowSupplier.get(); - long lastUpdate = lastUpdateTime.get(); - if (decayDelayNanos < Long.MAX_VALUE && lastUpdate + decayDelayNanos < now) { - return (long) (value.get() - * Math.pow(decay, Math.ceil((now - lastUpdate - decayDelayNanos) / NANOS_PER_SECOND))); + State s = state.get(); + if (s.ema == 0L || s.lastTs == 0L) { + return s.ema; + } + if (decayDelayNanos < Long.MAX_VALUE) { + long dt = now - s.lastTs; + if (dt > decayDelayNanos) { + double seconds = Math.ceil((dt - decayDelayNanos) / NANOS_PER_SECOND); + double decayed = s.ema * Math.pow(decay, seconds); + return decayed < 1.0 ? 
0L : Math.round(decayed); + } } - return value.get(); + return s.ema; + } + + private record State(long ema, long lastTs) { } } diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java index 21d3b63e9..1661d5405 100644 --- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java +++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java @@ -14,14 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basescheduler; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; -import org.apache.bifromq.basescheduler.exception.BackPressureException; import java.time.Duration; import java.util.ArrayList; import java.util.List; @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicInteger; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basescheduler.exception.BackPressureException; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -61,9 +62,7 @@ public void batchCall() { executor.submit(() -> { int i; while ((i = count.decrementAndGet()) >= 0) { - scheduler.schedule(i).whenComplete((v, e) -> { - latch.countDown(); - }); + scheduler.schedule(i).whenComplete((v, e) -> latch.countDown()); } }); latch.await(); @@ -93,4 +92,17 @@ public void backPressure() { assertEquals(e.getCause().getClass(), BackPressureException.class); } } + + @Test + public void batchCallTimeout() { + TestBatchCallScheduler scheduler = + new TestBatchCallScheduler(1, Duration.ofNanos(Long.MAX_VALUE), Duration.ofSeconds(1)); + try { + 
scheduler.schedule(1).join(); + fail(); + } catch (Throwable e) { + assertEquals(e.getCause().getClass(), BackPressureException.class); + } + scheduler.close(); + } } diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java index 49fd1726a..3ca03f620 100644 --- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java +++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basescheduler; @@ -81,10 +81,10 @@ void testDecayBeforeDelay() { void testDecayAfterDelay() { // set decay=0.5, decayDelay=1s EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L); - fakeTime.set(0L); + fakeTime.set(1L); ema.update(100L); // advance time to after delay + 2s total => one decay period - fakeTime.set(1_000_000_000L + 1_000_000_000L); + fakeTime.set(1_000_000_001L + 1_000_000_000L); // (now - lastUpdate - delay) / 1e9 = (2s - 1s)/1e9 = 1 => ceil(1) =1 // value * decay^1 = 100 * 0.5 = 50 assertEquals(ema.get(), 50); @@ -94,7 +94,7 @@ void testDecayAfterDelay() { void testMultipleDecayPeriods() { // decay=0.5, delay=1s EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L); - fakeTime.set(0L); + fakeTime.set(1L); ema.update(80L); // advance time to after delay + 3.2s => ceil(3.2)=4 periods fakeTime.set(1_000_000_000L + 3_200_000_000L); From c89355e181896a7a3e9959afcabfa8f42f79aa2f Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Sun, 17 Aug 2025 14:35:27 +0800 Subject: [PATCH 05/20] 1. Reducing AGENT_HOST_MAP sync overhead when failed member detected 2. 
Remove deprecated field in internal proto --- base-cluster/pom.xml | 4 + .../memberlist/HostMemberList.java | 98 ++++++++++++------- .../memberlist/IHostMemberList.java | 17 +++- .../basecluster/membership/HostMember.proto | 2 +- .../bifromq/basecluster/AgentHostsTest.java | 32 +++--- .../memberlist/HostMemberListTest.java | 59 ++++++----- .../bifromq/basecrdt/store/AntiEntropy.java | 2 +- .../basecrdt/store/AntiEntropyManager.java | 14 ++- base-util/pom.xml | 4 + .../bifromq/base/util}/RendezvousHash.java | 6 +- .../base/util}/RendezvousHashTest.java | 2 +- .../dist/worker/DeliverExecutorGroup.java | 1 + 12 files changed, 145 insertions(+), 96 deletions(-) rename {bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker => base-util/src/main/java/org/apache/bifromq/base/util}/RendezvousHash.java (94%) rename {bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker => base-util/src/test/java/org/apache/bifromq/base/util}/RendezvousHashTest.java (98%) diff --git a/base-cluster/pom.xml b/base-cluster/pom.xml index 34c978411..1bcc08082 100644 --- a/base-cluster/pom.xml +++ b/base-cluster/pom.xml @@ -33,6 +33,10 @@ org.apache.bifromq base-env-provider + + org.apache.bifromq + base-util + org.apache.bifromq base-hlc diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java index df208694a..765c8be67 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist; @@ -25,28 +25,8 @@ import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static org.apache.bifromq.basecrdt.store.ReplicaIdGenerator.generate; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.memberlist.agent.Agent; -import org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider; -import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.Doubt; -import org.apache.bifromq.basecluster.membership.proto.Fail; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.membership.proto.HostMember; -import org.apache.bifromq.basecluster.membership.proto.Join; -import org.apache.bifromq.basecluster.membership.proto.Quit; -import org.apache.bifromq.basecluster.messenger.IMessenger; -import org.apache.bifromq.basecluster.proto.ClusterMessage; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.store.ICRDTStore; -import org.apache.bifromq.basehlc.HLC; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import com.google.protobuf.AbstractMessageLite; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; @@ -69,7 +49,30 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.memberlist.agent.Agent; +import 
org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider; +import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.Doubt; +import org.apache.bifromq.basecluster.membership.proto.Fail; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.membership.proto.HostMember; +import org.apache.bifromq.basecluster.membership.proto.Join; +import org.apache.bifromq.basecluster.membership.proto.Quit; +import org.apache.bifromq.basecluster.messenger.IMessenger; +import org.apache.bifromq.basecluster.proto.ClusterMessage; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.ICRDTStore; +import org.apache.bifromq.basehlc.HLC; +/** + * HostMemberList implementation using CRDT for achieving a consistent view of the host members in the cluster. + */ @Slf4j public class HostMemberList implements IHostMemberList { private final AtomicReference state = new AtomicReference<>(State.JOINED); @@ -85,6 +88,18 @@ public class HostMemberList implements IHostMemberList { private final MetricManager metricManager; private final String[] tags; private volatile HostMember local; + + /** + * Constructor of HostMemberList. 
+ * + * @param bindAddr the address to bind the host member + * @param port the port to bind the host member + * @param messenger the messenger to use for communication + * @param scheduler the scheduler to use for scheduling tasks + * @param store the CRDT store to use for storing internal OR-Map + * @param addressResolver the address resolver to resolve host endpoints to addresses + * @param tags the tags to be used for metrics + */ public HostMemberList(String bindAddr, int port, IMessenger messenger, @@ -134,10 +149,13 @@ private boolean join(HostMember member) { if (joined) { // add it into crdt log.debug("Member[{}] joins the cluster: local={}", member, local); - Optional memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint()); - if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) { - hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString()) - .with(MVRegOperation.write(member.toByteString()))); + if (member == local) { + // only update crdt if it's local member + Optional memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint()); + if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) { + hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString()) + .with(MVRegOperation.write(member.toByteString()))); + } } // update crdt landscape store.join(hostListCRDT.id(), currentMembers().keySet().stream() @@ -148,12 +166,11 @@ private boolean join(HostMember member) { } } - private void drop(HostEndpoint memberEndpoint, int incarnation) { + private void drop(HostEndpoint memberEndpoint, int incarnation, boolean fromQuit) { synchronized (this) { boolean removed = removeMember(memberEndpoint, incarnation); Optional memberInCRDT = getHostMember(hostListCRDT, memberEndpoint); - if (memberInCRDT.isPresent()) { - // remove it from crdt if any + if (!fromQuit && memberInCRDT.isPresent() && 
shouldReportFailure(memberInCRDT.get().getEndpoint())) { hostListCRDT.execute(ORMapOperation.remove(memberEndpoint.toByteString()).of(mvreg)); } if (removed) { @@ -165,6 +182,17 @@ private void drop(HostEndpoint memberEndpoint, int incarnation) { } } + private boolean shouldReportFailure(HostEndpoint failedMemberEndpoint) { + // if local member is responsible for removing the failed member from CRDT + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodes(currentMembers().keySet()) + .build(); + HostEndpoint reporter = hash.get(failedMemberEndpoint); + return reporter.getId().equals(local.getEndpoint().getId()); + } + @Override public boolean isZombie(HostEndpoint endpoint) { return !endpoint.getId().equals(local.getEndpoint().getId()) @@ -247,7 +275,6 @@ public IAgent host(String agentId) { tags)); local = local.toBuilder() .setIncarnation(local.getIncarnation() + 1) - .addAgentId(agentId) // deprecate since 3.3.3 .putAgent(agentId, agentEndpoint.getIncarnation()) .build(); join(local); @@ -265,8 +292,6 @@ public CompletableFuture stopHosting(String agentId) { synchronized (this) { local = local.toBuilder() .setIncarnation(local.getIncarnation() + 1) - .clearAgentId() - .addAllAgentId(agentMap.keySet()) // deprecate since 3.3.3 .clearAgent() .putAllAgent(Maps.transformValues(agentMap, a -> a.local().getIncarnation())) .build(); @@ -279,7 +304,7 @@ public CompletableFuture stopHosting(String agentId) { @Override public Observable>> landscape() { - return membershipSubject.map(m -> Maps.transformValues(m, v -> Sets.newHashSet(v.getAgentIdList()))); + return membershipSubject.map(m -> Maps.transformValues(m, v -> v.getAgentMap().keySet())); } private Map currentMembers() { @@ -327,6 +352,9 @@ private void handleMessage(ClusterMessage message) { case QUIT -> handleQuit(message.getQuit()); case FAIL -> 
handleFail(message.getFail()); case DOUBT -> handleDoubt(message.getDoubt()); + default -> { + // never happen + } } } @@ -363,7 +391,7 @@ private void handleFail(Fail fail) { } else if (isZombie(failedEndpoint)) { clearZombie(failedEndpoint); } else { - drop(failedEndpoint, fail.getIncarnation()); + drop(failedEndpoint, fail.getIncarnation(), false); } } @@ -371,7 +399,7 @@ private void handleQuit(Quit quit) { HostEndpoint quitEndpoint = quit.getEndpoint(); log.debug("Member[{}] quits the cluster", quitEndpoint); if (!quitEndpoint.equals(local.getEndpoint()) && !isZombie(quitEndpoint)) { - drop(quitEndpoint, quit.getIncarnation()); + drop(quitEndpoint, quit.getIncarnation(), true); } } @@ -388,7 +416,7 @@ private void handleDoubt(Doubt doubt) { private void clearZombie(HostEndpoint zombieEndpoint) { // drop zombie if any, and broadcast a quit on behalf of it - drop(zombieEndpoint, Integer.MAX_VALUE); + drop(zombieEndpoint, Integer.MAX_VALUE, false); messenger.spread(ClusterMessage.newBuilder() .setQuit(Quit.newBuilder().setEndpoint(zombieEndpoint).setIncarnation(Integer.MAX_VALUE).build()) .build()); diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java index c772a406d..23bd05507 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java @@ -14,19 +14,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.membership.proto.HostMember; import io.reactivex.rxjava3.core.Observable; import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.membership.proto.HostMember; +/** + * The interface of host member list service. + */ public interface IHostMemberList { /** * The member from local. @@ -35,6 +38,12 @@ public interface IHostMemberList { */ HostMember local(); + /** + * If the given endpoint is considered a zombie(The dead endpoint used to live in the local host). + * + * @param endpoint the endpoint + * @return true if the given endpoint is considered a zombie. + */ boolean isZombie(HostEndpoint endpoint); /** diff --git a/base-cluster/src/main/proto/basecluster/membership/HostMember.proto b/base-cluster/src/main/proto/basecluster/membership/HostMember.proto index 9d706d443..63b2f4d5a 100644 --- a/base-cluster/src/main/proto/basecluster/membership/HostMember.proto +++ b/base-cluster/src/main/proto/basecluster/membership/HostMember.proto @@ -34,6 +34,6 @@ message HostEndpoint{ message HostMember { HostEndpoint endpoint = 1; uint32 incarnation = 2; // incarnation of the node, managed by the node itself - repeated string agentId = 3; // deprecate since 3.3.3, the list of agents reside on the host + //repeated string agentId = 3; deprecate since 3.3.3, the list of agents reside on the host map agent = 4; // the map of agent id to incarnation } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java index cce647ab3..0d2e2e98a 100644 
--- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; @@ -22,6 +22,13 @@ import static com.google.protobuf.ByteString.copyFromUtf8; import static org.awaitility.Awaitility.await; +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.observers.TestObserver; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; import org.apache.bifromq.basecluster.agent.proto.AgentMessage; @@ -30,13 +37,6 @@ import org.apache.bifromq.basecluster.memberlist.agent.IAgent; import org.apache.bifromq.basecluster.memberlist.agent.IAgentMember; import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import com.google.common.collect.Sets; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.observers.TestObserver; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import lombok.extern.slf4j.Slf4j; import org.testng.Assert; import org.testng.annotations.Test; @@ -86,16 +86,12 @@ public void testUnregister() { @StoreCfg(id = "s1", isSeed = true), @StoreCfg(id = "s2"), @StoreCfg(id = "s3"), - @StoreCfg(id = "s4"), - @StoreCfg(id = "s5"), }) @Test public void testMultipleAgentHosts() { - await().until(() -> storeMgr.membership("s1").size() == 5); - await().until(() -> storeMgr.membership("s2").size() == 5); - await().until(() -> storeMgr.membership("s3").size() == 5); - await().until(() -> 
storeMgr.membership("s4").size() == 5); - await().until(() -> storeMgr.membership("s5").size() == 5); + await().forever().until(() -> storeMgr.membership("s1").size() == 3); + await().forever().until(() -> storeMgr.membership("s2").size() == 3); + await().forever().until(() -> storeMgr.membership("s3").size() == 3); } @Test @@ -395,8 +391,8 @@ public void testAgentClusterPartitionAndHealing() { log.info("integrate s1"); // integrate s1 into the cluster storeMgr.integrate("s1"); - await().until(() -> agentOnS1.membership().blockingFirst().size() == 4); - await().until(() -> agentOnS2.membership().blockingFirst().size() == 4); - await().until(() -> agentOnS3.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS1.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS2.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS3.membership().blockingFirst().size() == 4); } } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java index d51b20dae..0a93cf4c2 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java @@ -14,11 +14,12 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist; +import static java.util.Collections.emptyIterator; import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; import static org.apache.bifromq.basecluster.memberlist.Fixtures.LOCAL_ADDR; import static org.apache.bifromq.basecluster.memberlist.Fixtures.LOCAL_ENDPOINT; @@ -27,7 +28,6 @@ import static org.apache.bifromq.basecluster.memberlist.Fixtures.REMOTE_ADDR_1; import static org.apache.bifromq.basecluster.memberlist.Fixtures.REMOTE_HOST_1_ENDPOINT; import static org.apache.bifromq.basecluster.memberlist.Fixtures.ZOMBIE_ENDPOINT; -import static java.util.Collections.emptyIterator; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.mockConstruction; @@ -39,6 +39,18 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import com.google.common.collect.Iterators; +import com.google.common.util.concurrent.MoreExecutors; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.schedulers.Schedulers; +import io.reactivex.rxjava3.schedulers.Timed; +import io.reactivex.rxjava3.subjects.PublishSubject; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.memberlist.agent.Agent; import org.apache.bifromq.basecluster.memberlist.agent.IAgent; import org.apache.bifromq.basecluster.membership.proto.Doubt; @@ -55,18 +67,6 @@ import org.apache.bifromq.basecrdt.core.api.IORMap; import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import org.apache.bifromq.basecrdt.store.ICRDTStore; -import com.google.common.collect.Iterators; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.protobuf.ByteString; -import 
io.reactivex.rxjava3.core.Scheduler; -import io.reactivex.rxjava3.schedulers.Schedulers; -import io.reactivex.rxjava3.schedulers.Timed; -import io.reactivex.rxjava3.subjects.PublishSubject; -import java.net.InetSocketAddress; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; -import lombok.extern.slf4j.Slf4j; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.MockedConstruction; @@ -120,7 +120,7 @@ public void init() { assertEquals(local.getEndpoint().getAddress(), LOCAL_ADDR.getHostName()); assertEquals(local.getEndpoint().getPort(), LOCAL_ADDR.getPort()); assertTrue(local.getIncarnation() >= 0); - assertTrue(local.getAgentIdList().isEmpty()); + assertTrue(local.getAgentMap().isEmpty()); assertEquals(memberList.landscape().blockingFirst().size(), 1); Map hostMap = memberList.members().blockingFirst(); assertEquals(hostMap.size(), 1); @@ -157,15 +157,15 @@ public void stopHosting() { when(hostMemberOnCRDT.read()).thenReturn(emptyIterator()); IHostMemberList memberList = new HostMemberList(LOCAL_ADDR.getHostName(), LOCAL_ADDR.getPort(), messenger, scheduler, store, addressResolver); - HostMember local = memberList.local(); memberList.host(agentId); when(mockAgent.constructed().get(0).quit()).thenReturn(CompletableFuture.completedFuture(null)); memberList.stopHosting(agentId); - assertEquals(memberList.local().getAgentIdCount(), 0); + assertEquals(memberList.local().getAgentMap().size(), 0); assertEquals(memberList.landscape().blockingFirst().size(), 1); - assertTrue(local.getIncarnation() + 2 == memberList.local().getIncarnation()); + HostMember local = memberList.local(); + assertEquals(memberList.local().getIncarnation(), local.getIncarnation()); Map hostMap = memberList.members().blockingFirst(); - assertTrue(local.getIncarnation() + 2 == hostMap.get(local.getEndpoint())); + assertEquals((int) hostMap.get(local.getEndpoint()), local.getIncarnation()); verify(hostListCRDT, 
times(3)).execute(any(ORMapOperation.ORMapUpdate.class)); } @@ -197,7 +197,7 @@ public void handleJoin() { .build())); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(2)).execute(opCap.capture()); + verify(hostListCRDT, times(1)).execute(opCap.capture()); verify(store, times(2)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } @@ -243,7 +243,7 @@ public void handleJoinFromHealing() { .build(), LOCAL_ENDPOINT)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(2)).execute(opCap.capture()); + verify(hostListCRDT, times(1)).execute(opCap.capture()); ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); @@ -274,7 +274,7 @@ public void handleJoinFromDuplicatedHealing() { .build(), LOCAL_ENDPOINT)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); + verify(hostListCRDT, times(2)).execute(opCap.capture()); ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); @@ -333,9 +333,9 @@ public void handleFailAndDrop() { messageSubject.onNext(failMsg(REMOTE_HOST_1_ENDPOINT, 1)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); - assertEquals(((ORMapOperation.ORMapRemove) opCap.getAllValues().get(2)).valueType, CausalCRDTType.mvreg); - assertEquals(opCap.getAllValues().get(2).keyPath[0], REMOTE_HOST_1_ENDPOINT.toByteString()); + verify(hostListCRDT, times(2)).execute(opCap.capture()); + assertEquals(((ORMapOperation.ORMapRemove) opCap.getAllValues().get(1)).valueType, CausalCRDTType.mvreg); + assertEquals(opCap.getAllValues().get(1).keyPath[0], REMOTE_HOST_1_ENDPOINT.toByteString()); } 
@Test @@ -346,7 +346,6 @@ public void handleFailAndRenew() { messenger, scheduler, store, addressResolver); assertEquals(memberList.members().blockingFirst().get(LOCAL_ENDPOINT).intValue(), 0); - messageSubject.onNext(failMsg(LOCAL_ENDPOINT, 0)); messageSubject.onNext(failMsg(LOCAL_ENDPOINT, 0)); // this time will be ignored @@ -388,7 +387,7 @@ public void handleQuitNotExistMember() { messageSubject.onNext(quitMsg(REMOTE_HOST_1_ENDPOINT, 1)); // nothing will happen - verify(hostListCRDT, times(1)).execute(any(ORMapOperation.ORMapRemove.class)); + verify(hostListCRDT, never()).execute(any(ORMapOperation.ORMapRemove.class)); verify(store, times(1)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } @@ -409,7 +408,6 @@ public void handleQuitNotExistMemberOnCRDT() { argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } - @Test public void handleQuitSelf() { when(hostListCRDT.getMVReg(any())).thenReturn(hostMemberOnCRDT); @@ -444,8 +442,8 @@ public void handleQuitAndDrop() { messageSubject.onNext(quitMsg(REMOTE_HOST_1_ENDPOINT, 0)); // nothing will happen ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); - assertTrue(opCap.getAllValues().get(2) instanceof ORMapOperation.ORMapRemove); + verify(hostListCRDT, times(1)).execute(opCap.capture()); + assertFalse(opCap.getAllValues().get(0) instanceof ORMapOperation.ORMapRemove); verify(store, times(3)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); @@ -480,7 +478,6 @@ public void handleDoubtAndIgnore() { assertEquals(memberList.members().blockingFirst().get(LOCAL_ENDPOINT).intValue(), 0); } - private Timed joinMsg(HostMember member) { return to(ClusterMessage.newBuilder() .setJoin(Join.newBuilder() diff --git 
a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java index dff26bf68..1ddfea549 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java @@ -239,7 +239,7 @@ private void resend(DeltaMessage toResend) { if (currentDelta == toResend) { log.trace("Local[{}] resend delta to neighbor[{}]:\n{}", toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(toResend)); - deltaMsgCounter.increment(1D); + deltaMsgCounter.increment(); deltaMsgBytesCounter.increment(currentDelta.getSerializedSize()); neighborMessageSubject.onNext(new NeighborMessage(currentDelta, neighborAddr)); if (resendCount++ < 10) { diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java index e942ddaf6..2ed3cd52c 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java @@ -99,7 +99,17 @@ Observable neighborMessages() { CompletableFuture receive(DeltaMessage delta, ByteString sender) { log.trace("Local[{}] receive delta[{}] from addr[{}]:\n{}", toPrintable(localAddr), delta.getSeqNo(), toPrintable(sender), toPrintable(delta)); - metricManager.receiveDeltaNum.increment(1D); + return handleDelta(delta, sender).thenApply(ack -> { + metricManager.sendAckNum.increment(); + metricManager.sendAckBytes.increment(ack.getSerializedSize()); + log.trace("Local[{}] send ack[{}] to addr[{}]:\n{}", + toPrintable(localAddr), ack.getSeqNo(), toPrintable(sender), toPrintable(ack)); + return ack; + }); + } + + private CompletableFuture 
handleDelta(DeltaMessage delta, ByteString sender) { + metricManager.receiveDeltaNum.increment(); metricManager.receiveDeltaBytes.increment(delta.getSerializedSize()); AntiEntropy neighborAntiEntropy = neighborMap.get(sender); if (neighborAntiEntropy != null) { @@ -124,7 +134,7 @@ CompletableFuture receive(DeltaMessage delta, ByteString sender) { } void receive(AckMessage ack, ByteString neighborAddr) { - metricManager.receiveAckNum.increment(1D); + metricManager.receiveAckNum.increment(); metricManager.receiveAckBytes.increment(ack.getSerializedSize()); AntiEntropy neighborAntiEntropy = neighborMap.get(neighborAddr); if (neighborAntiEntropy != null) { diff --git a/base-util/pom.xml b/base-util/pom.xml index ca88f6c85..701acd89b 100644 --- a/base-util/pom.xml +++ b/base-util/pom.xml @@ -31,6 +31,10 @@ base-util + + com.google.guava + guava + io.micrometer micrometer-core diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java similarity index 94% rename from bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java rename to base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java index b5ec12199..583e33483 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java +++ b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java @@ -14,10 +14,10 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ -package org.apache.bifromq.dist.worker; +package org.apache.bifromq.base.util; import static com.google.common.hash.Hashing.murmur3_128; @@ -31,7 +31,7 @@ * @param The type of the node. 
*/ @Builder -class RendezvousHash { +public class RendezvousHash { private final Funnel keyFunnel; private final Funnel nodeFunnel; private final Iterable nodes; diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java similarity index 98% rename from bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java rename to base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java index a08733379..55b954c35 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java +++ b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.bifromq.dist.worker; +package org.apache.bifromq.base.util; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java index e0f87f398..70fcdb47a 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java @@ -39,6 +39,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.deliverer.IMessageDeliverer; import org.apache.bifromq.deliverer.TopicMessagePackHolder; import org.apache.bifromq.dist.worker.schema.GroupMatching; From e68de01aaa53bc86d244b3b3356c2b5723d0912b Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Tue, 19 Aug 2025 11:36:23 +0800 Subject: 
[PATCH 06/20] 1. Correct the log context for CRDT 2. MDCLogger support lambda --- .../bifromq/basecrdt/service/CRDTCluster.java | 2 +- .../core/internal/AWORSetInflater.java | 9 +++--- .../core/internal/CCounterInflater.java | 9 +++--- .../core/internal/CausalCRDTInflater.java | 6 ++-- .../internal/CausalCRDTInflaterFactory.java | 25 ++++++++------- .../core/internal/DWFlagInflater.java | 9 +++--- .../core/internal/EWFlagInflater.java | 8 ++--- .../internal/InMemReplicaStateLattice.java | 7 ++-- .../basecrdt/core/internal/MVRegInflater.java | 11 ++++--- .../basecrdt/core/internal/ORMapInflater.java | 11 ++++--- .../core/internal/RWORSetInflater.java | 11 ++++--- .../bifromq/basecrdt/store/CRDTStore.java | 1 + .../bifromq/basecrdt/util/Formatter.java | 2 +- .../core/benchmark/CRDTBenchmarkTemplate.java | 6 ++-- .../basecrdt/core/internal/AWORSetTest.java | 16 +++++----- .../basecrdt/core/internal/CCounterTest.java | 22 ++++++------- .../basecrdt/core/internal/CRDTTest.java | 6 ++-- .../basecrdt/core/internal/DWFlagTest.java | 14 ++++---- .../basecrdt/core/internal/EWFlagTest.java | 14 ++++---- .../InMemReplicaStateLatticeTest.java | 25 ++++++++------- .../basecrdt/core/internal/MVRegTest.java | 18 +++++------ .../basecrdt/core/internal/ORMapTest.java | 32 +++++++++---------- .../basecrdt/core/internal/RWORSetTest.java | 14 ++++---- .../org/apache/bifromq/logger/MDCLogger.java | 10 +++++- 24 files changed, 155 insertions(+), 133 deletions(-) diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java index 72b13ae93..25eb0c825 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java @@ -73,7 +73,7 @@ class CRDTCluster> { this.store = store; this.agentHost = agentHost; 
replicaId = generate(uri); - log = MDCLogger.getLogger(CRDTCluster.class, "replica", print(replicaId)); + log = MDCLogger.getLogger(CRDTCluster.class, "store", store.id(), "replica", print(replicaId)); membershipAgent = agentHost.host(replicaId.getUri()); endpoint = AgentMemberAddr.newBuilder() .setName(AgentUtil.toAgentMemberName(replicaId)) diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java index d3662f976..0d91e8410 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IAWORSet; import org.apache.bifromq.basecrdt.core.api.IAWORSetInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class AWORSetInflater extends CausalCRDTInflater implements IAWORSetInflater { - AWORSetInflater(Replica replica, + AWORSetInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java index 4bef5345e..06c7e5810 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CCounterOperation; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.ICCounter; import org.apache.bifromq.basecrdt.core.api.ICCounterInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class CCounterInflater extends CausalCRDTInflater implements ICCounterInflater { - CCounterInflater(Replica replica, + CCounterInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java index f38d8bd02..ba9e912c5 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java @@ -73,11 +73,13 @@ abstract class CausalCRDTInflater new AWORSetInflater(replicaId, lattice, executor, inflationInterval, tags); - case rworset -> new RWORSetInflater(replicaId, lattice, executor, inflationInterval, tags); - case ormap -> new ORMapInflater(replicaId, lattice, executor, inflationInterval, tags); - case cctr -> new CCounterInflater(replicaId, lattice, executor, inflationInterval, tags); - case dwflag -> new DWFlagInflater(replicaId, lattice, executor, inflationInterval, tags); - case ewflag -> new EWFlagInflater(replicaId, lattice, executor, inflationInterval, tags); - case mvreg -> new MVRegInflater(replicaId, lattice, executor, inflationInterval, tags); + case aworset -> new AWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case rworset -> new RWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case ormap -> new ORMapInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case cctr -> new CCounterInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case dwflag -> new DWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case ewflag -> new EWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case mvreg -> new MVRegInflater(storeId, replicaId, lattice, 
executor, inflationInterval, tags); }; } } diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java index bd8a654ea..af8276cca 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; import org.apache.bifromq.basecrdt.core.api.IDWFlag; import org.apache.bifromq.basecrdt.core.api.IDWFlagInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class DWFlagInflater extends CausalCRDTInflater implements IDWFlagInflater { - DWFlagInflater(Replica replica, + DWFlagInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java index e48b94642..dbcd80958 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java @@ -19,18 +19,18 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; import org.apache.bifromq.basecrdt.core.api.IEWFlag; import org.apache.bifromq.basecrdt.core.api.IEWFlagInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class EWFlagInflater extends CausalCRDTInflater implements IEWFlagInflater { - EWFlagInflater(Replica replica, IReplicaStateLattice stateLattice, + EWFlagInflater(String storeId, Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java index d12fa3209..0b42d7c54 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; @@ -69,9 +69,10 @@ class InMemReplicaStateLattice implements IReplicaStateLattice { private final Duration historyExpire; private final long maxCompactionDuration; - InMemReplicaStateLattice(Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) { + InMemReplicaStateLattice(String storeId, Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) { this.ownerReplica = ownerReplica; - this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class, "replica", print(ownerReplica)); + this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class, + "store", storeId, "replica", print(ownerReplica)); this.historyExpire = historyExpire; this.maxCompactionDuration = maxCompactionTime.toNanos(); } diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java index 3044da2f4..497844b6d 100644 --- 
a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IMVRegInflater; import org.apache.bifromq.basecrdt.core.api.MVRegOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class MVRegInflater extends CausalCRDTInflater implements IMVRegInflater { - MVRegInflater(Replica replica, + MVRegInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java index 137b62343..aa8bec0b6 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IORMap; import org.apache.bifromq.basecrdt.core.api.IORMapInflater; import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class ORMapInflater extends CausalCRDTInflater implements IORMapInflater { - ORMapInflater(Replica replica, + ORMapInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java index 8698aa25c..c842a58a4 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IRWORSet; import org.apache.bifromq.basecrdt.core.api.IRWORSetInflater; import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class RWORSetInflater extends CausalCRDTInflater implements IRWORSetInflater { - RWORSetInflater(Replica replica, + RWORSetInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java index 9201d72c1..7a639809c 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java @@ -72,6 +72,7 @@ public CRDTStore(CRDTStoreOptions options) { storeExecutor = options.storeExecutor(); String[] tags = new String[] {"store.id", storeId}; inflaterFactory = new CausalCRDTInflaterFactory( + options.id(), options.inflationInterval(), options.orHistoryExpireTime(), options.maxCompactionTime(), diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java index 35f3d678c..a7672009e 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java +++ 
b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java @@ -38,7 +38,7 @@ public static String print(Replica replica) { } public static Supplier toPrintable(Replica replica) { - return () -> replica.getUri() + "-" + BaseEncoding.base32().encode(replica.getId().toByteArray()); + return () -> replica.getUri() + "-" + replica.hashCode(); } public static Supplier toPrintable(DeltaMessage delta) { diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java index 398c4a70a..6adbba88f 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java @@ -14,20 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.benchmark; import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory; import com.google.protobuf.ByteString; import java.io.IOException; import java.nio.ByteBuffer; import java.time.Duration; import java.util.concurrent.Executors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.runner.Runner; @@ -42,7 +42,7 @@ public abstract class CRDTBenchmarkTemplate { @Setup public void setup() throws IOException { - inflaterFactory = new CausalCRDTInflaterFactory( + inflaterFactory = new CausalCRDTInflaterFactory("testStoreId", Duration.ofMillis(200), Duration.ofSeconds(20), Duration.ofMillis(200), Executors.newSingleThreadScheduledExecutor()); doSetup(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java index c8e7d12e5..98293ad94 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; -import org.apache.bifromq.basecrdt.core.api.IAWORSet; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; +import org.apache.bifromq.basecrdt.core.api.IAWORSet; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; 
public class AWORSetTest extends CRDTTest { @@ -48,9 +48,9 @@ public class AWORSetTest extends CRDTTest { @Test public void testOperation() { - AWORSetInflater aworSetInflater = - new AWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000), - executor, Duration.ofMillis(100)); + AWORSetInflater aworSetInflater = new AWORSetInflater("testStore", leftReplica, + newStateLattice(leftReplica, 1000), + executor, Duration.ofMillis(100)); IAWORSet aworSet = aworSetInflater.getCRDT(); assertEquals(aworSet.id(), leftReplica); @@ -82,11 +82,11 @@ public void testOperation() { @Test public void testJoin() { - AWORSetInflater leftInflater = new AWORSetInflater(leftReplica, + AWORSetInflater leftInflater = new AWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IAWORSet left = leftInflater.getCRDT(); - AWORSetInflater rightInflater = new AWORSetInflater(rightReplica, + AWORSetInflater rightInflater = new AWORSetInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IAWORSet right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java index e0ffbf94f..6352f320a 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java @@ -24,14 +24,14 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.CCounterOperation; -import org.apache.bifromq.basecrdt.core.api.ICCounter; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import 
io.reactivex.rxjava3.observers.TestObserver; import java.time.Duration; import java.util.Optional; +import org.apache.bifromq.basecrdt.core.api.CCounterOperation; +import org.apache.bifromq.basecrdt.core.api.ICCounter; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class CCounterTest extends CRDTTest { @@ -46,7 +46,7 @@ public class CCounterTest extends CRDTTest { @Test public void testOperation() { - CCounterInflater cctrInflater = new CCounterInflater(leftReplica, + CCounterInflater cctrInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); ICCounter cctr = cctrInflater.getCRDT(); assertEquals(cctr.id(), leftReplica); @@ -73,11 +73,11 @@ public void testOperation() { @Test public void testJoin() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); ICCounter right = rightInflater.getCRDT(); @@ -101,11 +101,11 @@ public void testJoin() { @Test public void testZeroOut() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); 
ICCounter right = rightInflater.getCRDT(); @@ -129,11 +129,11 @@ public void testZeroOut() { @Test public void testZeroOutInBatch() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); ICCounter right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java index 1bd623184..5510283ee 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java @@ -19,8 +19,6 @@ package org.apache.bifromq.basecrdt.core.internal; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.util.concurrent.MoreExecutors; import java.time.Duration; import java.util.Optional; @@ -28,6 +26,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -45,7 +45,7 @@ public void tearDown() { } protected IReplicaStateLattice newStateLattice(Replica ownerReplica, long historyDurationInMS) { - return new InMemReplicaStateLattice(ownerReplica, + return new InMemReplicaStateLattice("storeId", ownerReplica, 
Duration.ofMillis(historyDurationInMS), Duration.ofMillis(200)); } diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java index 6f645d657..8a15a977a 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; -import org.apache.bifromq.basecrdt.core.api.IDWFlag; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; +import org.apache.bifromq.basecrdt.core.api.IDWFlag; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; @Slf4j @@ -46,7 +46,7 @@ public class DWFlagTest extends CRDTTest { @Test public void testOperation() { - DWFlagInflater dwFlagInflater = new DWFlagInflater(leftReplica, + DWFlagInflater dwFlagInflater = new DWFlagInflater("testStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IDWFlag dwFlag = dwFlagInflater.getCRDT(); assertEquals(dwFlag.id(), leftReplica); @@ -65,12 +65,12 @@ public void testOperation() { @Test public void testJoin() { - DWFlagInflater leftInflater = new DWFlagInflater(leftReplica, + DWFlagInflater leftInflater = new DWFlagInflater("store1", 
leftReplica, newStateLattice(leftReplica, 1000000), executor, Duration.ofMillis(100)); IDWFlag left = leftInflater.getCRDT(); - DWFlagInflater rightInflater = new DWFlagInflater(rightReplica, + DWFlagInflater rightInflater = new DWFlagInflater("store2", rightReplica, newStateLattice(rightReplica, 1000000), executor, Duration.ofMillis(100)); IDWFlag right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java index 86bdf261a..8e6f216e6 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; @@ -25,13 +25,13 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; -import org.apache.bifromq.basecrdt.core.api.IEWFlag; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.observers.TestObserver; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; +import org.apache.bifromq.basecrdt.core.api.IEWFlag; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; @Slf4j @@ -47,7 +47,7 @@ public class EWFlagTest extends CRDTTest { @Test public void testOperation() { - EWFlagInflater ewFlagInflater = new EWFlagInflater(leftReplica, + EWFlagInflater ewFlagInflater = new EWFlagInflater("testStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IEWFlag ewFlag = ewFlagInflater.getCRDT(); @@ -67,11 +67,11 @@ public void testOperation() { @Test public void testJoin() { - EWFlagInflater leftInflater = new EWFlagInflater(leftReplica, + EWFlagInflater leftInflater = new EWFlagInflater("lestStore", leftReplica, newStateLattice(leftReplica, 1000000), executor, Duration.ofMillis(100)); IEWFlag left = leftInflater.getCRDT(); - EWFlagInflater rightInflater = new EWFlagInflater(rightReplica, + EWFlagInflater rightInflater = new EWFlagInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000000), executor, Duration.ofMillis(100)); IEWFlag right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java index 3ce126904..6c0c91805 100644 --- 
a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java @@ -14,46 +14,47 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; -import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot; import static com.google.common.collect.Lists.newArrayList; import static com.google.common.collect.Sets.newHashSet; import static com.google.protobuf.ByteString.copyFromUtf8; import static java.util.Collections.emptyMap; import static java.util.Collections.singleton; +import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.proto.StateLattice; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import 
java.time.Duration; import java.util.List; import java.util.Optional; import java.util.Set; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.proto.StateLattice; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class InMemReplicaStateLatticeTest { - private InMemReplicaStateLattice testLattice; private final Replica ownerReplica = Replica.newBuilder().setId(copyFromUtf8("Owner")).build(); private final ByteString replicaA = copyFromUtf8("A"); private final ByteString replicaB = copyFromUtf8("B"); + private InMemReplicaStateLattice testLattice; @BeforeMethod public void setup() { - testLattice = new InMemReplicaStateLattice(ownerReplica, Duration.ofMillis(1000), Duration.ofMillis(200)); + testLattice = new InMemReplicaStateLattice("storeId", ownerReplica, Duration.ofMillis(1000), + Duration.ofMillis(200)); assertFalse(testLattice.lattices().hasNext()); } @@ -495,7 +496,7 @@ public void testCompact6() throws InterruptedException { } @Test - public void compact7() throws InterruptedException { + public void testCompact7() throws InterruptedException { Set states = newHashSet( replacement(dot(replicaA, 4, singleDot(replicaA, 4)), dot(replicaA, 3), dot(replicaA, 1)), replacement(dot(replicaA, 2), dot(replicaA, 1))); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java index 3f18b2793..cb99c3ea6 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java @@ -19,19 +19,19 @@ package org.apache.bifromq.basecrdt.core.internal; +import static java.util.Collections.emptyIterator; import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI; 
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import static java.util.Collections.emptyIterator; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; -import org.apache.bifromq.basecrdt.core.api.IMVReg; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.IMVReg; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class MVRegTest extends CRDTTest { @@ -49,7 +49,7 @@ public class MVRegTest extends CRDTTest { @Test public void testOperation() { - MVRegInflater mvRegInflater = new MVRegInflater(leftReplica, + MVRegInflater mvRegInflater = new MVRegInflater("storeId", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IMVReg mvReg = mvRegInflater.getCRDT(); assertEquals(mvReg.id(), leftReplica); @@ -67,11 +67,11 @@ public void testOperation() { @Test public void testJoin() { - MVRegInflater leftInflater = new MVRegInflater(leftReplica, newStateLattice(leftReplica, 10000), + MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 10000), executor, Duration.ofMillis(100)); IMVReg left = leftInflater.getCRDT(); - MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 10000), + MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 10000), executor, Duration.ofMillis(100)); IMVReg right = rightInflater.getCRDT(); @@ -96,11 +96,11 @@ public void testJoin() { @Test public void testJoin1() throws InterruptedException { - MVRegInflater leftInflater = new MVRegInflater(leftReplica, 
newStateLattice(leftReplica, 1000), + MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IMVReg left = leftInflater.getCRDT(); - MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 1000), + MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IMVReg right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java index 54afb219a..fd5c5ecc3 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java @@ -19,15 +19,23 @@ package org.apache.bifromq.basecrdt.core.internal; +import static java.util.Collections.emptySet; import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.ormap; -import static java.util.Collections.emptySet; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.disposables.Disposable; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; import org.apache.bifromq.basecrdt.core.api.CCounterOperation; import 
org.apache.bifromq.basecrdt.core.api.CausalCRDTType; @@ -44,14 +52,6 @@ import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.disposables.Disposable; -import java.time.Duration; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.slf4j.Slf4j; import org.testng.annotations.Test; @Slf4j @@ -76,7 +76,7 @@ public class ORMapTest extends CRDTTest { @Test public void testOperation() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("storeId", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap ormap = orMapInflater.getCRDT(); assertEquals(ormap.id(), leftReplica); @@ -191,11 +191,11 @@ public void testOperation() { @Test public void testJoin() { - ORMapInflater leftInflater = new ORMapInflater(leftReplica, + ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap leftMap = leftInflater.getCRDT(); - ORMapInflater rightInflater = new ORMapInflater(rightReplica, + ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IORMap rightMap = rightInflater.getCRDT(); @@ -286,11 +286,11 @@ public void testJoin() { @Test public void testJoinAfterCompaction() throws InterruptedException { - ORMapInflater leftInflater = new ORMapInflater(leftReplica, + ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap leftMap = leftInflater.getCRDT(); - ORMapInflater 
rightInflater = new ORMapInflater(rightReplica, + ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100), executor, Duration.ofMillis(100)); IORMap rightMap = rightInflater.getCRDT(); @@ -318,7 +318,7 @@ public void testJoinAfterCompaction() throws InterruptedException { @Test public void testSubCRDTGC() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap orMap = orMapInflater.getCRDT(); @@ -344,7 +344,7 @@ public void testSubCRDTGC() { @Test public void testInflationSubscriptionWhenGC() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap orMap = orMapInflater.getCRDT(); AtomicInteger inflationCount = new AtomicInteger(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java index 4d3320501..e677b8dda 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.IRWORSet; -import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.IRWORSet; +import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class RWORSetTest extends CRDTTest { @@ -49,7 +49,7 @@ public class RWORSetTest extends CRDTTest { @Test public void testOperation() { RWORSetInflater rworSetInflater = - new RWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000), + new RWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet rworSet = rworSetInflater.getCRDT(); assertEquals(rworSet.id(), leftReplica); @@ -82,11 +82,11 @@ public void testOperation() { @Test public void testJoin() { - RWORSetInflater leftInflater = new RWORSetInflater(leftReplica, + RWORSetInflater leftInflater = new RWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet left = leftInflater.getCRDT(); - RWORSetInflater rightInflater = new RWORSetInflater(rightReplica, + RWORSetInflater rightInflater = new RWORSetInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet right = rightInflater.getCRDT(); diff --git a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java index 32c2dd5e3..6d418ce5d 100644 --- a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java +++ 
b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java @@ -74,12 +74,20 @@ private void logWithMDC(Supplier> isEnabled, if (lvl.isEmpty()) { return; } + Object[] evaluated = args; + if (args != null && args.length > 0) { + evaluated = new Object[args.length]; + for (int i = 0; i < args.length; i++) { + Object a = args[i]; + evaluated[i] = (a instanceof Supplier) ? ((Supplier) a).get() : a; + } + } for (int i = 0; i < tags.length; i += 2) { MDC.put(tags[i], tags[i + 1]); } Map extraCtx = extraContext(); extraCtx.forEach(MDC::put); - delegate.log(marker, FQCN, lvl.get().toInt(), msg, args, t); + delegate.log(marker, FQCN, lvl.get().toInt(), msg, evaluated, t); for (int i = 0; i < tags.length; i += 2) { MDC.remove(tags[i]); } From 2fdf7898f474184a01561067a7a1cb9ded8c8a2c Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Tue, 19 Aug 2025 11:41:24 +0800 Subject: [PATCH 07/20] fixed a race condition which may cause pipeline graceful retargeting stuck --- .../bifromq/baserpc/client/BiDiStream.java | 6 +- .../bifromq/baserpc/client/ClientChannel.java | 22 +++--- .../baserpc/client/ManagedBiDiStream.java | 71 ++++++++++++------- .../TrafficDirectiveLoadBalancer.java | 44 ++++-------- .../server/AbstractResponsePipeline.java | 11 ++- 5 files changed, 77 insertions(+), 77 deletions(-) diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java index dcf7a1353..46b0b2c5b 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java @@ -21,8 +21,6 @@ import static io.grpc.stub.ClientCalls.asyncBidiStreamingCall; -import org.apache.bifromq.baserpc.RPCContext; -import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; import io.grpc.CallOptions; import io.grpc.Channel; import io.grpc.Context; @@ 
-36,10 +34,11 @@ import java.util.concurrent.atomic.AtomicBoolean; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baserpc.RPCContext; +import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; @Slf4j class BiDiStream implements IBiDiStream { - private final String tenantId; private final String serverId; private final ClientCallStreamObserver callStreamObserver; private final Subject outSubject = PublishSubject.create(); @@ -54,7 +53,6 @@ class BiDiStream implements IBiDiStream { MethodDescriptor methodDescriptor, Map metadata, CallOptions callOptions) { - this.tenantId = tenantId; this.serverId = serverId; Context ctx = Context.ROOT.fork() .withValue(RPCContext.TENANT_ID_CTX_KEY, tenantId) diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java index f753ba63b..e4dc3a4d4 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java @@ -19,16 +19,6 @@ package org.apache.bifromq.baserpc.client; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.baseenv.NettyEnv; -import org.apache.bifromq.baserpc.BluePrint; -import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor; -import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector; -import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider; -import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider; -import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import com.google.common.util.concurrent.MoreExecutors; 
import io.grpc.Channel; import io.grpc.ConnectivityState; @@ -50,6 +40,16 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import lombok.Builder; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.baseenv.NettyEnv; +import org.apache.bifromq.baserpc.BluePrint; +import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor; +import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector; +import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider; +import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider; +import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; class ClientChannel implements IClientChannel { private final String serviceUniqueName; @@ -136,7 +136,7 @@ public Observable>> serverList() { @Override public Observable serverSelectorObservable() { - return serverSelectorSubject; + return serverSelectorSubject.distinctUntilChanged(); } @Override diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java index 27f63fcfa..03159dcd3 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java @@ -116,14 +116,14 @@ final boolean isReady() { abstract void onServiceUnavailable(); private void reportNoServerAvailable() { - log.debug("Stream@{} no server available to target: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} no server available to target: method={}, state={}", + this.hashCode(), 
methodDescriptor.getBareMethodName(), state.get()); onNoServerAvailable(); } private void reportServiceUnavailable() { - log.debug("Stream@{} service unavailable to target: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} service unavailable to target: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); onServiceUnavailable(); } @@ -246,12 +246,12 @@ void close() { private void gracefulRetarget() { if (state.compareAndSet(State.Normal, State.PendingRetarget)) { - log.debug("Stream@{} start graceful retarget process: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} start graceful retarget process: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); if (prepareRetarget()) { // if it's ready to retarget, close it and start a new one - log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); state.set(State.Retargeting); bidiStream.get().close(); scheduleRetargetNow(); @@ -272,10 +272,12 @@ private void scheduleRetargetNow() { private void scheduleRetarget(Duration delay) { if (retargetScheduled.compareAndSet(false, true)) { - log.debug("Stream@{} schedule retarget task in {}ms: method={}", - this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName()); - CompletableFuture.runAsync(() -> retarget(this.serverSelector), - CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS)); + log.debug("Stream@{} schedule retarget task in {}ms: method={}, state={}", + this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName(), state.get()); + CompletableFuture.runAsync(() -> { + retargetScheduled.set(false); + 
retarget(this.serverSelector); + }, CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS)); } } @@ -328,7 +330,6 @@ private void retarget(IServerSelector serverSelector) { } } } - retargetScheduled.set(false); if (serverSelector != this.serverSelector) { // server selector has been changed, schedule a retarget scheduleRetargetNow(); @@ -338,10 +339,11 @@ private void retarget(IServerSelector serverSelector) { private void target(String serverId) { if (state.compareAndSet(State.Init, State.Normal) || state.compareAndSet(State.StreamDisconnect, State.Normal) + || state.compareAndSet(State.PendingRetarget, State.Normal) || state.compareAndSet(State.NoServerAvailable, State.Normal) || state.compareAndSet(State.Retargeting, State.Normal)) { - log.debug("Stream@{} build bidi-stream to target server[{}]: method={}", - this.hashCode(), serverId, methodDescriptor.getBareMethodName()); + log.debug("Stream@{} build stream to server[{}]: method={}, state={}", + this.hashCode(), serverId, methodDescriptor.getBareMethodName(), state.get()); BidiStreamContext bidiStreamContext = BidiStreamContext.from(new BiDiStream<>( tenantId, serverId, @@ -350,13 +352,13 @@ private void target(String serverId) { metadataSupplier.get(), callOptions)); bidiStream.set(bidiStreamContext); - onStreamCreated(); bidiStreamContext.subscribe(this::onNext, this::onError, this::onCompleted); bidiStreamContext.onReady(ts -> onStreamReady()); + onStreamCreated(); } if (bidiStream.get().bidiStream().isReady()) { - log.debug("Stream@{} ready after build to server[{}]: method={}", - this.hashCode(), serverId, methodDescriptor.getBareMethodName()); + log.debug("Stream@{} ready: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); onStreamReady(); } } @@ -367,8 +369,8 @@ private void onNext(OutT out) { if (state.get() == State.PendingRetarget && canStartRetarget()) { // do not close the stream inline CompletableFuture.runAsync(() -> { - log.debug("Stream@{} 
close current bidi-stream before retargeting: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close current stream before retargeting: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); state.set(State.Retargeting); bidiStream.get().close(); scheduleRetargetNow(); @@ -377,18 +379,33 @@ private void onNext(OutT out) { } private void onError(Throwable t) { - log.debug("BidiStream@{} error: method={}", this.hashCode(), methodDescriptor.getBareMethodName(), t); - state.compareAndSet(State.Normal, State.StreamDisconnect); + log.debug("Stream@{} error: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get(), t); + State s = state.get(); + if (s == State.Normal || s == State.PendingRetarget) { + state.compareAndSet(s, State.StreamDisconnect); + } onStreamError(t); - scheduleRetargetWithRandomDelay(); + if (s == State.PendingRetarget) { + scheduleRetargetNow(); + } else { + scheduleRetargetWithRandomDelay(); + } } private void onCompleted() { - log.debug("BidiStream@{} complete: method={}", this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close by server: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); // server gracefully close the stream - state.compareAndSet(State.Normal, State.StreamDisconnect); - onStreamError(new CancellationException("server close the bidi-stream")); - scheduleRetargetWithRandomDelay(); + State s = state.get(); + if (s == State.Normal || s == State.PendingRetarget) { + state.compareAndSet(s, State.StreamDisconnect); + } + onStreamError(new CancellationException("Server shutdown")); + if (s == State.PendingRetarget) { + scheduleRetargetNow(); + } + // wait for selector change to trigger retargeting } enum State { diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java 
b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java index 2539d9cdc..1f6c38572 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java @@ -19,17 +19,16 @@ package org.apache.bifromq.baserpc.client.loadbalancer; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY; import static com.google.common.base.Preconditions.checkNotNull; import static io.grpc.ConnectivityState.CONNECTING; import static io.grpc.ConnectivityState.IDLE; import static io.grpc.ConnectivityState.READY; import static io.grpc.ConnectivityState.SHUTDOWN; import static io.grpc.ConnectivityState.TRANSIENT_FAILURE; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY; -import org.apache.bifromq.baseenv.EnvProvider; import com.google.common.collect.Maps; import io.grpc.Attributes; import io.grpc.ConnectivityState; @@ -48,6 +47,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.IntStream; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baseenv.EnvProvider; @Slf4j public class TrafficDirectiveLoadBalancer extends LoadBalancer { @@ -72,6 +72,12 @@ public class TrafficDirectiveLoadBalancer extends LoadBalancer { this.currentPicker = new SubChannelPicker(); } + private static Set difference(Set a, Set b) { + Set aCopy = new HashSet<>(a); + 
aCopy.removeAll(b); + return aCopy; + } + @Override public void handleResolvedAddresses(ResolvedAddresses resolvedAddresses) { log.debug("Handle traffic change: resolvedAddresses={}", resolvedAddresses); @@ -183,25 +189,11 @@ private void updateBalancingState() { currentPicker.refresh(serverChannels); helper.updateBalancingState(newState, currentPicker); - Map allServers = currentServers; - ITenantRouter tenantRouter = - new TenantRouter(currentServers, currentTrafficDirective, currentServerGroupTags); - updateListener.onUpdate(new IServerSelector() { - @Override - public boolean exists(String serverId) { - return allServers.containsKey(serverId); - } - - @Override - public IServerGroupRouter get(String tenantId) { - return tenantRouter.get(tenantId); - } - - @Override - public String toString() { - return allServers.toString(); - } - }); + if (newState == READY || (newState == TRANSIENT_FAILURE && currentServers.isEmpty())) { + // notify when channel is ready or TRANSIENT_FAILURE state and no servers available + updateListener.onUpdate( + new TenantAwareServerSelector(currentServers, currentServerGroupTags, currentTrafficDirective)); + } } balancingStateUpdateScheduled.set(false); } @@ -275,10 +267,4 @@ private void updateSubChannelState(Subchannel subchannel, ConnectivityStateInfo subchannel.requestConnection(); } } - - private static Set difference(Set a, Set b) { - Set aCopy = new HashSet<>(a); - aCopy.removeAll(b); - return aCopy; - } } diff --git a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java index 2f740f0c4..7439c173d 100644 --- a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java +++ b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java @@ -14,13 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR 
CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.baserpc.server; -import org.apache.bifromq.base.util.FutureTracker; -import org.apache.bifromq.baserpc.metrics.RPCMetric; import io.grpc.Status; import io.grpc.stub.StreamObserver; import io.micrometer.core.instrument.Timer; @@ -28,6 +26,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicBoolean; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.FutureTracker; +import org.apache.bifromq.baserpc.metrics.RPCMetric; @Slf4j abstract class AbstractResponsePipeline extends AbstractStreamObserver { @@ -40,6 +40,7 @@ abstract class AbstractResponsePipeline extends AbstractStr protected AbstractResponsePipeline(StreamObserver responseObserver) { super(responseObserver); + this.responseObserver.setOnCancelHandler(this::cleanup); } @Override @@ -80,7 +81,7 @@ private void close(Throwable t) { /** * Handle the request and return the result via completable future, remember always throw exception asynchronously - * Returned future complete exceptionally will cause pipeline close + * Returned future complete exceptionally will cause pipeline close. 
* * @param tenantId the tenantId * @param request the request @@ -110,7 +111,6 @@ final CompletableFuture startHandlingRequest(RequestT request) { return respFuture; } - final void emitResponse(RequestT req, ResponseT resp) { if (!isClosed()) { log.trace("Response sent in pipeline@{}: request={}, response={}", hashCode(), req, resp); @@ -125,7 +125,6 @@ final void emitResponse(RequestT req, ResponseT resp) { protected void afterClose() { } - private void fail(Throwable throwable) { if (!isClosed()) { if (throwable instanceof CancellationException) { From a575ce5baf2a703e513ea09f287614050ccec0eb Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Tue, 19 Aug 2025 11:57:13 +0800 Subject: [PATCH 08/20] 1. reduce HostMemberList sync overhead during failure broadcast 2. expose refute signal to upper layer so it could be used to trigger CRDT refresh to speedup convergence. --- .../apache/bifromq/basecluster/AgentHost.java | 43 +++++++++------- .../bifromq/basecluster/IAgentHost.java | 20 +++++--- .../memberlist/HostMemberList.java | 10 ++++ .../memberlist/IHostMemberList.java | 8 +++ .../basecluster/memberlist/agent/Agent.java | 47 ++++++++++++----- .../memberlist/agent/AgentMember.java | 43 ++++++++++------ .../basecluster/memberlist/agent/IAgent.java | 29 ++++++++--- .../memberlist/agent/IAgentMember.java | 51 ++++++++++--------- .../bifromq/basecluster/AgentHostsTest.java | 2 +- .../bifromq/basecluster/AgentTestCluster.java | 26 +++++----- .../bifromq/basecrdt/service/CRDTService.java | 5 ++ .../basecrdt/service/ICRDTService.java | 17 +++++-- .../metaservice/BaseKVLandscapeCRDT.java | 5 ++ .../metaservice/BaseKVLandscapeReporter.java | 5 ++ .../BaseKVStoreBalancerStatesCRDT.java | 5 ++ .../BaseKVStoreBalancerStatesReporter.java | 5 ++ .../metaservice/IBaseKVLandscapeCRDT.java | 6 +++ .../metaservice/IBaseKVLandscapeReporter.java | 8 +++ .../IBaseKVStoreBalancerStatesCRDT.java | 3 ++ .../IBaseKVStoreBalancerStatesReporter.java | 8 +++ 
.../balance/KVStoreBalanceController.java | 9 ++++ .../balance/KVStoreBalanceControllerTest.java | 14 +++++ .../basekv/server/BaseKVStoreService.java | 2 + .../RPCServiceTrafficManager.java | 47 +++++++++-------- 24 files changed, 292 insertions(+), 126 deletions(-) diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java index 1633b3cf5..d74230271 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java @@ -14,14 +14,31 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; -import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.protobuf.ByteString; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.disposables.CompositeDisposable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.net.InetSocketAddress; +import java.time.Duration; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; import 
org.apache.bifromq.basecluster.fd.FailureDetector; import org.apache.bifromq.basecluster.fd.IFailureDetector; @@ -43,23 +60,6 @@ import org.apache.bifromq.basecrdt.store.ICRDTStore; import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage; import org.apache.bifromq.baseenv.EnvProvider; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.protobuf.ByteString; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.core.Scheduler; -import io.reactivex.rxjava3.disposables.CompositeDisposable; -import io.reactivex.rxjava3.schedulers.Schedulers; -import java.net.InetSocketAddress; -import java.time.Duration; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicReference; -import lombok.extern.slf4j.Slf4j; @Slf4j final class AgentHost implements IAgentHost { @@ -173,6 +173,11 @@ public Observable>> landscape() { return memberList.landscape(); } + @Override + public Observable refuteSignal() { + return memberList.refuteSignal(); + } + @Override public void close() { if (state.compareAndSet(State.STARTED, State.STOPPING)) { diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java index e804f4e55..d5b26aa47 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java @@ -14,11 +14,16 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster; +import io.reactivex.rxjava3.core.Observable; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; import org.apache.bifromq.basecluster.memberlist.IHostAddressResolver; import org.apache.bifromq.basecluster.memberlist.agent.IAgent; @@ -26,11 +31,6 @@ import org.apache.bifromq.basecluster.transport.ITransport; import org.apache.bifromq.basecluster.transport.TCPTransport; import org.apache.bifromq.basecluster.transport.Transport; -import io.reactivex.rxjava3.core.Observable; -import java.net.InetSocketAddress; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; /** * Agent host defines the interface for hosting agents and joining the cluster. @@ -101,6 +101,14 @@ static IAgentHost newInstance(AgentHostOptions options) { */ Observable>> landscape(); + /** + * Emits a signal whenever the local host actively refutes a suspicion of being dead. + * Each emission carries the HLC timestamp obtained when the refutation occurred. + * + * @return an observable stream of refutation timestamps + */ + Observable refuteSignal(); + /** * Shutdown the agent host. 
*/ diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java index 765c8be67..25868a209 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java @@ -37,6 +37,7 @@ import io.reactivex.rxjava3.core.Scheduler; import io.reactivex.rxjava3.disposables.CompositeDisposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; +import io.reactivex.rxjava3.subjects.PublishSubject; import java.net.InetSocketAddress; import java.util.HashSet; import java.util.Iterator; @@ -82,6 +83,7 @@ public class HostMemberList implements IHostMemberList { private final IHostAddressResolver addressResolver; private final BehaviorSubject> membershipSubject = BehaviorSubject.createDefault( new ConcurrentHashMap<>()); + private final PublishSubject refuteSubject = PublishSubject.create(); private final Map agentMap = new ConcurrentHashMap<>(); private final IORMap hostListCRDT; private final CompositeDisposable disposables = new CompositeDisposable(); @@ -235,6 +237,7 @@ public CompletableFuture stop() { .thenCompose(v -> store.stopHosting(hostListCRDT.id())) .whenComplete((v, e) -> { membershipSubject.onComplete(); + refuteSubject.onComplete(); metricManager.close(); state.set(State.QUITED); }); @@ -254,6 +257,8 @@ private void renew(int atLeastIncarnation) { synchronized (this) { local = local.toBuilder().setIncarnation(Math.max(local.getIncarnation(), atLeastIncarnation) + 1).build(); join(local); + agentMap.values().forEach(Agent::refreshRegistration); + refuteSubject.onNext(HLC.INST.get()); } } @@ -307,6 +312,11 @@ public Observable>> landscape() { return membershipSubject.map(m -> Maps.transformValues(m, v -> v.getAgentMap().keySet())); } + @Override + public Observable refuteSignal() { + return refuteSubject; + } + 
private Map currentMembers() { return membershipSubject.getValue(); } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java index 23bd05507..52b152fb8 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java @@ -80,4 +80,12 @@ public interface IHostMemberList { * @return the observable */ Observable>> landscape(); + + /** + * Emits a signal whenever the local member actively refutes a suspicion of being dead. + * Each emission carries the HLC timestamp obtained when the refutation occurred. + * + * @return an observable stream of refutation timestamps + */ + Observable refuteSignal(); } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java index 8a9e656f1..d1866ff9a 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java @@ -14,21 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist.agent; -import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static java.util.Collections.emptyMap; +import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.store.ICRDTStore; import com.google.common.collect.Sets; import com.google.protobuf.AbstractMessageLite; import io.micrometer.core.instrument.Gauge; @@ -50,13 +43,18 @@ import java.util.function.Supplier; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.ICRDTStore; @Slf4j public final class Agent implements IAgent { - private enum State { - JOINED, QUITTING, QUITED - } - private final ReadWriteLock quitLock = new ReentrantReadWriteLock(); private final String agentId; private final AgentEndpoint localEndpoint; @@ -140,6 +138,11 @@ public CompletableFuture deregister(IAgentMember member) { }); } + @Override + public void refreshRegistration() { + localMemberRegistry.values().forEach(AgentMember::refresh); + } + public CompletableFuture quit() { Lock writeLock = 
quitLock.writeLock(); try { @@ -194,7 +197,8 @@ private void handleAgentEndpointsUpdate(Set agentEndpoints) { .setEndpoint(memberAddr.getEndpoint()) .setIncarnation(memberAddr.getIncarnation()) .build(); - if (leftHosts.contains(agentEndpoint)) { + if (leftHosts.contains(agentEndpoint) + && shouldReportFailure(newAgentEndpoints, memberAddr.getEndpoint())) { agentCRDT.execute(ORMapOperation.remove(memberAddr.toByteString()).of(mvreg)); } } @@ -205,6 +209,17 @@ private void handleAgentEndpointsUpdate(Set agentEndpoints) { }); } + private boolean shouldReportFailure(Set allEndpoints, HostEndpoint failedMemberEndpoint) { + // if local member is responsible for removing the failed member from CRDT + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.getEndpoint().getId().asReadOnlyByteBuffer())) + .nodes(allEndpoints) + .build(); + AgentEndpoint reporter = hash.get(failedMemberEndpoint); + return reporter.getEndpoint().getId().equals(localEndpoint.getEndpoint().getId()); + } + private void skipRunIfNotJoined(Runnable runnable) { Lock readLock = quitLock.readLock(); try { @@ -231,4 +246,8 @@ private T runIfJoined(Supplier supplier) { readLock.unlock(); } } + + private enum State { + JOINED, QUITTING, QUITED + } } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java index e36980ab5..6e5d6e776 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java @@ -14,21 +14,13 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. 
+ * under the License. */ package org.apache.bifromq.basecluster.memberlist.agent; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.agent.proto.AgentMessage; -import org.apache.bifromq.basecluster.agent.proto.AgentMessageEnvelope; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basehlc.HLC; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.core.Scheduler; @@ -38,12 +30,23 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Supplier; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.agent.proto.AgentMessage; +import org.apache.bifromq.basecluster.agent.proto.AgentMessageEnvelope; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basehlc.HLC; +@Slf4j class AgentMember implements IAgentMember { private final AgentMemberAddr localAddr; private final IORMap agentCRDT; @@ -52,8 +55,9 @@ class AgentMember implements IAgentMember { private final PublishSubject agentMessageSubject = PublishSubject.create(); private final CompositeDisposable 
disposables = new CompositeDisposable(); private final ReadWriteLock destroyLock = new ReentrantReadWriteLock(); + private final AtomicReference metadata = new AtomicReference<>( + AgentMemberMetadata.newBuilder().setHlc(HLC.INST.get()).build()); private volatile boolean destroy = false; - private volatile AgentMemberMetadata metadata; AgentMember(AgentMemberAddr memberAddr, IORMap agentCRDT, @@ -64,7 +68,6 @@ class AgentMember implements IAgentMember { this.agentCRDT = agentCRDT; this.messenger = messenger; this.memberAddresses = memberAddresses; - metadata = AgentMemberMetadata.newBuilder().setHlc(HLC.INST.get()).build(); updateCRDT(); disposables.add(agentCRDT.inflation() .observeOn(scheduler) @@ -78,14 +81,14 @@ class AgentMember implements IAgentMember { @Override public AgentMemberMetadata metadata() { - return metadata; + return metadata.get(); } @Override public void metadata(ByteString value) { skipRunWhenDestroyed(() -> { - if (!metadata.getValue().equals(value)) { - metadata = AgentMemberMetadata.newBuilder().setValue(value).setHlc(HLC.INST.get()).build(); + if (!metadata.get().getValue().equals(value)) { + metadata.set(AgentMemberMetadata.newBuilder().setValue(value).setHlc(HLC.INST.get()).build()); updateCRDT(); } }); @@ -136,7 +139,7 @@ public CompletableFuture multicast(String targetMemberName, ByteString mes private void updateCRDT(long ts) { skipRunWhenDestroyed(() -> { Optional metaOnCRDT = CRDTUtil.getAgentMemberMetadata(agentCRDT, localAddr); - if (metaOnCRDT.isEmpty() || !metaOnCRDT.get().equals(metadata)) { + if (metaOnCRDT.isEmpty() || !metaOnCRDT.get().equals(metadata.get())) { updateCRDT(); } }); @@ -144,7 +147,7 @@ private void updateCRDT(long ts) { private void updateCRDT() { skipRunWhenDestroyed(() -> agentCRDT.execute(ORMapOperation.update(localAddr.toByteString()) - .with(MVRegOperation.write(metadata.toByteString())))); + .with(MVRegOperation.write(metadata.get().toByteString())))); } @Override @@ -152,6 +155,14 @@ public Observable 
receive() { return agentMessageSubject; } + @Override + public void refresh() { + skipRunWhenDestroyed(() -> { + metadata.set(metadata.get().toBuilder().setHlc(HLC.INST.get()).build()); + updateCRDT(); + }); + } + private void skipRunWhenDestroyed(Runnable runnable) { Lock readLock = destroyLock.readLock(); try { diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java index dee044d59..87202e1a2 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java @@ -14,19 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.memberlist.agent; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; import io.reactivex.rxjava3.core.Observable; import java.util.Map; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +/** + * The interface for an overlay agent cluster. + */ public interface IAgent { + /** + * The agent cluster id. + * + * @return the agent cluster id + */ String id(); /** @@ -39,7 +47,7 @@ public interface IAgent { /** * A hot observable of agent membership. * - * @return + * @return an observable that emits the current membership map */ Observable> membership(); @@ -47,14 +55,19 @@ public interface IAgent { * Register a local agent member. 
It's allowed to register same member name in same logical agent from different * agent hosts * - * @param memberName + * @param memberName the member name, should be unique in local host member */ IAgentMember register(String memberName); /** - * Deregister a member instance, the caller should never hold the reference to the instance after deregistered + * Deregister a member instance, the caller should never hold the reference to the instance after deregistered. * - * @param member + * @param member the member instance to deregister */ CompletableFuture deregister(IAgentMember member); + + /** + * Refresh the registration of the local agent member. + */ + void refreshRegistration(); } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java index 3d921d779..69e3a1879 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist.agent; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.agent.proto.AgentMessage; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.agent.proto.AgentMessage; public interface IAgentMember { AgentMemberAddr address(); @@ -32,50 +32,55 @@ public interface IAgentMember { /** * Broadcast a message among the agent members. * - * @param message - * @param reliable - * @return + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture broadcast(ByteString message, boolean reliable); /** - * Send a message to another member located in given endpoint + * Send a message to another member located in given endpoint. * - * @param targetMemberAddr - * @param message - * @param reliable - * @return + * @param targetMemberAddr the address of the target member + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture send(AgentMemberAddr targetMemberAddr, ByteString message, boolean reliable); /** - * Send a message to all endpoints where target member name is registered + * Send a message to all endpoints where target member name is registered. 
* - * @param targetMemberName - * @param message - * @param reliable - * @return + * @param targetMemberName the name of the target member + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture multicast(String targetMemberName, ByteString message, boolean reliable); /** - * Get current associated metadata + * Get current associated metadata. * - * @return + * @return the current metadata */ AgentMemberMetadata metadata(); /** - * Update associated metadata + * Update associated metadata. * - * @param value + * @param value the new metadata value */ void metadata(ByteString value); /** - * An observable of incoming messages + * An observable of incoming messages. * - * @return + * @return an observable that emits AgentMessage */ Observable receive(); + + /** + * Refresh the registration of the local agent member. + */ + void refresh(); } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java index 0d2e2e98a..12c64d788 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java @@ -381,7 +381,7 @@ public void testAgentClusterPartitionAndHealing() { await().until(() -> agentOnS2.membership().blockingFirst().size() == 4); await().until(() -> agentOnS3.membership().blockingFirst().size() == 4); - // isolate s1 from others + // isolate s1 from others log.info("isolate s1"); storeMgr.isolate("s1"); await().forever().until(() -> agentOnS1.membership().blockingFirst().size() == 2); diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java index 
c827dbcf4..d6c74fdb5 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java @@ -14,17 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.transport.ITransport; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -40,14 +34,15 @@ import java.util.Set; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.transport.ITransport; @Slf4j public class AgentTestCluster { - @AllArgsConstructor - private static class AgentHostMeta { - final AgentHostOptions options; - } - private final MockNetwork network = new MockNetwork(); private final Map hostMetaMap = Maps.newConcurrentMap(); private final Map hostEndpointMap = Maps.newConcurrentMap(); @@ -55,7 +50,6 @@ private static class AgentHostMeta { private final Map hostMap = Maps.newConcurrentMap(); private final Map> inflationLogs = 
Maps.newConcurrentMap(); private final CompositeDisposable disposables = new CompositeDisposable(); - public AgentTestCluster() { } @@ -98,7 +92,6 @@ public void integrate(String hostId) { network.integrate(hostTransportMap.get(hostId)); } - public HostEndpoint endpoint(String hostId) { checkHost(hostId); return getHost(hostId).local(); @@ -155,4 +148,9 @@ public IAgentHost getHost(String hostId) { private void checkHost(String hostId) { Preconditions.checkArgument(hostEndpointMap.containsKey(hostId)); } + + @AllArgsConstructor + private static class AgentHostMeta { + final AgentHostOptions options; + } } diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java index 94213d86e..e74ee85d8 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java @@ -115,6 +115,11 @@ public Observable> aliveCRDTs() { }); } + @Override + public Observable refreshSignal() { + return agentHost.refuteSignal(); + } + private CompletableFuture stopHostingInternal(String uri) { return hostedCRDT.remove(uri).close(); } diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java index 14b1d72db..97375ff37 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java @@ -14,20 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.service; -import org.apache.bifromq.basecluster.IAgentHost; -import org.apache.bifromq.basecrdt.core.api.ICRDTOperation; -import org.apache.bifromq.basecrdt.core.api.ICausalCRDT; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import java.util.Set; import java.util.concurrent.CompletableFuture; import lombok.NonNull; +import org.apache.bifromq.basecluster.IAgentHost; +import org.apache.bifromq.basecrdt.core.api.ICRDTOperation; +import org.apache.bifromq.basecrdt.core.api.ICausalCRDT; +import org.apache.bifromq.basecrdt.proto.Replica; /** * The CRDT service with decentralized membership management based on base-cluster. @@ -89,6 +89,13 @@ static ICRDTService newInstance(IAgentHost agentHost, @NonNull CRDTServiceOption */ Observable> aliveCRDTs(); + /** + * A signal to refresh the CRDT replica hosted in the service. + * + * @return an observable that emits refresh signal + */ + Observable refreshSignal(); + /** * Stop the store. 
*/ diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java index cc6523180..3b9422aad 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java @@ -64,6 +64,11 @@ class BaseKVLandscapeCRDT implements IBaseKVLandscapeCRDT { .subscribe(landscapeSubject::onNext)); } + @Override + public Observable refreshSignal() { + return crdtService.refreshSignal(); + } + public Observable> aliveReplicas() { return crdtService.aliveReplicas(landscapeORMap.id().getUri()) .map(replicas -> replicas.stream().map(Replica::getId).collect(Collectors.toSet())); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java index 3c45e0a4c..0b1cb7610 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java @@ -57,6 +57,11 @@ public CompletableFuture report(KVRangeStoreDescriptor descriptor) { return CompletableFuture.completedFuture(null); } + @Override + public Observable refreshSignal() { + return landscapeCRDT.refreshSignal(); + } + @Override public void stop() { landscapeCRDT.removeDescriptor(storeId).join(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java index f541c9d62..c1d485956 100644 --- 
a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java @@ -69,6 +69,11 @@ class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT { .subscribe(balancerStatesSubject::onNext)); } + @Override + public Observable refuteSignal() { + return crdtService.refreshSignal(); + } + public Observable> aliveReplicas() { return crdtService.aliveReplicas(balancerStatesByStoreORMap.id().getUri()) .map(replicas -> replicas.stream().map(Replica::getId).collect(Collectors.toSet())); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java index c4a192d0a..7a5cf549d 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java @@ -64,6 +64,11 @@ public CompletableFuture reportBalancerState(String balancerFactoryClassFQ return CompletableFuture.completedFuture(null); } + @Override + public Observable refreshSignal() { + return statesCRDT.refuteSignal(); + } + @Override public void stop() { statesCRDT.removeStore(storeId).join(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java index 1a58c7e94..721f7c33e 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java +++ 
b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java @@ -32,6 +32,12 @@ * The interface of a BaseKV landscape CRDT. */ public interface IBaseKVLandscapeCRDT { + /** + * A signal to refresh the landscape CRDT. + * + * @return the observable of the signal + */ + Observable refreshSignal(); /** * Get the observable of alive replicas of landscape CRDT. * diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java index 5bbdf5b7e..7b39b0c9b 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java @@ -19,6 +19,7 @@ package org.apache.bifromq.basekv.metaservice; +import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; @@ -33,6 +34,13 @@ public interface IBaseKVLandscapeReporter { */ CompletableFuture report(KVRangeStoreDescriptor descriptor); + /** + * A signal to refresh the landscape reporter's state. + * + * @return an observable that emits a timestamp when the reporter should refresh its state + */ + Observable refreshSignal(); + /** * Stop the reporter. 
*/ diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java index a0e44b963..99960bef0 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java @@ -32,6 +32,9 @@ * The interface of a BaseKV store balancer states CRDT. */ public interface IBaseKVStoreBalancerStatesCRDT { + + Observable refuteSignal(); + Observable> aliveReplicas(); Observable>> currentBalancerStates(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java index c4ca82670..9e6891f16 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java @@ -20,6 +20,7 @@ package org.apache.bifromq.basekv.metaservice; import com.google.protobuf.Struct; +import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; /** @@ -36,6 +37,13 @@ public interface IBaseKVStoreBalancerStatesReporter { */ CompletableFuture reportBalancerState(String balancerFactoryClassFQN, boolean disable, Struct loadRules); + /** + * A signal to refresh the reporter's state. + * + * @return an observable that emits a timestamp when the reporter should refresh its state + */ + Observable refreshSignal(); + /** * Stop the reporter. 
*/ diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index 5e511d11a..3bf66c15a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -171,6 +171,15 @@ public void start(String localStoreId) { trimRangeHistory(descriptors); trigger(); })); + disposables.add(statesReporter.refreshSignal() + .subscribe(ts -> { + for (Map.Entry entry : balancers.entrySet()) { + String balancerFacClassFQN = entry.getKey(); + StoreBalancerState balancerState = entry.getValue(); + statesReporter.reportBalancerState(balancerFacClassFQN, + balancerState.disabled.get(), balancerState.loadRules.get()); + } + })); } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java index e3f537967..0a7ba3335 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java @@ -79,6 +79,7 @@ public class KVStoreBalanceControllerTest { private static final String LOCAL_STORE_ID = "localStoreId"; private final PublishSubject> proposalSubject = PublishSubject.create(); private final PublishSubject> storeDescSubject = PublishSubject.create(); + private final PublishSubject refreshSignal = PublishSubject.create(); @Mock private IBaseKVMetaService metaService; @Mock @@ -103,6 +104,7 @@ public void setup() 
throws IOException { when(balancerFactory.newBalancer(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(storeBalancer); when(metaService.balancerStatesProposal(eq(CLUSTER_ID))).thenReturn(statesProposal); when(metaService.balancerStatesReporter(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(statesReporter); + when(statesReporter.refreshSignal()).thenReturn(refreshSignal); when(statesProposal.expectedBalancerStates()).thenReturn(proposalSubject); when(storeClient.describe()).thenReturn(storeDescSubject); executor = Executors.newScheduledThreadPool(1); @@ -400,6 +402,18 @@ public void testInvalidRules() { verify(statesReporter, never()).reportBalancerState(anyString(), anyBoolean(), any(Struct.class)); } + @Test + public void testRefreshSignal() { + reset(statesReporter); + refreshSignal.onNext(System.currentTimeMillis()); + verify(statesReporter, times(4)) + .reportBalancerState(anyString(), anyBoolean(), any(Struct.class)); + verify(statesReporter, times(1)) + .reportBalancerState(eq(balancerFactory.getClass().getName()), + eq(false), + eq(Struct.getDefaultInstance())); + } + private Set generateDescriptor(KVRangeId id, long ver) { List voters = Lists.newArrayList(LOCAL_STORE_ID, "store1"); List learners = Lists.newArrayList(); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java index b3fb57119..ab06de152 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java @@ -98,6 +98,8 @@ public void start() { landscapeReporter = metaService.landscapeReporter(clusterId, kvRangeStore.id()); // sync store descriptor via crdt disposables.add(kvRangeStore.describe().subscribe(landscapeReporter::report)); + disposables.add(landscapeReporter.refreshSignal() + 
.subscribe(ts -> landscapeReporter.report(kvRangeStore.describe().blockingFirst()))); log.debug("BaseKVStore service started"); } diff --git a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java index 4cd67863a..e83a6098c 100644 --- a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java +++ b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java @@ -14,24 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.baserpc.trafficgovernor; -import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER; import static java.util.Collections.emptySet; +import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER; -import org.apache.bifromq.basecrdt.service.ICRDTService; -import org.apache.bifromq.basehlc.HLC; -import org.apache.bifromq.baserpc.proto.RPCServer; import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import io.grpc.inprocess.InProcessSocketAddress; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; -import io.reactivex.rxjava3.disposables.Disposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import java.net.InetSocketAddress; import java.util.Map; @@ -41,6 +37,9 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; +import 
org.apache.bifromq.basecrdt.service.ICRDTService; +import org.apache.bifromq.basehlc.HLC; +import org.apache.bifromq.baserpc.proto.RPCServer; @Slf4j class RPCServiceTrafficManager extends RPCServiceAnnouncer @@ -157,41 +156,49 @@ private static class ServerRegistration implements IServerRegistration { private final RPCServiceTrafficManager manager; private final AtomicReference localServer; - private final Disposable disposable; - private final CompositeDisposable disposables; + private final CompositeDisposable myDisposibles = new CompositeDisposable(); + private final CompositeDisposable allDisposibles; - private ServerRegistration(RPCServer server, RPCServiceTrafficManager announcer, - CompositeDisposable disposables) { + private ServerRegistration(RPCServer server, + RPCServiceTrafficManager announcer, + CompositeDisposable allDisposables) { this.localServer = new AtomicReference<>(server); this.manager = announcer; - this.disposables = disposables; + this.allDisposibles = allDisposables; // make an announcement via rpcServiceCRDT log.debug("Announce local server[{}]:{}", announcer.serviceUniqueName, server); announcer.announce(localServer.get()).join(); // enforce the announcement consistent eventually - disposable = announcer.announcedServers() + myDisposibles.add(announcer.announcedServers() .doOnDispose(() -> manager.revoke(localServer.get().getId()).join()) .subscribe(serverMap -> { RPCServer localServer = this.localServer.get(); if (!serverMap.containsKey(localServer.getId())) { - RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build(); - log.debug("Re-announce local server: {}", toUpdate); - // refresh announcement time - announcer.announce(toUpdate); + reannounce(); } else if (localServer.getAnnouncedTS() < serverMap.get(localServer.getId()).getAnnouncedTS()) { localServer = serverMap.get(localServer.getId()); log.debug("Update local server from announcement: server={}", localServer); } - }); - disposables.add(disposable); + 
})); + myDisposibles.add(announcer.crdtService.refreshSignal() + .subscribe(ts -> reannounce())); + allDisposables.add(myDisposibles); + } + + private void reannounce() { + RPCServer localServer = this.localServer.get(); + RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build(); + log.debug("Re-announce local server: {}", toUpdate); + // refresh announcement time + manager.announce(toUpdate); } @Override public void stop() { - disposables.remove(disposable); - disposable.dispose(); + allDisposibles.remove(myDisposibles); + myDisposibles.dispose(); } } } From a2a7f71b96108b8ccf0f37cb901711692e64ca41 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Wed, 20 Aug 2025 18:11:14 +0800 Subject: [PATCH 09/20] 1. fixed an issue in RedundantRangeRemovalBalancer which may cause the removal of functioning range mistakenly 2. improve the stability of RangeSplitBalancer and ReplicaCntBalancer 3. enhance balancer controller to support accept partial load rules from API --- .../balance/KVStoreBalanceController.java | 22 +- .../balance/impl/RangeSplitBalancer.java | 16 + .../impl/RedundantRangeRemovalBalancer.java | 45 ++- .../balance/impl/ReplicaCntBalancer.java | 369 ++++++++++-------- .../balance/impl/RangeSplitBalancerTest.java | 172 ++++++++ .../RedundantRangeRemovalBalancerTest.java | 39 ++ .../balance/impl/ReplicaCntBalancerTest.java | 153 ++++++++ 7 files changed, 628 insertions(+), 188 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index 3bf66c15a..50d75ce48 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -235,14 +235,20 @@ 
private void updateAndBalance() { balancerState.disabled.set(disable); needReport = true; } - Struct expectedLoadRules = expectedState.getLoadRules(); - if (!loadRules.equals(expectedLoadRules) - && balancerState.balancer.validate(expectedLoadRules)) { - loadRules = expectedLoadRules; - // report the balancer state - balancerState.loadRules.set(expectedLoadRules); - balancerState.balancer.update(expectedLoadRules); - needReport = true; + Struct expectedLoadRules = loadRules.toBuilder() + .mergeFrom(expectedState.getLoadRules()) + .build(); + if (!loadRules.equals(expectedLoadRules)) { + if (balancerState.balancer.validate(expectedLoadRules)) { + loadRules = expectedLoadRules; + // report the balancer state + balancerState.loadRules.set(expectedLoadRules); + balancerState.balancer.update(expectedLoadRules); + needReport = true; + } else { + log.warn("Balancer[{}] load rules not valid: {}", + balancerFacClassFQN, expectedLoadRules); + } } if (needReport) { statesReporter.reportBalancerState(balancerFacClassFQN, disable, loadRules); diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java index b9642fe4e..7089ef3cb 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java @@ -26,9 +26,12 @@ import com.google.protobuf.Struct; import com.google.protobuf.Value; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Optional; +import java.util.Set; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; @@ 
-131,6 +134,10 @@ protected Map doGenerate(Struct loadRules, KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); KVRangeStoreDescriptor storeDescriptor = landscape.get(leaderRange.ownerStoreDescriptor().getId()); ClusterConfig clusterConfig = rangeDescriptor.getConfig(); + if (containsDeadMember(clusterConfig, landscape.keySet())) { + // shortcut when config contains dead members + return Collections.emptyMap(); + } Optional splitHintOpt = rangeDescriptor .getHintsList() .stream() @@ -170,4 +177,13 @@ && compareEndKeys(splitHint.getSplitKey(), endKey(boundary)) < 0) { } return expectedRangeLayout; } + + private boolean containsDeadMember(ClusterConfig clusterConfig, Set live) { + Set members = new HashSet<>(); + members.addAll(clusterConfig.getVotersList()); + members.addAll(clusterConfig.getLearnersList()); + members.addAll(clusterConfig.getNextVotersList()); + members.addAll(clusterConfig.getNextLearnersList()); + return members.stream().anyMatch(m -> !live.contains(m)); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java index bedf16fbd..38f86834a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java @@ -23,11 +23,13 @@ import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveRoute; import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch; +import com.google.common.collect.Sets; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Map; import java.util.NavigableMap; +import java.util.NavigableSet; import java.util.Set; import 
java.util.SortedSet; import java.util.TreeSet; @@ -38,6 +40,7 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.utils.EffectiveEpoch; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @@ -97,16 +100,16 @@ public BalanceResult balance() { return NoNeedBalance.INSTANCE; } Map.Entry> oldestEntry = latest.firstEntry(); - Map> conflictingRanges = findConflictingRanges(oldestEntry.getValue()); + Map> conflictingRanges = findConflictingRanges(oldestEntry.getValue()); if (!conflictingRanges.isEmpty()) { // deal with id-conflict ranges for (KVRangeId rangeId : conflictingRanges.keySet()) { - SortedSet leaderRanges = conflictingRanges.get(rangeId); + NavigableSet leaderRanges = conflictingRanges.get(rangeId); for (LeaderRange leaderRange : leaderRanges) { if (!leaderRange.ownerStoreDescriptor().getId().equals(localStoreId)) { return NoNeedBalance.INSTANCE; } - log.debug("Remove Id-Conflict range: {} in store {}", + log.warn("Remove Id-Conflict range: {} in store {}", KVRangeIdUtil.toString(leaderRange.descriptor().getId()), leaderRange.ownerStoreDescriptor().getId()); return quit(localStoreId, leaderRange.descriptor()); @@ -128,7 +131,7 @@ public BalanceResult balance() { Boundary boundary = rangeDescriptor.getBoundary(); LeaderRange leaderRange = effectiveLeaders.get(boundary); if (leaderRange == null || !leaderRange.descriptor().getId().equals(rangeDescriptor.getId())) { - log.debug("Remove Boundary-Conflict range: {} in store {}", + log.warn("Remove Boundary-Conflict range: {} in store {}", KVRangeIdUtil.toString(rangeDescriptor.getId()), storeDescriptor.getId()); return quit(localStoreId, rangeDescriptor); @@ -138,9 +141,10 @@ public BalanceResult balance() { return NoNeedBalance.INSTANCE; } - 
private Map> findConflictingRanges(Set effectiveEpoch) { - Map> leaderRangesByRangeId = new HashMap<>(); - Map> conflictingRanges = new HashMap<>(); + private Map> findConflictingRanges( + Set effectiveEpoch) { + Map> leaderRangesByRangeId = new HashMap<>(); + Map> conflictingRanges = new HashMap<>(); for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch) { for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) { if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) { @@ -151,12 +155,33 @@ private Map> findConflictingRanges(Set lr.ownerStoreDescriptor().getId(), String::compareTo) .reversed())); leaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); - if (leaderRanges.size() > 1) { - // More than one leader for the same range, add to conflicting ranges - conflictingRanges.put(rangeId, leaderRanges); + } + } + for (KVRangeId rangeId : leaderRangesByRangeId.keySet()) { + NavigableSet leaderRanges = leaderRangesByRangeId.get(rangeId); + LeaderRange firstLeaderRange = leaderRanges.first(); + ClusterConfig firstLeaderClusterConfig = firstLeaderRange.descriptor().getConfig(); + if (leaderRanges.size() > 1) { + NavigableSet restLeaderRanges = leaderRanges.tailSet(firstLeaderRange, false); + // check if rest leader ranges are conflicting: disjoint voter set + for (LeaderRange restLeaderRange : restLeaderRanges) { + ClusterConfig restLeaderClusterConfig = restLeaderRange.descriptor().getConfig(); + if (isDisjoint(firstLeaderClusterConfig, restLeaderClusterConfig)) { + // if disjoint, add to conflicting ranges + conflictingRanges.put(rangeId, leaderRanges); + } } } } return conflictingRanges; } + + private boolean isDisjoint(ClusterConfig firstConfig, ClusterConfig secondConfig) { + Set firstVoters = Sets.newHashSet(firstConfig.getVotersList()); + Set secondVoters = Sets.newHashSet(secondConfig.getVotersList()); + Set firstNextVoters = Sets.newHashSet(firstConfig.getNextVotersList()); + Set secondNextVoters = 
Sets.newHashSet(secondConfig.getNextVotersList()); + return Collections.disjoint(firstVoters, secondVoters) + && Collections.disjoint(firstNextVoters, secondNextVoters); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java index dcb47face..b43072f58 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java @@ -19,14 +19,11 @@ package org.apache.bifromq.basekv.balance.impl; -import static com.google.common.collect.Sets.difference; -import static com.google.common.collect.Sets.union; - import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.protobuf.Struct; import com.google.protobuf.Value; -import java.util.Collections; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -76,6 +73,17 @@ public ReplicaCntBalancer(String clusterId, Preconditions.checkArgument(validate(defaultLoadRules), "Invalid default load rules"); } + private ClusterConfig buildConfig(Set voters, Set learners) { + return ClusterConfig.newBuilder() + .addAllVoters(voters) + .addAllLearners(learners) + .build(); + } + + private void sanitize(Set s, Set live) { + s.retainAll(live); + } + @Override public Struct initialLoadRules() { return defaultLoadRules; @@ -116,137 +124,143 @@ private boolean meetExpectedConfig(Struct loadRules, Map landscape, EffectiveRoute effectiveRoute, Map expectedRangeLayout) { - int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue(); - int expectedLearners = (int) loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue(); - // 
meeting goal one - meet the expected number of Voter replicas and learner replicas for each Range dynamically + final Set liveStores = landscape.keySet(); + final int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue(); + final int expectedLearners = (int) loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue(); + boolean meetingGoal = false; + for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { - Boundary boundary = entry.getKey(); LeaderRange leaderRange = entry.getValue(); KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } + + // if there is running config change process, abort generation and wait for the next round + // keep range config change as linear as possible if (clusterConfig.getNextVotersCount() > 0 || clusterConfig.getNextLearnersCount() > 0) { - // if there is running config change process, abort generation - expectedRangeLayout.put(boundary, clusterConfig); - meetingGoal = true; - continue; + expectedRangeLayout.clear(); + // shortcut + return true; } - // voter count not meet expectation or exceeds actual store node amount + Set voters = new HashSet<>(clusterConfig.getVotersList()); Set learners = new HashSet<>(clusterConfig.getLearnersList()); - if (clusterConfig.getVotersCount() != expectedVoters || clusterConfig.getVotersCount() > landscape.size()) { - if (clusterConfig.getVotersCount() < expectedVoters) { - // add some voters from the least range count store - List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream() - .filter(e -> - !learners.contains(e.getKey()) && !voters.contains(e.getKey())) + + // remove unreachable stores from voters and learners + sanitize(voters, liveStores); + sanitize(learners, liveStores); + + Boundary boundary = entry.getKey(); + int targetVoters = Math.min(expectedVoters, 
liveStores.size()); + boolean needFix = voters.size() != targetVoters; + if (!meetingGoal && needFix) { + String leaderStore = leaderRange.ownerStoreDescriptor().getId(); + if (voters.size() < targetVoters) { + List candidates = landscape.entrySet().stream() + .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey())) .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) .map(Map.Entry::getKey) .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) { - voters.add(aliveStoreId); - if (voters.size() == expectedVoters) { + for (String s : candidates) { + voters.add(s); + if (voters.size() == targetVoters) { break; } } - } else { - // remove some voters from the most range count store - List aliveStoresSortedByRangeCountDesc = landscape.entrySet().stream() + } else { // voters.size() > targetVoters + List overloaded = landscape.entrySet().stream() .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) .map(Map.Entry::getKey) .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) { - if (!aliveStoreId.equals(leaderRange.ownerStoreDescriptor().getId())) { - voters.remove(aliveStoreId); + for (String s : overloaded) { + if (!s.equals(leaderStore) && voters.contains(s)) { + voters.remove(s); + if (voters.size() == targetVoters) { + break; + } } - if (voters.size() == expectedVoters) { - break; + } + if (voters.size() > targetVoters) { + for (String s : new ArrayList<>(voters)) { + if (!s.equals(leaderStore)) { + voters.remove(s); + if (voters.size() == targetVoters) { + break; + } + } } } } - // remove unreachable voters - voters.removeIf(voter -> !landscape.containsKey(voter)); - ClusterConfig newConfig = ClusterConfig.newBuilder() - .mergeFrom(clusterConfig) - .clearVoters() - .addAllVoters(voters) - .build(); - if (!newConfig.equals(clusterConfig)) { - meetingGoal = true; - } - expectedRangeLayout.put(boundary, newConfig); + expectedRangeLayout.put(boundary, 
buildConfig(voters, learners)); + meetingGoal = true; } else { - expectedRangeLayout.put(boundary, clusterConfig); + expectedRangeLayout.put(boundary, buildConfig(voters, learners)); } } + if (meetingGoal) { return true; } - // voter count met the expectation, check learner count + for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { - Boundary boundary = entry.getKey(); LeaderRange leaderRange = entry.getValue(); KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } + Set voters = new HashSet<>(clusterConfig.getVotersList()); Set learners = new HashSet<>(clusterConfig.getLearnersList()); - if (expectedLearners == -1 - || clusterConfig.getLearnersCount() != expectedLearners - || clusterConfig.getLearnersCount() > landscape.size()) { - if (expectedLearners == -1) { - Set newLearners = new HashSet<>(landscape.keySet()); - newLearners.removeAll(voters); - learners.addAll(newLearners); - } else { - if (clusterConfig.getLearnersCount() < expectedLearners) { - // add some learners from the least range count store - List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream() - .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) - .map(Map.Entry::getKey) - .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) { - if (!voters.contains(aliveStoreId)) { - learners.add(aliveStoreId); - } - if (learners.size() == expectedVoters) { + sanitize(voters, liveStores); + sanitize(learners, liveStores); + + boolean changed = false; + + if (expectedLearners == -1) { + // learners = live - voters + Set newLearners = new HashSet<>(liveStores); + newLearners.removeAll(voters); + if (!newLearners.equals(learners)) { + learners = newLearners; + changed = true; + } + } else { + int maxPossible = Math.max(0, liveStores.size() - voters.size()); + int targetLearners = 
Math.min(expectedLearners, maxPossible); + + if (learners.size() < targetLearners) { + List candidates = landscape.entrySet().stream() + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : candidates) { + if (!voters.contains(s) && !learners.contains(s)) { + learners.add(s); + if (learners.size() == targetLearners) { break; } } - } else { - // remove some learners from the most range count store - List aliveStoresSortedByRangeCountDesc = landscape.entrySet().stream() - .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) - .map(Map.Entry::getKey) - .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) { - learners.remove(aliveStoreId); - if (learners.size() == expectedLearners) { + } + changed = true; + } else if (learners.size() > targetLearners) { + List overloaded = landscape.entrySet().stream() + .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) + .map(Map.Entry::getKey) + .toList(); + for (String s : overloaded) { + if (learners.contains(s)) { + learners.remove(s); + if (learners.size() == targetLearners) { break; } } } + changed = true; } - // remove unreachable learners - learners.removeIf(learner -> !landscape.containsKey(learner)); - ClusterConfig newConfig = ClusterConfig.newBuilder() - .mergeFrom(clusterConfig) - .clearLearners() - .addAllLearners(learners) - .build(); - if (!newConfig.equals(clusterConfig)) { - meetingGoal = true; - } - expectedRangeLayout.put(boundary, newConfig); - } else { - expectedRangeLayout.put(boundary, clusterConfig); + } + + Boundary boundary = entry.getKey(); + expectedRangeLayout.put(boundary, buildConfig(voters, learners)); + if (!meetingGoal && changed) { + meetingGoal = true; } } return meetingGoal; @@ -255,133 +269,148 @@ private boolean meetExpectedConfig(Struct loadRules, private boolean balanceVoterCount(Map landscape, EffectiveRoute effectiveRoute, Map 
expectedRangeLayout) { - // goal one has met, meeting goal two - evenly distributed voter replicas across all stores - boolean meetingGoal = false; + final Set liveStores = landscape.keySet(); Map storeVoterCount = new HashMap<>(); for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { ClusterConfig config = entry.getValue().descriptor().getConfig(); - config.getVotersList() + config.getVotersList().stream() + .filter(liveStores::contains) .forEach(storeId -> storeVoterCount.put(storeId, storeVoterCount.getOrDefault(storeId, 0) + 1)); } - landscape.keySet().forEach(storeId -> { - if (!storeVoterCount.containsKey(storeId)) { - storeVoterCount.put(storeId, 0); - } - }); - record StoreVoterCount(String storeId, int voterCount) { - } + liveStores.forEach(s -> storeVoterCount.putIfAbsent(s, 0)); + + record StoreVoterCount(String storeId, int voterCount) {} - SortedSet storeVoterCountSorted = new TreeSet<>(Comparator - .comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId)); + SortedSet storeVoterCountSorted = new TreeSet<>( + Comparator.comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId)); storeVoterCount.forEach( (storeId, voterCount) -> storeVoterCountSorted.add(new StoreVoterCount(storeId, voterCount))); + double totalVoters = storeVoterCount.values().stream().mapToInt(Integer::intValue).sum(); - double targetVotersPerStore = totalVoters / landscape.size(); + double targetVotersPerStore = liveStores.isEmpty() ? 
0 : totalVoters / liveStores.size(); int maxVotersPerStore = (int) Math.ceil(targetVotersPerStore); int minVotersPerStore = (int) Math.floor(targetVotersPerStore); - int globalMax = Collections.max(storeVoterCount.values()); - int globalMin = Collections.min(storeVoterCount.values()); + int globalMax = storeVoterCount.values().stream().mapToInt(Integer::intValue).max().orElse(0); + int globalMin = storeVoterCount.values().stream().mapToInt(Integer::intValue).min().orElse(0); if (globalMax - globalMin <= 1) { + for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { + ClusterConfig cc = entry.getValue().descriptor().getConfig(); + Set voters = new HashSet<>(cc.getVotersList()); + Set learners = new HashSet<>(cc.getLearnersList()); + sanitize(voters, liveStores); + sanitize(learners, liveStores); + expectedRangeLayout.put(entry.getKey(), buildConfig(voters, learners)); + } return false; } + boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { Boundary boundary = entry.getKey(); - LeaderRange leaderRange = entry.getValue(); - KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); - ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } - // examine in sorted order to ensure the result is deterministic - Set learners = Sets.newHashSet(clusterConfig.getLearnersList()); - SortedSet voterSorted = Sets.newTreeSet(clusterConfig.getVotersList()); - for (String voter : voterSorted) { - if (storeVoterCount.get(voter) >= maxVotersPerStore) { - // voter store has overloaded voters - for (StoreVoterCount underloadedStore : storeVoterCountSorted) { - // move to one underloaded store which is current not in the voter list - if (storeVoterCount.get(underloadedStore.storeId) <= minVotersPerStore - && !voterSorted.contains(underloadedStore.storeId) - && !learners.contains(underloadedStore.storeId)) { + LeaderRange lr = 
entry.getValue(); + ClusterConfig cc = lr.descriptor().getConfig(); + + Set learners = Sets.newHashSet(cc.getLearnersList()); + SortedSet voterSorted = Sets.newTreeSet(cc.getVotersList()); + sanitize(learners, liveStores); + voterSorted.retainAll(liveStores); + + for (String voter : new ArrayList<>(voterSorted)) { + if (storeVoterCount.getOrDefault(voter, 0) >= maxVotersPerStore) { + for (StoreVoterCount under : storeVoterCountSorted) { + if (storeVoterCount.getOrDefault(under.storeId, 0) <= minVotersPerStore + && !voterSorted.contains(under.storeId) + && !learners.contains(under.storeId)) { + // move voter -> underloaded + Set newVoters = new HashSet<>(voterSorted); + newVoters.remove(voter); + newVoters.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(newVoters, learners)); meetingGoal = true; - ClusterConfig newConfig = ClusterConfig.newBuilder() - .addAllVoters( - difference(union(voterSorted, Set.of(underloadedStore.storeId)), Set.of(voter))) - .addAllLearners(learners) - .build(); - expectedRangeLayout.put(boundary, newConfig); break; } } } + if (meetingGoal) { + break; + } + } + + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); + } else { + break; } } + return meetingGoal; } private boolean balanceLearnerCount(Map landscape, EffectiveRoute effectiveRoute, Map expectedRangeLayout) { - boolean meetingGoal = false; + final Set liveStores = landscape.keySet(); + Map storeLearnerCount = new HashMap<>(); for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { ClusterConfig config = entry.getValue().descriptor().getConfig(); - config.getLearnersList() + config.getLearnersList().stream() + .filter(liveStores::contains) .forEach(storeId -> storeLearnerCount.put(storeId, storeLearnerCount.getOrDefault(storeId, 0) + 1)); } - landscape.keySet().forEach(storeId -> { - if (!storeLearnerCount.containsKey(storeId)) { - storeLearnerCount.put(storeId, 0); - } - }); - record StoreLearnerCount(String 
storeId, int voterCount) { - } + liveStores.forEach(s -> storeLearnerCount.putIfAbsent(s, 0)); + + record StoreLearnerCount(String storeId, int learnerCount) {} - SortedSet storeVoterCountSorted = new TreeSet<>(Comparator - .comparingInt(StoreLearnerCount::voterCount).thenComparing(StoreLearnerCount::storeId)); - storeLearnerCount.forEach( - (storeId, voterCount) -> storeVoterCountSorted.add(new StoreLearnerCount(storeId, voterCount))); + SortedSet storeLearnerCountSorted = new TreeSet<>( + Comparator.comparingInt(StoreLearnerCount::learnerCount).thenComparing(StoreLearnerCount::storeId)); + storeLearnerCount.forEach((id, c) -> storeLearnerCountSorted.add(new StoreLearnerCount(id, c))); double totalLearners = storeLearnerCount.values().stream().mapToInt(Integer::intValue).sum(); - double targetLearnersPerStore = totalLearners / landscape.size(); + double targetLearnersPerStore = liveStores.isEmpty() ? 0 : totalLearners / liveStores.size(); int maxLearnersPerStore = (int) Math.ceil(targetLearnersPerStore); + boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { Boundary boundary = entry.getKey(); - LeaderRange leaderRange = entry.getValue(); - KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); - ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } - // examine in sorted order to ensure the result is deterministic - Set voters = Sets.newHashSet(clusterConfig.getVotersList()); - SortedSet learnerSorted = Sets.newTreeSet(clusterConfig.getLearnersList()); - for (String learner : learnerSorted) { - if (storeLearnerCount.get(learner) > maxLearnersPerStore) { - // learner store has overloaded learners - for (StoreLearnerCount underloadedStore : storeVoterCountSorted) { - // move to one underloaded store which is current not in the voter or learner list - if (storeLearnerCount.get(underloadedStore.storeId) < maxLearnersPerStore 
- && !voters.contains(underloadedStore.storeId) - && !learnerSorted.contains(underloadedStore.storeId)) { + LeaderRange lr = entry.getValue(); + ClusterConfig cc = lr.descriptor().getConfig(); + + Set voters = Sets.newHashSet(cc.getVotersList()); + SortedSet learnerSorted = Sets.newTreeSet(cc.getLearnersList()); + sanitize(voters, liveStores); + learnerSorted.retainAll(liveStores); + + for (String learner : new ArrayList<>(learnerSorted)) { + if (storeLearnerCount.getOrDefault(learner, 0) > maxLearnersPerStore) { + for (StoreLearnerCount under : storeLearnerCountSorted) { + if (storeLearnerCount.getOrDefault(under.storeId, 0) < maxLearnersPerStore + && !voters.contains(under.storeId) + && !learnerSorted.contains(under.storeId)) { + Set newLearners = new HashSet<>(learnerSorted); + newLearners.remove(learner); + newLearners.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(voters, newLearners)); meetingGoal = true; - ClusterConfig newConfig = ClusterConfig.newBuilder() - .addAllVoters(voters) - .addAllLearners(difference( - union(learnerSorted, Set.of(underloadedStore.storeId)), Set.of(learner))) - .build(); - expectedRangeLayout.put(boundary, newConfig); break; } } } + if (meetingGoal) { + break; + } + } + + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted)); + } else { + break; } } + return meetingGoal; } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java index a43f8fd47..0c0d6b62f 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java @@ -163,4 +163,176 @@ public void 
stopSplitWhenExceedMaxRanges() { .build())); assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); } + + @Test + public void skipWhenConfigHasDeadVoter() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addVoters("deadStore") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenConfigHasDeadLearner() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addLearners("ghost") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenConfigHasDeadNextMembers() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addNextVoters("deadV") + .addNextLearners("deadL") + .build()) + 
.addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenOngoingConfigChange() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addNextVoters("store1") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenSplitKeyEqualsStartOrOutOfRange() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setBoundary(org.apache.bifromq.basekv.proto.Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setConfig(ClusterConfig.newBuilder().addVoters("store1").build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + 
balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + + KVRangeDescriptor rd2 = rd.toBuilder().clearHints() + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("z")) + .build()) + .build(); + KVRangeStoreDescriptor sd2 = sd.toBuilder().clearRanges().addRanges(rd2).build(); + balancer.update(Set.of(sd2)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void stopSplitWhenSplitKeyNotProvided() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java index ecfeca3b9..05ba95a7a 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java @@ -342,4 +342,43 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.NoNeedBalance); } + + @Test + public void 
idConflictButVotersOverlap_shouldNotDelete() { + String peerStoreId = "peer"; + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + Boundary boundary = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")).build(); + + KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder() + .setId(kvRangeId).setRole(RaftNodeStatus.Leader).setVer(1) + .setBoundary(boundary) + .setConfig(ClusterConfig.newBuilder() + .addVoters(localStoreId) + .addVoters("x").build()) + .build(); + + KVRangeDescriptor peerRange = KVRangeDescriptor.newBuilder() + .setId(kvRangeId).setRole(RaftNodeStatus.Leader).setVer(1) + .setBoundary(boundary) + .setConfig(ClusterConfig.newBuilder() + .addVoters(localStoreId) + .addVoters(peerStoreId).build()) + .build(); + + KVRangeStoreDescriptor localStoreDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(localRange) + .build(); + KVRangeStoreDescriptor peerStoreDesc = KVRangeStoreDescriptor.newBuilder() + .setId(peerStoreId) + .addRanges(peerRange) + .build(); + + balancer.update(Set.of(localStoreDesc, peerStoreDesc)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.NoNeedBalance); + } } \ No newline at end of file diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java index 346eedd31..e275db541 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java @@ -464,4 +464,157 @@ public void generateCorrectClusterConfig() { assertTrue(balancer.verify(layout, allStoreDescriptors)); } + + @Test + public void 
removeDeadVoterAndBackfillEvenIfCountEqualsExpected() { + // live: s1, s2, s3;expected voters=3 + // range current voters = [s1, ghost, s2] + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("ghost") + .addVoters("s2") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + Set stores = new HashSet<>(); + stores.add(s1); + stores.add(s2); + stores.add(s3); + + // votersPerRange=3,learnersPerRange=0 + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + balancer.update(stores); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // expected:ghost removed s3 added + assertEquals(cmd.getKvRangeId(), kvRangeId); + assertTrue(cmd.getVoters().contains("s1")); + assertTrue(cmd.getVoters().contains("s2")); + assertTrue(cmd.getVoters().contains("s3")); + assertFalse(cmd.getVoters().contains("ghost")); + assertTrue(cmd.getLearners().isEmpty()); + } + + @Test + public void targetVotersIsCappedByLiveStores() { + // live: s1, s2;expected voters=3 + // range current voters = [s1, s2, ghost] + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + .addVoters("ghost") + .build()) + 
.build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + + Set stores = new HashSet<>(); + stores.add(s1); + stores.add(s2); + + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + balancer.update(stores); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // expected:voters=[s1、s2] + assertEquals(cmd.getKvRangeId(), kvRangeId); + assertTrue(cmd.getVoters().contains("s1")); + assertTrue(cmd.getVoters().contains("s2")); + assertEquals(cmd.getVoters().size(), 2); + assertFalse(cmd.getVoters().contains("ghost")); + assertTrue(cmd.getLearners().isEmpty()); + } + + @Test + public void abortWhenConfigChangeInProgress_nextFieldsPresent() { + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + ClusterConfig cfgWithNext = ClusterConfig.newBuilder() + .addVoters("localStore") + .addNextVoters("someone") + .build(); + + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(cfgWithNext) + .build(); + + KVRangeStoreDescriptor local = KVRangeStoreDescriptor.newBuilder() + .setId("localStore") + .addRanges(range) + .build(); + + Set stores = new HashSet<>(); + stores.add(local); + + balancer = new ReplicaCntBalancer("testCluster", "localStore", 1, 0); + balancer.update(stores); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void learnersMinusOneUsesLiveMinusVotersAndSanitizes() { + // expectedLearners = -1 => learners = live - voters; + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, -1); + + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor 
range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("ghostLearner") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + Set stores = new HashSet<>(); + stores.add(s1); + stores.add(s2); + stores.add(s3); + + balancer.update(stores); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // expected:learners = live - voters = {s2, s3};ghostLearner removed + assertTrue(cmd.getVoters().contains("s1")); + assertFalse(cmd.getLearners().contains("ghostLearner")); + assertTrue(cmd.getLearners().contains("s2")); + assertTrue(cmd.getLearners().contains("s3")); + assertEquals(cmd.getLearners().size(), 2); + } } From 0abe945226194236e521ee4a0dee220d9fc93d5e Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Thu, 21 Aug 2025 17:52:22 +0800 Subject: [PATCH 10/20] 1. Optimize bootstrap and config change workflow 2. 
Enhance ReplicaCntBalancer to consider more edge cases --- .../basekv/raft/RaftNodeStateFollower.java | 26 +- .../raft/exception/SnapshotException.java | 36 +- .../balance/KVStoreBalanceController.java | 9 +- .../balance/impl/RangeBootstrapBalancer.java | 33 +- .../balance/impl/ReplicaCntBalancer.java | 51 ++- .../impl/RangeBootstrapBalancerTest.java | 61 +++- .../balance/impl/ReplicaCntBalancerTest.java | 328 ++++++++++++++---- .../basekv/server/BaseKVStoreService.java | 2 - .../basekv/store/range/KVRangeFSM.java | 26 +- .../store/wal/KVRangeWALSubscription.java | 3 +- .../bifromq/dist/server/DistServiceTest.java | 3 +- .../bifromq/dist/worker/DistWorkerTest.java | 3 +- .../inbox/server/InboxServiceTest.java | 23 +- .../bifromq/inbox/store/InboxStoreTest.java | 21 +- .../bifromq/inbox/store/LoadExistingTest.java | 2 +- .../bifromq/inbox/store/LoadSubStatsTest.java | 4 +- .../bifromq/mqtt/integration/MQTTTest.java | 9 +- .../retain/store/LoadMetadataTest.java | 6 +- .../bifromq/retain/store/RetainStoreTest.java | 43 +-- 19 files changed, 501 insertions(+), 188 deletions(-) diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java index 62bf66661..c488260e1 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java @@ -14,11 +14,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.raft; +import com.google.protobuf.ByteString; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; import org.apache.bifromq.basekv.raft.exception.DropProposalException; import org.apache.bifromq.basekv.raft.exception.LeaderTransferException; @@ -39,16 +49,6 @@ import org.apache.bifromq.basekv.raft.proto.RequestVote; import org.apache.bifromq.basekv.raft.proto.Snapshot; import org.apache.bifromq.basekv.raft.proto.Voting; -import com.google.protobuf.ByteString; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; class RaftNodeStateFollower extends RaftNodeState { private final TreeMap stabilizingIndexes = new TreeMap<>(Long::compareTo); @@ -387,7 +387,7 @@ void changeClusterConfig(String correlateId, void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex, CompletableFuture onDone) { if (currentISSRequest == null) { log.debug("Snapshot installation request not found"); - onDone.completeExceptionally(new SnapshotException("No snapshot installation request")); + onDone.completeExceptionally(SnapshotException.noSnapshot()); return; } InstallSnapshot iss = currentISSRequest; @@ -398,7 +398,7 @@ void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex onDone.completeExceptionally(ex); } else { log.debug("Obsolete snapshot installation"); - onDone.completeExceptionally(new SnapshotException("Obsolete snapshot installed by FSM")); + onDone.completeExceptionally(SnapshotException.obsolete()); } 
return; } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java index ec397ed25..3f4bdf66d 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java @@ -14,17 +14,47 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.raft.exception; +/** + * Exception thrown during snapshot operations in the Raft protocol. + * This exception can indicate that a snapshot is obsolete or has other issues. + */ public class SnapshotException extends RuntimeException { - public SnapshotException(String message) { + private SnapshotException(String message) { super(message); } - public SnapshotException(Throwable e) { + private SnapshotException(Throwable e) { super(e); } + + public static ObsoleteSnapshotException obsolete() { + return new ObsoleteSnapshotException(); + } + + public static NoSnapshotException noSnapshot() { + return new NoSnapshotException(); + } + + /** + * Exception indicating that no snapshot is available for installation. + */ + public static class NoSnapshotException extends SnapshotException { + private NoSnapshotException() { + super("No snapshot available"); + } + } + + /** + * Exception indicating that the snapshot is obsolete by a newer snapshot during installation. 
+ */ + public static class ObsoleteSnapshotException extends SnapshotException { + private ObsoleteSnapshotException() { + super("The installed snapshot has been obsoleted by a newer snapshot"); + } + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index 50d75ce48..afe9c4f76 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -217,6 +217,9 @@ private void updateAndBalance() { Set landscape = this.landscape; if (landscape == null || landscape.isEmpty()) { scheduling.set(false); + if (this.landscape != landscape) { + trigger(); + } return; } for (Map.Entry entry : balancers.entrySet()) { @@ -283,7 +286,7 @@ private void scheduleRetry(Map expected, private void balance(final Map expected, final Set landscape) { metricsManager.scheduleCount.increment(); - Duration delay = Duration.ZERO; + Duration delay = null; for (Map.Entry entry : balancers.entrySet()) { String balancerFactoryName = entry.getKey(); StoreBalancerState fromBalancerState = entry.getValue(); @@ -332,7 +335,7 @@ private void balance(final Map expected, } case AwaitBalance -> { Duration await = ((AwaitBalance) result).await; - delay = await.toNanos() > delay.toNanos() ? await : delay; + delay = delay != null ? (await.toNanos() < delay.toNanos() ? 
await : delay) : await; } default -> { // do nothing @@ -346,7 +349,7 @@ private void balance(final Map expected, scheduling.set(false); if (this.landscape != landscape || this.expectedBalancerStates != expected) { trigger(); - } else if (!delay.isZero()) { + } else if (delay != null) { // if some balancers are in the progress of generating balance command, wait for a while scheduleRetry(expected, landscape, delay); } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java index 38b7eebec..6011e4d08 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java @@ -22,7 +22,14 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch; +import java.time.Duration; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import org.apache.bifromq.basehlc.HLC; +import org.apache.bifromq.basekv.balance.AwaitBalance; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.NoNeedBalance; @@ -33,12 +40,6 @@ import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.utils.EffectiveEpoch; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import java.time.Duration; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicReference; -import 
java.util.function.Supplier; /** * RangeBootstrapBalancer is a specialized StoreBalancer designed to handle the bootstrap process of creating the @@ -49,6 +50,7 @@ public class RangeBootstrapBalancer extends StoreBalancer { private final Supplier millisSource; private final long suspicionDurationMillis; private final AtomicReference bootstrapTrigger = new AtomicReference<>(); + /** * Constructor of StoreBalancer. * @@ -104,13 +106,18 @@ public void update(Set landscape) { @Override public BalanceResult balance() { BootstrapTrigger current = bootstrapTrigger.get(); - if (current != null && millisSource.get() > current.triggerTime) { - bootstrapTrigger.set(null); - return BalanceNow.of(BootstrapCommand.builder() - .toStore(localStoreId) - .kvRangeId(current.id) - .boundary(current.boundary) - .build()); + if (current != null) { + long nowMillis = millisSource.get(); + if (nowMillis > current.triggerTime) { + bootstrapTrigger.set(null); + return BalanceNow.of(BootstrapCommand.builder() + .toStore(localStoreId) + .kvRangeId(current.id) + .boundary(current.boundary) + .build()); + } else { + return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis)); + } } return NoNeedBalance.INSTANCE; } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java index b43072f58..d9014db30 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java @@ -128,6 +128,18 @@ private boolean meetExpectedConfig(Struct loadRules, final int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue(); final int expectedLearners = (int) 
loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue(); + if (liveStores.size() < expectedVoters) { + for (Map.Entry e : effectiveRoute.leaderRanges().entrySet()) { + ClusterConfig cc = e.getValue().descriptor().getConfig(); + for (String v : cc.getVotersList()) { + if (!liveStores.contains(v)) { + // shortcut for rolling restart + return true; + } + } + } + } + boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { @@ -143,8 +155,8 @@ private boolean meetExpectedConfig(Struct loadRules, return true; } - Set voters = new HashSet<>(clusterConfig.getVotersList()); - Set learners = new HashSet<>(clusterConfig.getLearnersList()); + final Set voters = new HashSet<>(clusterConfig.getVotersList()); + final Set learners = new HashSet<>(clusterConfig.getLearnersList()); // remove unreachable stores from voters and learners sanitize(voters, liveStores); @@ -156,6 +168,41 @@ private boolean meetExpectedConfig(Struct loadRules, if (!meetingGoal && needFix) { String leaderStore = leaderRange.ownerStoreDescriptor().getId(); if (voters.size() < targetVoters) { + if (!learners.isEmpty()) { + List learnerCandidates = landscape.entrySet().stream() + .filter(e -> learners.contains(e.getKey())) + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : learnerCandidates) { + learners.remove(s); // promote learner -> voter + voters.add(s); + if (voters.size() == targetVoters) { + break; + } + } + } + + if (voters.size() < targetVoters) { + List freeCandidates = landscape.entrySet().stream() + .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey())) + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : freeCandidates) { + voters.add(s); + if (voters.size() == targetVoters) { + break; + } + } + } + + if (expectedLearners == -1) { + Set newLearners = new HashSet<>(liveStores); 
+ newLearners.removeAll(voters); + learners.clear(); + learners.addAll(newLearners); + } List candidates = landscape.entrySet().stream() .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey())) .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java index 6c9aec88d..7e167a8f3 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java @@ -21,18 +21,20 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; -import org.apache.bifromq.basekv.balance.BalanceResultType; -import org.apache.bifromq.basekv.balance.command.BootstrapCommand; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import java.time.Duration; import java.util.Collections; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; +import org.apache.bifromq.basekv.balance.BalanceNow; +import org.apache.bifromq.basekv.balance.BalanceResult; +import org.apache.bifromq.basekv.balance.BalanceResultType; +import org.apache.bifromq.basekv.balance.command.BootstrapCommand; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -58,7 +60,7 @@ public 
void updateWithoutStoreDescriptors() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.BalanceNow); - assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow) result).command).getBoundary()); + assertEquals(((BootstrapCommand) ((BalanceNow) result).command).getBoundary(), FULL_BOUNDARY); } @@ -74,6 +76,51 @@ public void balanceWithTrigger() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.BalanceNow); - assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow) result).command).getBoundary()); + assertEquals(((BootstrapCommand) ((BalanceNow) result).command).getBoundary(), FULL_BOUNDARY); + } + + @Test + public void returnsAwaitImmediatelyBeforeDeadline() { + balancer.update(Collections.emptySet()); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.AwaitBalance); + + Duration remaining = ((org.apache.bifromq.basekv.balance.AwaitBalance) result).await; + assertFalse(remaining.isZero()); + assertTrue(remaining.toMillis() <= 2000L); + } + + @Test + public void awaitThenBalanceNowAfterDeadline() { + balancer.update(Collections.emptySet()); + + BalanceResult r1 = balancer.balance(); + assertSame(r1.type(), BalanceResultType.AwaitBalance); + long r1ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r1).await.toMillis(); + assertTrue(r1ms > 0); + + long half = Math.max(1, r1ms / 2); + mockTime.addAndGet(half); + BalanceResult r2 = balancer.balance(); + assertSame(r2.type(), BalanceResultType.AwaitBalance); + long r2ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r2).await.toMillis(); + assertTrue(r2ms >= 0 && r2ms < r1ms); + + mockTime.addAndGet(r2ms + 1); + BalanceResult r3 = balancer.balance(); + assertSame(r3.type(), BalanceResultType.BalanceNow); + assertEquals(((BootstrapCommand) ((BalanceNow) r3).command).getBoundary(), FULL_BOUNDARY); + } + + @Test + public void noSecondTriggerAfterBootstrapFires() { + 
balancer.update(Collections.emptySet()); + mockTime.addAndGet(2000L); + BalanceResult fired = balancer.balance(); + assertSame(fired.type(), BalanceResultType.BalanceNow); + + BalanceResult next = balancer.balance(); + assertSame(next.type(), BalanceResultType.NoNeedBalance); } } \ No newline at end of file diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java index e275db541..93a238d7a 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java @@ -163,7 +163,7 @@ public void balanceToAddLearner() { } @Test - public void balanceToRemoveVoter() { + public void balanceToRemoveLearner() { KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) @@ -172,7 +172,7 @@ public void balanceToRemoveVoter() { .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addVoters("remoteStore") + .addLearners("learnerStore") .build()) .build(); @@ -190,63 +190,41 @@ public void balanceToRemoveVoter() { BalanceResult result = balancer.balance(); ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; assertTrue(command.getVoters().contains("localStore")); - assertFalse(command.getVoters().contains("remoteStore")); assertTrue(command.getLearners().isEmpty()); } @Test - public void balanceToRemoveLearner() { + public void promoteLearnersToVoters() { KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); - KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() + KVRangeDescriptor 
leader = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addLearners("learnerStore") + .addLearners("remoteStore") .build()) .build(); - - KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() - .setId("localStore") - .addRanges(kvRangeDescriptor) - .putStatistics("cpu.usage", 0.5) - .build(); - - Set storeDescriptors = new HashSet<>(); - storeDescriptors.add(storeDescriptor); - - balancer.update(storeDescriptors); - - BalanceResult result = balancer.balance(); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; - assertTrue(command.getVoters().contains("localStore")); - assertTrue(command.getLearners().isEmpty()); - } - - @Test - public void nothingChanged() { - KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); - KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() + KVRangeDescriptor learner = KVRangeDescriptor.newBuilder() .setId(kvRangeId) - .setRole(RaftNodeStatus.Leader) + .setRole(RaftNodeStatus.Follower) .setVer(1) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addLearners("learnerStore") + .addLearners("remoteStore") .build()) .build(); KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() .setId("localStore") - .addRanges(kvRangeDescriptor) + .addRanges(leader) .putStatistics("cpu.usage", 0.5) .build(); KVRangeStoreDescriptor learnerStoreDescriptor = KVRangeStoreDescriptor.newBuilder() - .setId("learnerStore") - .addRanges(kvRangeDescriptor) + .setId("remoteStore") + .addRanges(learner) .putStatistics("cpu.usage", 0.5) .build(); @@ -255,8 +233,10 @@ public void nothingChanged() { storeDescriptors.add(learnerStoreDescriptor); balancer.update(storeDescriptors); - - assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + ChangeConfigCommand 
command = (ChangeConfigCommand) ((BalanceNow) balancer.balance()).command; + assertTrue(command.getVoters().contains("localStore")); + assertTrue(command.getVoters().contains("remoteStore")); + assertTrue(command.getLearners().isEmpty()); } @Test @@ -508,46 +488,6 @@ public void removeDeadVoterAndBackfillEvenIfCountEqualsExpected() { assertTrue(cmd.getLearners().isEmpty()); } - @Test - public void targetVotersIsCappedByLiveStores() { - // live: s1, s2;expected voters=3 - // range current voters = [s1, s2, ghost] - KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); - KVRangeDescriptor range = KVRangeDescriptor.newBuilder() - .setId(kvRangeId) - .setRole(RaftNodeStatus.Leader) - .setVer(1) - .setBoundary(FULL_BOUNDARY) - .setConfig(ClusterConfig.newBuilder() - .addVoters("s1") - .addVoters("s2") - .addVoters("ghost") - .build()) - .build(); - - KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); - KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); - - Set stores = new HashSet<>(); - stores.add(s1); - stores.add(s2); - - balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); - balancer.update(stores); - - BalanceResult result = balancer.balance(); - assertSame(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; - - // expected:voters=[s1、s2] - assertEquals(cmd.getKvRangeId(), kvRangeId); - assertTrue(cmd.getVoters().contains("s1")); - assertTrue(cmd.getVoters().contains("s2")); - assertEquals(cmd.getVoters().size(), 2); - assertFalse(cmd.getVoters().contains("ghost")); - assertTrue(cmd.getLearners().isEmpty()); - } - @Test public void abortWhenConfigChangeInProgress_nextFieldsPresent() { KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); @@ -617,4 +557,242 @@ public void learnersMinusOneUsesLiveMinusVotersAndSanitizes() { 
assertTrue(cmd.getLearners().contains("s3")); assertEquals(cmd.getLearners().size(), 2); } + + @Test + public void skipWhenCapacityInsufficientAndHasDeadVoter() { + // expected voters=3,live voters=S1,S2, S3(dead) + ReplicaCntBalancer balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + .addVoters("deadS3") // dead + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + Set landscape = new HashSet<>(); + landscape.add(s1); + landscape.add(s2); + + balancer.update(landscape); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void learnersMinusOnePreferPromoteLearnersToFillVoters() { + // expected: voters=3, learners=-1 + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, -1); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("s2") + .addLearners("s3") + .addLearners("s4") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + 
balancer.update(Set.of(s1, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // voters should be s1 + two from {s2,s3,s4} + assertTrue(cmd.getVoters().contains("s1")); + assertEquals(cmd.getVoters().size(), 3); + // after promotion, learners should be live - voters = the remaining one + assertEquals(cmd.getLearners().size(), 1); + Set all = Set.of("s1", "s2", "s3", "s4"); + Set union = new HashSet<>(cmd.getVoters()); + union.addAll(cmd.getLearners()); + assertEquals(union, all); + } + + @Test + public void noChangeWhenLiveLessThanExpectedAndNoDeadVoter() { + // expected voters=3, live={s1,s2}, voters={s1,s2} + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + + balancer.update(Set.of(s1, s2)); + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void neverRemoveLeaderWhenShrinkingVoters() { + // expected voters=3, voters currently 4 (leader must stay) + balancer = new ReplicaCntBalancer("testCluster", "leader", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("leader") + .addVoters("s2") + .addVoters("s3") + 
.addVoters("s4") + .build()) + .build(); + + KVRangeStoreDescriptor leader = KVRangeStoreDescriptor.newBuilder().setId("leader").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + balancer.update(Set.of(leader, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("leader")); + assertEquals(cmd.getVoters().size(), 3); + } + + @Test + public void balanceVoterCountNoopWhenSpreadWithinOne() { + // two stores, two ranges: counts differ by at most 1 -> no rebalance + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0); + + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1).setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("s1").build()) + .build(); + + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2).setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("s2").build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(d1).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(d2).build(); + + balancer.update(Set.of(s1, s2)); + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void 
fixedLearnerCountRemovesDeadAndBackfills() { + // expected learners=2; current learners={deadL, s2}; live={s1,s2,s3,s4} + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 2); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("deadL") + .addLearners("s2") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + balancer.update(Set.of(s1, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("s1")); + assertEquals(cmd.getLearners().size(), 2); + assertTrue(cmd.getLearners().contains("s2")); + assertFalse(cmd.getLearners().contains("deadL")); + } + + @Test + public void zeroLearnersTargetClearsLearners() { + // expected learners=0 + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid).setRole(RaftNodeStatus.Leader).setVer(1).setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder().addVoters("s1").addLearners("s2").addLearners("s3").build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = 
KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + balancer.update(Set.of(s1, s2, s3)); + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + assertTrue(cmd.getLearners().isEmpty()); + assertTrue(cmd.getVoters().contains("s1")); + } + + @Test + public void learnersMinusOneWithAllLiveAsVotersMakesLearnersEmpty() { + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, -1); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid).setRole(RaftNodeStatus.Leader).setVer(1).setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1").addVoters("s2").addVoters("s3") + .addLearners("ghost") // should be sanitized away + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + balancer.update(Set.of(s1, s2, s3)); + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertEquals(cmd.getVoters(), Set.of("s1", "s2", "s3")); + assertTrue(cmd.getLearners().isEmpty()); + } } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java index ab06de152..15d961508 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java @@ -20,7 +20,6 @@ package org.apache.bifromq.basekv.server; import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.baserpc.server.UnaryResponse.response; import com.google.common.collect.Sets; @@ -94,7 +93,6 @@ public String storeId() { public void start() { log.debug("Starting BaseKVStore service"); kvRangeStore.start(new AgentHostStoreMessenger(agentHost, clusterId, kvRangeStore.id())); - kvRangeStore.bootstrap(KVRangeIdUtil.generate(), FULL_BOUNDARY); landscapeReporter = metaService.landscapeReporter(clusterId, kvRangeStore.id()); // sync store descriptor via crdt disposables.add(kvRangeStore.describe().subscribe(landscapeReporter::report)); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java index f58dc2b99..0f60e10ba 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java @@ -758,9 +758,7 @@ private Supplier> applyConfigChange(long term, .setType(Normal) .setTaskId(taskId) .build()); - return () -> compactWAL().thenRun(() -> { - finishCommand(taskId); - }); + return () -> compactWAL().thenRun(() -> finishCommand(taskId)); } } else { // request config change failed, the config entry is appended due to leader reelection @@ -833,8 +831,8 @@ private Supplier> applyConfigChange(long term, } } default -> { - // skip internal config change triggered by leadership change - return this::compactWAL; + // skip internal config change triggered by leadership change, no need to compact WAL + return () -> 
CompletableFuture.completedFuture(null); } } } @@ -877,11 +875,6 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(), // make a checkpoint if needed CompletableFuture compactWALFuture = CompletableFuture.completedFuture(null); if (wal.latestSnapshot().getLastAppliedIndex() < logIndex - 1) { - // cancel all on-going dump sessions - dumpSessions.forEach((sessionId, session) -> { - session.cancel(); - dumpSessions.remove(sessionId, session); - }); compactWALFuture = compactWAL(); } compactWALFuture.whenCompleteAsync((v, e) -> { @@ -1513,7 +1506,6 @@ private CompletableFuture restore(KVRangeSnapshot snapshot, return restorer.restoreFrom(leader, snapshot) .handle((result, ex) -> { if (ex != null) { - log.warn("Restored from snapshot error: \n{}", snapshot, ex); return onInstalled.call(null, ex); } else { return onInstalled.call(kvRange.checkpoint(), null); @@ -1522,10 +1514,9 @@ private CompletableFuture restore(KVRangeSnapshot snapshot, .thenCompose(f -> f) .whenCompleteAsync(unwrap((v, e) -> { if (e != null) { - if (e instanceof SnapshotException) { - log.error("Failed to apply snapshot to WAL \n{}", snapshot, e); - // WAL and FSM are inconsistent, need to quit and recreate again - quitSignal.complete(null); + if (e instanceof SnapshotException.ObsoleteSnapshotException) { + log.debug("Obsolete snapshot, reset kvRange to latest snapshot: \n{}", snapshot); + kvRange.toReseter(wal.latestSnapshot()).done(); } } else { linearizer.afterLogApplied(snapshot.getLastAppliedIndex()); @@ -1582,6 +1573,11 @@ private void shrinkWAL() { } private CompletableFuture compactWAL() { + // cancel all on-going dump sessions + dumpSessions.forEach((sessionId, session) -> { + session.cancel(); + dumpSessions.remove(sessionId, session); + }); return mgmtTaskRunner.add(this::doCompactWAL); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java index 42f63ab79..c121a241d 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java @@ -72,8 +72,7 @@ class KVRangeWALSubscription implements IKVRangeWALSubscription { applyRunner.add(restore(task)) .handle((snap, e) -> fetchRunner.add(() -> { if (e != null) { - log.error( - "Failed to install snapshot\n{}", snap); + log.error("Failed to restore from snapshot\n{}", task.snapshot, e); return; } log.debug("Snapshot installed\n{}", snap); diff --git a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java index edb96f62b..b65f248fb 100644 --- a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java +++ b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java @@ -135,6 +135,7 @@ public void setup() { .storeOptions(kvRangeStoreOptions) .subBrokerManager(subBrokerMgr) .settingProvider(settingProvider) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); distServer = IDistServer.builder() .rpcServerBuilder(rpcServerBuilder) @@ -145,7 +146,7 @@ public void setup() { rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet())); distClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst(); log.info("Setup finished, and start testing"); } diff --git 
a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java index 106d29007..975f0e1d1 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java @@ -235,10 +235,11 @@ public void setup() { .subBrokerManager(receiverManager) .settingProvider(settingProvider) .inlineFanoutThreshold(1) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java index 7306ea6e3..bf560d9d8 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -26,15 +26,6 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; -import org.apache.bifromq.baserpc.client.IRPCClient; -import org.apache.bifromq.baserpc.server.IRPCServer; -import org.apache.bifromq.baserpc.server.RPCServerBuilder; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; -import org.apache.bifromq.plugin.eventcollector.IEventCollector; -import org.apache.bifromq.plugin.settingprovider.ISettingProvider; -import org.apache.bifromq.plugin.settingprovider.Setting; -import org.apache.bifromq.retain.client.IRetainClient; -import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import java.lang.reflect.Method; import java.time.Duration; import java.util.concurrent.CompletableFuture; @@ -51,11 +42,20 @@ import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.baserpc.client.IRPCClient; +import org.apache.bifromq.baserpc.server.IRPCServer; +import org.apache.bifromq.baserpc.server.RPCServerBuilder; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.dist.client.MatchResult; import org.apache.bifromq.dist.client.UnmatchResult; import org.apache.bifromq.inbox.client.IInboxClient; import org.apache.bifromq.inbox.store.IInboxStore; +import org.apache.bifromq.plugin.eventcollector.IEventCollector; +import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; +import org.apache.bifromq.plugin.settingprovider.ISettingProvider; +import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.retain.client.IRetainClient; import org.apache.bifromq.sessiondict.client.ISessionDictClient; import org.mockito.Mock; import org.mockito.MockitoAnnotations; @@ 
-144,6 +144,7 @@ public void setup() { .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) .detachTimeout(Duration.ofSeconds(2)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); inboxServer = IInboxServer.builder() .rpcServerBuilder(rpcServerBuilder) @@ -153,7 +154,7 @@ public void setup() { .build(); rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet())); inboxClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst(); log.info("Setup finished, and start testing"); } diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java index 73f23877b..fdf2d27e4 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java @@ -33,15 +33,6 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.baserpc.client.IConnectable; -import org.apache.bifromq.baserpc.server.IRPCServer; -import org.apache.bifromq.baserpc.server.RPCServerBuilder; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; -import org.apache.bifromq.plugin.eventcollector.IEventCollector; -import org.apache.bifromq.plugin.settingprovider.ISettingProvider; -import org.apache.bifromq.plugin.settingprovider.Setting; -import org.apache.bifromq.retain.client.IRetainClient; -import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Meter; @@ -87,6 
+78,10 @@ import org.apache.bifromq.basekv.store.proto.RWCoProcInput; import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.baserpc.client.IConnectable; +import org.apache.bifromq.baserpc.server.IRPCServer; +import org.apache.bifromq.baserpc.server.RPCServerBuilder; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.inbox.client.IInboxClient; import org.apache.bifromq.inbox.storage.proto.BatchAttachRequest; @@ -117,6 +112,11 @@ import org.apache.bifromq.inbox.storage.proto.InsertResult; import org.apache.bifromq.inbox.storage.proto.Replica; import org.apache.bifromq.metrics.TenantMetric; +import org.apache.bifromq.plugin.eventcollector.IEventCollector; +import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; +import org.apache.bifromq.plugin.settingprovider.ISettingProvider; +import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.retain.client.IRetainClient; import org.apache.bifromq.sessiondict.client.ISessionDictClient; import org.apache.bifromq.type.ClientInfo; import org.apache.bifromq.type.Message; @@ -208,7 +208,7 @@ public void setup() throws IOException { rpcServer.start(); storeClient.connState().filter(connState -> connState == IConnectable.ConnState.READY).blockingFirst(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } @@ -232,6 +232,7 @@ private void buildStoreServer() { .bgTaskExecutor(bgTaskExecutor) .detachTimeout(Duration.ofSeconds(1)) .gcInterval(Duration.ofSeconds(1)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); } diff --git 
a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java index 12b10a968..b4911cb85 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java @@ -59,7 +59,7 @@ public void gcJobAfterRestart() { InboxVersion inboxVersion = requestAttach(attachParams).get(0); restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); when(sessionDictClient.exist(any())).thenReturn(CompletableFuture.completedFuture(OnlineCheckResult.NOT_EXISTS)); ArgumentCaptor deleteCaptor = ArgumentCaptor.forClass(DeleteRequest.class); verify(inboxClient, timeout(10000)).delete(deleteCaptor.capture()); diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java index 262bb628c..1609614d6 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.store; @@ -73,7 +73,7 @@ public void collectAfterRestart() { restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); Gauge newSubCountGauge = getSubCountGauge(tenantId); Gauge newPSessionGauge = getPSessionGauge(tenantId); Gauge newPSessionSpaceGauge = getPSessionSpaceGauge(tenantId); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java index 855fe427b..a1e593dc4 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java @@ -184,6 +184,7 @@ public final void setupClass() { .resourceThrottler(resourceThrottler) .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new InMemKVEngineConfigurator())) @@ -214,6 +215,7 @@ public final void setupClass() { .retainStoreClient(retainStoreKVStoreClient) .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new InMemKVEngineConfigurator())) @@ -243,6 +245,7 @@ public final void setupClass() { .distWorkerClient(distWorkerStoreClient) .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new 
InMemKVEngineConfigurator())) @@ -299,9 +302,9 @@ public final void setupClass() { .filter(state -> state == IRPCClient.ConnState.READY) .firstElement() .blockingSubscribe(); - await().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); - await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); - await().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); lenient().when(settingProvider.provide(any(), anyString())) .thenAnswer(invocation -> { Setting setting = invocation.getArgument(0); diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java index f751fbaf5..abdbe24ca 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.retain.store; @@ -22,9 +22,9 @@ import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertNotSame; -import org.apache.bifromq.basekv.utils.BoundaryUtil; import io.micrometer.core.instrument.Gauge; import java.time.Duration; +import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -47,7 +47,7 @@ public void testLoadMetadata() { Gauge retainCountGauge = getRetainCountGauge(tenantId); restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); Gauge newSpaceUsageGauge = getSpaceUsageGauge(tenantId); Gauge newRetainCountGauge = getRetainCountGauge(tenantId); diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java index 926e78a08..4f792c2dd 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.retain.store; @@ -28,6 +28,25 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.Meter; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Comparator; +import java.util.Objects; +import java.util.UUID; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.AgentHostOptions; import org.apache.bifromq.basecluster.IAgentHost; import org.apache.bifromq.basecrdt.service.CRDTServiceOptions; @@ -69,25 +88,6 @@ import org.apache.bifromq.type.ClientInfo; import org.apache.bifromq.type.Message; import org.apache.bifromq.type.TopicMessage; -import com.google.protobuf.ByteString; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Meter; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.simple.SimpleMeterRegistry; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.time.Duration; -import java.util.Comparator; -import java.util.Objects; -import java.util.UUID; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicReference; -import lombok.extern.slf4j.Slf4j; import org.mockito.MockitoAnnotations; import 
org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -143,7 +143,7 @@ public void setup() throws IOException { .metaService(metaService).build(); buildStoreServer(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } @@ -158,6 +158,7 @@ private void buildStoreServer() { .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) .gcInterval(Duration.ofSeconds(60)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); } From 23f12e9cf21dff304a2c59072a81e101c3192c34 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Fri, 22 Aug 2025 11:52:24 +0800 Subject: [PATCH 11/20] 1. correctly handle the duplicate matchinfo in inbox ingestion package 2. correct the code format of Setting's file --- .../proto/inboxservice/InboxStoreCoProc.proto | 11 +- .../bifromq/inbox/server/InboxWriter.java | 91 +++++-- .../bifromq/inbox/server/InboxWriterTest.java | 19 +- .../bifromq/inbox/store/InboxStoreCoProc.java | 44 ++-- .../bifromq/inbox/store/InboxInsertTest.java | 232 ++++++++++++++++-- .../plugin/settingprovider/Setting.java | 12 +- 6 files changed, 334 insertions(+), 75 deletions(-) diff --git a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto index 66fa081d3..83ce4f89d 100644 --- a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto +++ b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto @@ -196,9 +196,13 @@ message BatchFetchReply{ repeated Fetched result = 1; } +message MatchedRoute{ + string topicFilter = 1; // the matched topic filter + uint64 incarnation = 2; // route incarnation 
+} // insert won't change version & lastActive timestamp message SubMessagePack{ - map matchedTopicFilters = 1; // key: topicFilter, value: route incarnation + repeated MatchedRoute matchedRoute = 1; commontype.TopicMessagePack messages = 2; } @@ -219,9 +223,8 @@ message InsertResult{ NO_INBOX = 1; } message SubStatus{ - string topicFilter = 1; - uint64 incarnation = 2; - bool rejected = 3; + MatchedRoute matchedRoute = 1; + bool rejected = 2; // true if the route is outdated } Code code = 1; repeated SubStatus result = 2; diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java index 941a3865c..77c0d4280 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -22,6 +22,15 @@ import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; import static org.apache.bifromq.plugin.subbroker.TypeUtil.toResult; +import java.time.Duration; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.base.util.AsyncRetry; import org.apache.bifromq.base.util.exception.RetryTimeoutException; import org.apache.bifromq.basekv.client.exception.BadVersionException; @@ -34,6 +43,7 @@ import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.plugin.subbroker.DeliveryPack; import org.apache.bifromq.plugin.subbroker.DeliveryReply; @@ -41,14 +51,6 @@ import org.apache.bifromq.sysprops.props.DataPlaneMaxBurstLatencyMillis; import org.apache.bifromq.type.MatchInfo; import org.apache.bifromq.type.TopicMessagePack; -import org.apache.bifromq.util.TopicUtil; -import java.time.Duration; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; @Slf4j class InboxWriter implements InboxWriterPipeline.ISendRequestHandler { @@ -62,7 +64,7 @@ class InboxWriter implements InboxWriterPipeline.ISendRequestHandler { @Override public CompletableFuture handle(SendRequest request) { - Map> matchInfosByInbox = new HashMap<>(); + Map> matchInfosByInbox = new HashMap<>(); Map> subMsgPacksByInbox = new HashMap<>(); // break DeliveryPack into SubMessagePack by each 
TenantInboxInstance for (String tenantId : request.getRequest().getPackageMap().keySet()) { @@ -71,11 +73,15 @@ public CompletableFuture handle(SendRequest request) { Map subMsgPackByInbox = new HashMap<>(); for (MatchInfo matchInfo : pack.getMatchInfoList()) { TenantInboxInstance tenantInboxInstance = TenantInboxInstance.from(tenantId, matchInfo); - matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>()).add(matchInfo); + MatchedRoute matchedRoute = MatchedRoute.newBuilder() + .setTopicFilter(matchInfo.getMatcher().getMqttTopicFilter()) + .setIncarnation(matchInfo.getIncarnation()) + .build(); + matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new HashMap<>()) + .put(matchedRoute, matchInfo); subMsgPackByInbox.computeIfAbsent(tenantInboxInstance, k -> SubMessagePack.newBuilder().setMessages(topicMessagePack)) - .putMatchedTopicFilters(matchInfo.getMatcher().getMqttTopicFilter(), - matchInfo.getIncarnation()); + .addMatchedRoute(matchedRoute); } for (TenantInboxInstance tenantInboxInstance : subMsgPackByInbox.keySet()) { subMsgPacksByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>()) @@ -127,20 +133,21 @@ public CompletableFuture handle(SendRequest request) { Map> tenantMatchResultMap = new HashMap<>(); int i = 0; for (TenantInboxInstance tenantInboxInstance : subMsgPacksByInbox.keySet()) { - String receiverId = tenantInboxInstance.receiverId(); + Map matchedRoutesMap = matchInfosByInbox.get(tenantInboxInstance); InsertResult result = replyFutures.get(i++).join(); Map matchResultMap = tenantMatchResultMap.computeIfAbsent(tenantInboxInstance.tenantId(), k -> new HashMap<>()); switch (result.getCode()) { - case OK -> result.getResultList().forEach(insertionResult -> { - DeliveryResult.Code code = - insertionResult.getRejected() ? 
DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; - matchResultMap.putIfAbsent(MatchInfo.newBuilder().setReceiverId(receiverId) - .setMatcher(TopicUtil.from(insertionResult.getTopicFilter())) - .setIncarnation(insertionResult.getIncarnation()).build(), code); - }); + case OK -> { + Function resultFinder = + getFinalResultFinder(result.getResultList()); + for (MatchedRoute matchedRoute : matchedRoutesMap.keySet()) { + matchResultMap.putIfAbsent(matchedRoutesMap.get(matchedRoute), + resultFinder.apply(matchedRoute)); + } + } case NO_INBOX -> { - for (MatchInfo matchInfo : matchInfosByInbox.get(tenantInboxInstance)) { + for (MatchInfo matchInfo : matchedRoutesMap.values()) { matchResultMap.putIfAbsent(matchInfo, DeliveryResult.Code.NO_RECEIVER); } } @@ -155,4 +162,44 @@ public CompletableFuture handle(SendRequest request) { .build()).build(); })); } + + private Function getFinalResultFinder(List subStatuses) { + Function resultFinder = getResultFinder(subStatuses); + return matchedRoute -> { + DeliveryResult.Code code = resultFinder.apply(matchedRoute); + if (code == null) { + // incompleted result from coproc + log.warn("MatchedRoute {} is missing in result", matchedRoute); + return DeliveryResult.Code.NO_SUB; + } + return code; + }; + } + + private Function getResultFinder( + List subStatuses) { + if (subStatuses.size() == 1) { + InsertResult.SubStatus onlyStatus = subStatuses.get(0); + return matchedRoute -> { + if (matchedRoute.equals(onlyStatus.getMatchedRoute())) { + return onlyStatus.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; + } + return null; + }; + } else if (subStatuses.size() < 10) { + return matchedRoute -> { + for (InsertResult.SubStatus status : subStatuses) { + if (status.getMatchedRoute().equals(matchedRoute)) { + return status.getRejected() ? 
DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; + } + } + return null; + }; + } else { + Map resultMap = subStatuses.stream() + .collect(Collectors.toMap(InsertResult.SubStatus::getMatchedRoute, + e -> e.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK)); + return resultMap::get; + } + } } diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java index 1d811d3e0..67a51d029 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -25,16 +25,17 @@ import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import java.util.concurrent.CompletableFuture; +import lombok.SneakyThrows; import org.apache.bifromq.inbox.rpc.proto.SendReply; import org.apache.bifromq.inbox.rpc.proto.SendRequest; import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.plugin.subbroker.DeliveryReply; import org.apache.bifromq.plugin.subbroker.DeliveryResult; import org.apache.bifromq.plugin.subbroker.DeliveryResults; -import java.util.concurrent.CompletableFuture; -import lombok.SneakyThrows; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.testng.annotations.AfterMethod; @@ -86,8 +87,10 @@ public void insertScheduleRejected() { InsertResult.newBuilder() .addResult(InsertResult.SubStatus.newBuilder() .setRejected(true) - .setIncarnation(1L) - .setTopicFilter("/foo/+") + .setMatchedRoute(MatchedRoute.newBuilder() + .setIncarnation(1L) + .setTopicFilter("/foo/+") + .build()) .build()) .setCode(InsertResult.Code.OK) .build())); @@ -103,8 +106,10 @@ public void insertScheduleOk() { .setCode(InsertResult.Code.OK) .addResult(InsertResult.SubStatus.newBuilder() .setRejected(false) - .setTopicFilter("/foo/+") - .setIncarnation(1L) + .setMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter("/foo/+") + .setIncarnation(1L) + .build()) .build()) .build())); SendRequest request = sendRequest(); diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java index 3a019d80b..383e06acb 100644 --- 
a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.store; @@ -116,6 +116,7 @@ import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; import org.apache.bifromq.inbox.storage.proto.LWT; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.inbox.store.delay.DelayTaskRunner; import org.apache.bifromq.inbox.store.delay.ExpireInboxTask; @@ -966,39 +967,48 @@ private Runnable batchInsert(BatchInsertRequest request, List bufferMsgList = new ArrayList<>(); Set insertResults = new HashSet<>(); for (SubMessagePack messagePack : params.getMessagePackList()) { - Map matchedTopicFilters = messagePack.getMatchedTopicFiltersMap(); Map qos0TopicFilters = new HashMap<>(); Map qos1TopicFilters = new HashMap<>(); Map qos2TopicFilters = new HashMap<>(); TopicMessagePack topicMsgPack = messagePack.getMessages(); - for (String matchedTopicFilter : matchedTopicFilters.keySet()) { - long matchedIncarnation = matchedTopicFilters.get(matchedTopicFilter); - TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedTopicFilter); + for (MatchedRoute matchedRoute : messagePack.getMatchedRouteList()) { + long matchedIncarnation = matchedRoute.getIncarnation(); + TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedRoute.getTopicFilter()); if (tfOption == null) { - insertResults.add( - InsertResult.SubStatus.newBuilder().setTopicFilter(matchedTopicFilter) - 
.setIncarnation(matchedIncarnation).setRejected(true).build()); + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(true) + .build()); } else { if (tfOption.getIncarnation() > matchedIncarnation) { // messages from old sub incarnation log.debug("Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}", - matchedTopicFilter, tfOption.getIncarnation(), matchedIncarnation); + matchedRoute, tfOption.getIncarnation(), matchedIncarnation); + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(true) + .build()); + } else { + // messages from current incarnation + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(false) + .build()); } switch (tfOption.getQos()) { - case AT_MOST_ONCE -> qos0TopicFilters.put(matchedTopicFilter, tfOption); - case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedTopicFilter, tfOption); - case EXACTLY_ONCE -> qos2TopicFilters.put(matchedTopicFilter, tfOption); + case AT_MOST_ONCE -> qos0TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case EXACTLY_ONCE -> qos2TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); default -> { // never happens } } - insertResults.add(InsertResult.SubStatus.newBuilder() - .setTopicFilter(matchedTopicFilter) - .setIncarnation(matchedIncarnation) - .setRejected(false) - .build()); } } + if (qos0TopicFilters.isEmpty() && qos1TopicFilters.isEmpty() && qos2TopicFilters.isEmpty()) { + // no matched topic filter, skip this message pack + continue; + } String topic = topicMsgPack.getTopic(); for (TopicMessagePack.PublisherPack publisherPack : topicMsgPack.getMessageList()) { for (Message message : publisherPack.getMessageList()) { diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java 
b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java index 2d08a10a0..e70fe507c 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.store; @@ -39,6 +39,7 @@ import org.apache.bifromq.inbox.storage.proto.InboxVersion; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.plugin.eventcollector.inboxservice.Overflowed; import org.apache.bifromq.type.ClientInfo; @@ -61,7 +62,10 @@ public void insertNoInbox() { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(message(AT_MOST_ONCE, "hello")) @@ -86,6 +90,147 @@ public void commitNoInbox() { assertEquals(commitCode, BatchCommitReply.Code.NO_INBOX); } + + @Test(groups = "integration") + public void insertWithUnmatchedTopicFilterRejected() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + // do not create any subscription, so all matched topic filters will be unmatched + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + 
BatchAttachRequest.Params attachParams = BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build(); + requestAttach(attachParams).get(0); + + String unmatchedTF = "/not/subscribed"; + TopicMessagePack.PublisherPack msg = message(QoS.AT_MOST_ONCE, "hello-unmatched"); + + InsertResult insertResult = requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(unmatchedTF) + .setIncarnation(1L) + .build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic(unmatchedTF) + .addMessage(msg) + .build()) + .build()) + .build()).get(0); + + // insert is ignored because no subscription matches the topic filter + assertEquals(insertResult.getCode(), InsertResult.Code.OK); + assertEquals(insertResult.getResultCount(), 1); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), unmatchedTF); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); + assertTrue(insertResult.getResult(0).getRejected()); + + // no messages should be fetched + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + assertEquals(fetched.getQos0MsgCount(), 0); + assertEquals(fetched.getSendBufferMsgCount(), 0); + } + + @Test(groups = "integration") + public void insertWithOldAndCurrentIncarnationMixed() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + String topicFilter = "/a/b/c"; + + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + InboxVersion inboxVersion = 
requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build()).get(0); + + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(inboxVersion) + .setTopicFilter(topicFilter) + .setOption(TopicFilterOption.newBuilder() + .setIncarnation(1L) + .setQos(QoS.AT_MOST_ONCE) + .build()) + .setMaxTopicFilters(100) + .setNow(now) + .build()); + + TopicMessagePack.PublisherPack msg1 = message(QoS.AT_MOST_ONCE, "keep-me-1"); + TopicMessagePack.PublisherPack msg2 = message(QoS.AT_MOST_ONCE, "keep-me-2"); + + // same topicFilter, same package with 2 matched: one old (0), one current (1) + InsertResult insertResult = requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) // old -> rejected=true + .build()) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) // matched -> rejected=false + .build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic(topicFilter) + .addMessage(msg1) + .addMessage(msg2) + .build()) + .build()) + .build()).get(0); + + assertEquals(insertResult.getCode(), InsertResult.Code.OK); + boolean oldRejected = false; + boolean currAccepted = false; + for (InsertResult.SubStatus s : insertResult.getResultList()) { + if (s.getMatchedRoute().getTopicFilter().equals(topicFilter) + && s.getMatchedRoute().getIncarnation() == 0L) { + assertTrue(s.getRejected()); + oldRejected = true; + } + if (s.getMatchedRoute().getTopicFilter().equals(topicFilter) + && s.getMatchedRoute().getIncarnation() == 1L) { + assertFalse(s.getRejected()); + currAccepted = true; + } + } + assertTrue(oldRejected && currAccepted); + + Fetched fetched = 
requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + + assertEquals(fetched.getQos0MsgCount(), 2); + assertEquals(fetched.getQos0Msg(0).getMsg().getMessage(), msg1.getMessage(0)); + assertEquals(fetched.getQos0Msg(1).getMsg().getMessage(), msg2.getMessage(0)); + } + protected void fetchWithoutStartAfter(QoS qos) { long now = 0; String tenantId = "tenantId-" + System.nanoTime(); @@ -120,7 +265,10 @@ protected void fetchWithoutStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -129,8 +277,8 @@ protected void fetchWithoutStartAfter(QoS qos) { .build()) .build()).get(0); assertEquals(insertResult.getCode(), InsertResult.Code.OK); - assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter); - assertEquals(insertResult.getResult(0).getIncarnation(), 1L); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); Fetched fetched = requestFetch( BatchFetchRequest.Params.newBuilder() @@ -190,7 +338,10 @@ protected void fetchWithMaxLimit(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -199,8 +350,8 @@ protected void fetchWithMaxLimit(QoS qos) { .build()) .build()).get(0); assertEquals(insertResult.getCode(), InsertResult.Code.OK); - 
assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter); - assertEquals(insertResult.getResult(0).getIncarnation(), 1L); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() .setTenantId(tenantId) @@ -270,7 +421,10 @@ protected void fetchWithStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -284,7 +438,10 @@ protected void fetchWithStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg4) @@ -386,7 +543,10 @@ protected void commit(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -504,7 +664,10 @@ protected void commitAll(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -518,7 +681,10 @@ protected 
void commitAll(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg4) @@ -587,7 +753,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -599,7 +768,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -611,7 +783,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg2) @@ -646,7 +821,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -715,7 +893,10 @@ protected void insertDropYoungest(QoS 
qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -727,7 +908,10 @@ protected void insertDropYoungest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -755,7 +939,10 @@ protected void insertDropYoungest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -824,7 +1011,10 @@ public void insertQoS012() { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) diff --git a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java index a59619185..a6af28b2b 100644 --- a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java +++ 
b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java @@ -28,10 +28,14 @@ */ @Slf4j public enum Setting { - MQTT3Enabled(Boolean.class, val -> true, true), MQTT4Enabled(Boolean.class, val -> true, true), - MQTT5Enabled(Boolean.class, val -> true, true), DebugModeEnabled(Boolean.class, val -> true, false), - ForceTransient(Boolean.class, val -> true, false), ByPassPermCheckError(Boolean.class, val -> true, true), - PayloadFormatValidationEnabled(Boolean.class, val -> true, true), RetainEnabled(Boolean.class, val -> true, true), + MQTT3Enabled(Boolean.class, val -> true, true), + MQTT4Enabled(Boolean.class, val -> true, true), + MQTT5Enabled(Boolean.class, val -> true, true), + DebugModeEnabled(Boolean.class, val -> true, false), + ForceTransient(Boolean.class, val -> true, false), + ByPassPermCheckError(Boolean.class, val -> true, true), + PayloadFormatValidationEnabled(Boolean.class, val -> true, true), + RetainEnabled(Boolean.class, val -> true, true), WildcardSubscriptionEnabled(Boolean.class, val -> true, true), SubscriptionIdentifierEnabled(Boolean.class, val -> true, true), SharedSubscriptionEnabled(Boolean.class, val -> true, true), From 2fc40185977c834221a1482a4ce0f8cef2fe0deb Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Wed, 27 Aug 2025 18:30:44 +0800 Subject: [PATCH 12/20] Fixed an unbalanced issue of ReplicaCntBalancer --- .../balance/impl/ReplicaCntBalancer.java | 114 ++++++++--------- .../balance/impl/ReplicaCntBalancerTest.java | 116 ++++++++++++++++++ 2 files changed, 173 insertions(+), 57 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java index d9014db30..7fabc17db 100644 --- 
a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java @@ -335,20 +335,11 @@ record StoreVoterCount(String storeId, int voterCount) {} double totalVoters = storeVoterCount.values().stream().mapToInt(Integer::intValue).sum(); double targetVotersPerStore = liveStores.isEmpty() ? 0 : totalVoters / liveStores.size(); - int maxVotersPerStore = (int) Math.ceil(targetVotersPerStore); int minVotersPerStore = (int) Math.floor(targetVotersPerStore); int globalMax = storeVoterCount.values().stream().mapToInt(Integer::intValue).max().orElse(0); int globalMin = storeVoterCount.values().stream().mapToInt(Integer::intValue).min().orElse(0); if (globalMax - globalMin <= 1) { - for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { - ClusterConfig cc = entry.getValue().descriptor().getConfig(); - Set voters = new HashSet<>(cc.getVotersList()); - Set learners = new HashSet<>(cc.getLearnersList()); - sanitize(voters, liveStores); - sanitize(learners, liveStores); - expectedRangeLayout.put(entry.getKey(), buildConfig(voters, learners)); - } return false; } @@ -363,41 +354,43 @@ record StoreVoterCount(String storeId, int voterCount) {} sanitize(learners, liveStores); voterSorted.retainAll(liveStores); - for (String voter : new ArrayList<>(voterSorted)) { - if (storeVoterCount.getOrDefault(voter, 0) >= maxVotersPerStore) { - for (StoreVoterCount under : storeVoterCountSorted) { - if (storeVoterCount.getOrDefault(under.storeId, 0) <= minVotersPerStore - && !voterSorted.contains(under.storeId) - && !learners.contains(under.storeId)) { - // move voter -> underloaded - Set newVoters = new HashSet<>(voterSorted); - newVoters.remove(voter); - newVoters.add(under.storeId); - - expectedRangeLayout.put(boundary, buildConfig(newVoters, learners)); - meetingGoal = true; - break; + if (!meetingGoal) { 
+ meet: + for (String voter : new ArrayList<>(voterSorted)) { + int voters = storeVoterCount.getOrDefault(voter, 0); + if (voters == globalMax) { + for (StoreVoterCount under : storeVoterCountSorted) { + if (storeVoterCount.getOrDefault(under.storeId, 0) <= minVotersPerStore + && !voterSorted.contains(under.storeId) + && !learners.contains(under.storeId)) { + // move voter -> underloaded + Set newVoters = new HashSet<>(voterSorted); + newVoters.remove(voter); + newVoters.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(newVoters, learners)); + meetingGoal = true; + break meet; + } } } } - if (meetingGoal) { - break; + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); } - } - - if (!meetingGoal) { - expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); } else { - break; + expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); } } - + if (!meetingGoal) { + expectedRangeLayout.clear(); + } return meetingGoal; } - private boolean balanceLearnerCount(Map landscape, - EffectiveRoute effectiveRoute, - Map expectedRangeLayout) { + private void balanceLearnerCount(Map landscape, + EffectiveRoute effectiveRoute, + Map expectedRangeLayout) { final Set liveStores = landscape.keySet(); Map storeLearnerCount = new HashMap<>(); @@ -417,7 +410,13 @@ record StoreLearnerCount(String storeId, int learnerCount) {} double totalLearners = storeLearnerCount.values().stream().mapToInt(Integer::intValue).sum(); double targetLearnersPerStore = liveStores.isEmpty() ? 
0 : totalLearners / liveStores.size(); - int maxLearnersPerStore = (int) Math.ceil(targetLearnersPerStore); + int minLearnersPerStore = (int) Math.floor(targetLearnersPerStore); + + int globalMax = storeLearnerCount.values().stream().mapToInt(Integer::intValue).max().orElse(0); + int globalMin = storeLearnerCount.values().stream().mapToInt(Integer::intValue).min().orElse(0); + if (globalMax - globalMin <= 1) { + return; + } boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { @@ -430,34 +429,35 @@ record StoreLearnerCount(String storeId, int learnerCount) {} sanitize(voters, liveStores); learnerSorted.retainAll(liveStores); - for (String learner : new ArrayList<>(learnerSorted)) { - if (storeLearnerCount.getOrDefault(learner, 0) > maxLearnersPerStore) { - for (StoreLearnerCount under : storeLearnerCountSorted) { - if (storeLearnerCount.getOrDefault(under.storeId, 0) < maxLearnersPerStore - && !voters.contains(under.storeId) - && !learnerSorted.contains(under.storeId)) { - Set newLearners = new HashSet<>(learnerSorted); - newLearners.remove(learner); - newLearners.add(under.storeId); - - expectedRangeLayout.put(boundary, buildConfig(voters, newLearners)); - meetingGoal = true; - break; + if (!meetingGoal) { + meet: + for (String learner : new ArrayList<>(learnerSorted)) { + int learners = storeLearnerCount.getOrDefault(learner, 0); + if (learners == globalMax) { + for (StoreLearnerCount under : storeLearnerCountSorted) { + if (storeLearnerCount.getOrDefault(under.storeId, 0) < minLearnersPerStore + && !voters.contains(under.storeId) + && !learnerSorted.contains(under.storeId)) { + Set newLearners = new HashSet<>(learnerSorted); + newLearners.remove(learner); + newLearners.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(voters, newLearners)); + meetingGoal = true; + break meet; + } } } } - if (meetingGoal) { - break; + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voters, 
learnerSorted)); } - } - - if (!meetingGoal) { - expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted)); } else { - break; + expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted)); } } - - return meetingGoal; + if (!meetingGoal) { + expectedRangeLayout.clear(); + } } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java index 93a238d7a..f5f7e1a55 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java @@ -795,4 +795,120 @@ public void learnersMinusOneWithAllLiveAsVotersMakesLearnersEmpty() { assertEquals(cmd.getVoters(), Set.of("s1", "s2", "s3")); assertTrue(cmd.getLearners().isEmpty()); } + + @Test + public void balanceVoterCountPrefersZeroCountStoreFirst() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId r3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + + KVRangeStoreDescriptor sA = 
KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB") + .addRanges(KVRangeDescriptor.newBuilder() + .setId(r3) + .setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("z")) + .build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sB").build()) + .build()) + .build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 0); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("sC")); + assertFalse(cmd.getVoters().contains("sA")); + } + + @Test + public void balanceVoterCountDoesOnlyOneChangePerRound() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + + KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer 
= new ReplicaCntBalancer("testCluster", "sA", 1, 0); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + assertTrue(cmd.getKvRangeId().equals(r1) || cmd.getKvRangeId().equals(r2)); + } + + @Test + public void balanceVoterCountSkipsTargetsAlreadyInVotersOrLearners() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1).setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder() + .addVoters("sA") + .addLearners("sB") + .build()) + .build(); + + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(KVRangeId.newBuilder().setEpoch(1).setId(2).build()) + .setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder() + .addVoters("sA") + .addLearners("sB") + .build()) + .build(); + + KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 1); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("sC")); + assertFalse(cmd.getVoters().contains("sB")); + } } From 4e65e0f66e534a4ba3f9b73ef6f8dc7e6b2e1fdb Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Wed, 27 Aug 2025 21:39:25 +0800 Subject: [PATCH 
13/20] CRDT AntiEntropy Improvements: 1. Reset `resendCount` on ACK to avoid inflated backoff and spurious probe resets 2. Continue anti-entropy after ACK when needed to drain remaining deltas. 3. Leverage late or unmatched ACK whenever possible --- base-crdt/base-crdt-service/pom.xml | 4 + .../bifromq/basecrdt/store/AntiEntropy.java | 58 +++-- .../store/AntiEntropyResilienceTest.java | 204 ++++++++++++++++++ 3 files changed, 250 insertions(+), 16 deletions(-) create mode 100644 base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java diff --git a/base-crdt/base-crdt-service/pom.xml b/base-crdt/base-crdt-service/pom.xml index f0b2351ab..2dd7cf2df 100644 --- a/base-crdt/base-crdt-service/pom.xml +++ b/base-crdt/base-crdt-service/pom.xml @@ -47,6 +47,10 @@ slf4j-api + + org.awaitility + awaitility + org.apache.logging.log4j log4j-api diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java index 1ddfea549..f2d0c6468 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java @@ -65,6 +65,9 @@ final class AntiEntropy { private long currentNeighborVer; private long currentInflationTs; private DeltaMessage currentDelta = null; + // track if the last sent delta contains replacements (i.e., real diff), + // so that after ACK we can proactively continue to drain remaining deltas + private boolean lastSentHasReplacement = false; AntiEntropy(String storeId, ByteString localAddr, @@ -110,31 +113,51 @@ void updateObservedNeighborHistory(long ver, } void handleAck(AckMessage ack) { - if (canceled.get() || !running.get()) { + if (canceled.get()) { return; } synchronized (this) { - if (!running.get() || currentDelta == null) { - return; - } - if 
(ack.getSeqNo() != currentDelta.getSeqNo()) { + // Case 1: Matched ACK for in-flight delta + if (running.get() && currentDelta != null && ack.getSeqNo() == currentDelta.getSeqNo()) { + // currentDelta has been ack'ed + currentDelta = null; + if (resendTask != null) { + resendTask.cancel(false); + } + // reset resend counter after a successful ack to avoid inflated backoff + resendCount = 0; + if (ack.getVer() > neighborVer) { + // got newer neighbor's history + neighborVer = ack.getVer(); + neighborLatticeIndex = to(ack.getLatticeEventsList()); + neighborHistoryIndex = to(ack.getHistoryEventsList()); + } + running.set(false); + // Proactively continue if: + // - probe success (currentNeighborVer==0), or + // - local inflation happened, or + // - neighbor's version advanced since we computed delta, or + // - we just sent a batch of replacements and may have more to drain + if (currentNeighborVer == 0 + || lastInflationTs != currentInflationTs + || ack.getVer() > currentNeighborVer + || lastSentHasReplacement) { + scheduleRun(); + } + // clear the flag after scheduling decision + lastSentHasReplacement = false; return; } - // currentDelta has been ack'ed - currentDelta = null; - if (resendTask != null) { - resendTask.cancel(false); - } + + // Case 2: Late or unmatched ACK. Use it to advance neighbor index if it's newer. 
if (ack.getVer() > neighborVer) { - // got newer neighbor's history neighborVer = ack.getVer(); neighborLatticeIndex = to(ack.getLatticeEventsList()); neighborHistoryIndex = to(ack.getHistoryEventsList()); - } - running.set(false); - // if there are new inflation happened or probe success, restart the task - if (currentNeighborVer == 0 || lastInflationTs != currentInflationTs) { - scheduleRun(); + // try schedule a run if we are not currently running + if (!running.get()) { + scheduleRun(); + } } } } @@ -180,6 +203,8 @@ private void run() { .addAllHistoryEvents(to(crdtInflater.historyEvents())) .setVer(HLC.INST.get()) .build(); + lastSentHasReplacement = false; + deltaMsgBytesCounter.increment(currentDelta.getSerializedSize()); send(currentDelta); } else { // Calculate delta @@ -200,6 +225,7 @@ private void run() { .addAllHistoryEvents(to(crdtInflater.historyEvents())) .setVer(HLC.INST.get()) .build(); + lastSentHasReplacement = true; send(currentDelta); } else { currentDelta = null; diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java new file mode 100644 index 000000000..5d414a735 --- /dev/null +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basecrdt.store; + +import static org.awaitility.Awaitility.await; + +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import io.reactivex.rxjava3.subjects.PublishSubject; +import io.reactivex.rxjava3.subjects.Subject; +import java.time.Duration; +import java.util.Collections; +import org.apache.bifromq.basecrdt.core.api.CRDTURI; +import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; +import org.apache.bifromq.basecrdt.core.api.IMVReg; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.compressor.GzipCompressor; +import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage; +import org.apache.bifromq.basecrdt.store.proto.MessagePayload; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.Test; + +public class AntiEntropyResilienceTest { + private ICRDTStore storeA; + private ICRDTStore storeB; + private Subject chAB; + private Subject chBA; + + @AfterMethod(alwaysRun = true) + public void teardown() { + if (storeA != null) { + storeA.stop(); + storeA = null; + } + if (storeB != null) { + storeB.stop(); + storeB = null; + } + } + + @Test(groups = "integration") + public void testConvergeWithDroppedAckOnce() { + CRDTStoreOptions optsA = 
CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + CRDTStoreOptions optsB = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + storeA = ICRDTStore.newInstance(optsA); + storeB = ICRDTStore.newInstance(optsB); + + chAB = PublishSubject.create().toSerialized(); + chBA = PublishSubject.create().toSerialized(); + + // Interpose B->A path to drop the first ACK intentionally to exercise resend/late-ack path + GzipCompressor compressor = new GzipCompressor(); + final boolean[] firstAckDropped = {false}; + + // Start stores with the interposed channels + storeA.start(chBA); + storeB.start(chAB + .flatMap(msg -> { + // inspect payload; if it's ACK and first time, drop it once + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && !firstAckDropped[0]) { + firstAckDropped[0] = true; + // drop this ack + return Observable.empty(); + } + return Observable.just(msg); + })); + + storeA.storeMessages() + .observeOn(Schedulers.single()) + .subscribe(chAB::onNext); + storeB.storeMessages() + .observeOn(Schedulers.single()) + .subscribe(chBA::onNext); + + String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test"); + // Build replicas + Replica rA = ReplicaIdGenerator.generate(uri); + Replica rB = ReplicaIdGenerator.generate(uri); + ByteString addrA = ByteString.copyFromUtf8("A"); + ByteString addrB = ByteString.copyFromUtf8("B"); + + // Host replicas + IORMap ormapA = storeA.host(rA, addrA); + IORMap ormapB = storeB.host(rB, addrB); + + // Join neighbors + storeA.join(rA, Collections.singleton(addrB)); + storeB.join(rB, Collections.singleton(addrA)); + + // Write a value from A + ByteString key = ByteString.copyFromUtf8("k"); + ByteString val = ByteString.copyFromUtf8("v1"); + ormapA.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join(); + + await().until(() -> { + 
IMVReg regB = ormapB.getMVReg(key); + ByteString read = Sets.newHashSet(regB.read()).stream().findFirst().orElse(ByteString.EMPTY); + return val.equals(read); + }); + } + + @Test(groups = "integration") + public void testConvergeWithLateUnmatchedAck() { + CRDTStoreOptions optsC = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + CRDTStoreOptions optsD = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + ICRDTStore storeC = ICRDTStore.newInstance(optsC); + ICRDTStore storeD = ICRDTStore.newInstance(optsD); + + Subject cToD = PublishSubject.create().toSerialized(); + Subject dToC = PublishSubject.create().toSerialized(); + + GzipCompressor compressor = new GzipCompressor(); + final CRDTStoreMessage[] delayedAck = {null}; + final int[] deltaCountFromC = {0}; + + // Wire inbound with logic: buffer first ACK from D->C, only deliver after second DELTA from C + storeC.start(dToC + .flatMap(msg -> { + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && delayedAck[0] == null) { + delayedAck[0] = msg; // buffer first ACK + return Observable.empty(); + } + return Observable.just(msg); + })); + storeD.start(cToD + .flatMap(msg -> { + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.DELTA) { + deltaCountFromC[0]++; + if (deltaCountFromC[0] >= 2 && delayedAck[0] != null) { + CRDTStoreMessage ack = delayedAck[0]; + delayedAck[0] = null; + dToC.onNext(ack); + } + } + return Observable.just(msg); + })); + + storeC.storeMessages().observeOn(Schedulers.single()).subscribe(cToD::onNext); + storeD.storeMessages().observeOn(Schedulers.single()).subscribe(dToC::onNext); + + // Host replicas + String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test-late-ack"); + Replica rC = 
ReplicaIdGenerator.generate(uri); + Replica rD = ReplicaIdGenerator.generate(uri); + ByteString addrC = ByteString.copyFromUtf8("C"); + ByteString addrD = ByteString.copyFromUtf8("D"); + IORMap ormapC = storeC.host(rC, addrC); + IORMap ormapD = storeD.host(rD, addrD); + storeC.join(rC, Collections.singleton(addrD)); + storeD.join(rD, Collections.singleton(addrC)); + + // Write on C + ByteString key = ByteString.copyFromUtf8("k2"); + ByteString val = ByteString.copyFromUtf8("v2"); + ormapC.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join(); + + // Await convergence on D even though first ACK is delivered late and unmatched + await().until(() -> { + IMVReg regD = ormapD.getMVReg(key); + ByteString read = Sets.newHashSet(regD.read()).stream().findFirst().orElse(ByteString.EMPTY); + return val.equals(read); + }); + + storeC.stop(); + storeD.stop(); + } +} From 1c6e896b0c5fdeea0f5be574debfe792bcdf2363 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Thu, 28 Aug 2025 15:48:15 +0800 Subject: [PATCH 14/20] Improve stale member cleanup logic --- .../basecluster/memberlist/agent/Agent.java | 30 +++++++-------- .../bifromq/basecluster/AgentHostsTest.java | 38 +++++++++++++++++++ .../bifromq/basecluster/AgentTestCluster.java | 27 ++++++++++--- .../basecluster/AgentTestTemplate.java | 14 ++++--- 4 files changed, 81 insertions(+), 28 deletions(-) diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java index d1866ff9a..cb46467c3 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java @@ -31,7 +31,6 @@ import io.reactivex.rxjava3.disposables.CompositeDisposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import 
java.util.Set; import java.util.concurrent.CompletableFuture; @@ -68,7 +67,6 @@ public final class Agent implements IAgent { BehaviorSubject.createDefault(emptyMap()); private final CompositeDisposable disposables = new CompositeDisposable(); private final Gauge memberNumGauge; - private volatile Set currentAgentEndpoints = new HashSet<>(); public Agent(String agentId, AgentEndpoint endpoint, @@ -187,37 +185,35 @@ private void sync(long ts) { private void handleAgentEndpointsUpdate(Set agentEndpoints) { skipRunIfNotJoined(() -> { - Set newAgentEndpoints = Sets.newHashSet(agentEndpoints); - newAgentEndpoints.add(localEndpoint); - Set leftHosts = Sets.difference(currentAgentEndpoints, newAgentEndpoints); - // drop members on left hosts + Set aliveAgentEndpoints = Sets.newHashSet(agentEndpoints); + aliveAgentEndpoints.add(localEndpoint); + // compute alive endpoints from host member list (clean source of truth) + Set aliveAgentHostEndpoints = aliveAgentEndpoints.stream() + .map(AgentEndpoint::getEndpoint) + .collect(Collectors.toSet()); + // drop members in CRDT that are not present in alive host endpoints Map agentMemberMap = CRDTUtil.toAgentMemberMap(agentCRDT); for (AgentMemberAddr memberAddr : agentMemberMap.keySet()) { - AgentEndpoint agentEndpoint = AgentEndpoint.newBuilder() - .setEndpoint(memberAddr.getEndpoint()) - .setIncarnation(memberAddr.getIncarnation()) - .build(); - if (leftHosts.contains(agentEndpoint) - && shouldReportFailure(newAgentEndpoints, memberAddr.getEndpoint())) { + if (!aliveAgentHostEndpoints.contains(memberAddr.getEndpoint()) + && shouldClean(aliveAgentEndpoints, memberAddr.getEndpoint())) { agentCRDT.execute(ORMapOperation.remove(memberAddr.toByteString()).of(mvreg)); } } // update landscape - currentAgentEndpoints = newAgentEndpoints; store.join(agentCRDT.id(), - currentAgentEndpoints.stream().map(AbstractMessageLite::toByteString).collect(Collectors.toSet())); + 
aliveAgentEndpoints.stream().map(AbstractMessageLite::toByteString).collect(Collectors.toSet())); }); } - private boolean shouldReportFailure(Set allEndpoints, HostEndpoint failedMemberEndpoint) { + private boolean shouldClean(Set allEndpoints, HostEndpoint failedMemberEndpoint) { // if local member is responsible for removing the failed member from CRDT RendezvousHash hash = RendezvousHash.builder() .keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) .nodeFunnel((from, into) -> into.putBytes(from.getEndpoint().getId().asReadOnlyByteBuffer())) .nodes(allEndpoints) .build(); - AgentEndpoint reporter = hash.get(failedMemberEndpoint); - return reporter.getEndpoint().getId().equals(localEndpoint.getEndpoint().getId()); + AgentEndpoint cleaner = hash.get(failedMemberEndpoint); + return cleaner.getEndpoint().getId().equals(localEndpoint.getEndpoint().getId()); } private void skipRunIfNotJoined(Runnable runnable) { diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java index 12c64d788..973609d57 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java @@ -395,4 +395,42 @@ public void testAgentClusterPartitionAndHealing() { await().forever().until(() -> agentOnS2.membership().blockingFirst().size() == 4); await().forever().until(() -> agentOnS3.membership().blockingFirst().size() == 4); } + + @StoreCfgs(stores = { + @StoreCfg(id = "s1", isSeed = true), + @StoreCfg(id = "s2"), + }) + @Test + public void testCleanStaleAgentMembersAfterHostRestartWithNewEndpoint() { + // ensure cluster up + await().until(() -> storeMgr.membership("s1").size() == 2); + await().until(() -> storeMgr.membership("s2").size() == 2); + + // host same agent on both hosts so CRDT survives while s1 restarts + IAgent agentOnS1 = 
storeMgr.hostAgent("s1", "agentX"); + IAgent agentOnS2 = storeMgr.hostAgent("s2", "agentX"); + + // register a member only on s1 to create a CRDT entry bound to s1's endpoint + IAgentMember s1Member = agentOnS1.register("nodeOnS1"); + s1Member.metadata(copyFromUtf8("payload")); + + // both sides should observe exactly 1 member + await().until(() -> agentOnS1.membership().blockingFirst().size() == 1); + await().until(() -> agentOnS2.membership().blockingFirst().size() == 1); + + storeMgr.crash("s1"); + // s2 should eventually only see itself + await().forever().until(() -> storeMgr.membership("s2").size() == 1); + + // start a new s1 instance with a new endpoint (old isolated one still exists but unreachable) + storeMgr.startHost("s1"); + // rejoin cluster + storeMgr.join("s1", "s2"); + // re-host the agent on s1 (no members registered now) + IAgent newAgentOnS1 = storeMgr.hostAgent("s1", "agentX"); + + // eventually, the stale member from old s1 endpoint should be cleaned from CRDT + await().until(() -> newAgentOnS1.membership().blockingFirst().isEmpty()); + await().until(() -> agentOnS2.membership().blockingFirst().isEmpty()); + } } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java index d6c74fdb5..399dbd924 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java @@ -49,15 +49,16 @@ public class AgentTestCluster { private final Map hostTransportMap = Maps.newConcurrentMap(); private final Map hostMap = Maps.newConcurrentMap(); private final Map> inflationLogs = Maps.newConcurrentMap(); + private final Map crashedHostEndpointMap = Maps.newConcurrentMap(); + private final Map crashedHostTransportMap = Maps.newConcurrentMap(); + private final Map crashedHostMap = Maps.newConcurrentMap(); private final CompositeDisposable 
disposables = new CompositeDisposable(); + public AgentTestCluster() { } - public String newHost(String hostId, AgentHostOptions options) { - hostMetaMap.computeIfAbsent(hostId, k -> { - loadStore(hostId, options); - return new AgentHostMeta(options); - }); + public String registerHost(String hostId, AgentHostOptions options) { + hostMetaMap.computeIfAbsent(hostId, k -> new AgentHostMeta(options)); return hostId; } @@ -88,6 +89,20 @@ public void isolate(String hostId) { network.isolate(hostTransportMap.get(hostId)); } + public void crash(String hostId) { + checkHost(hostId); + network.isolate(hostTransportMap.get(hostId)); + inflationLogs.remove(hostId); + + HostEndpoint crashedEndpoint = hostEndpointMap.remove(hostId); + crashedHostEndpointMap.put(hostId, crashedEndpoint); + + IAgentHost crashedAgentHost = hostMap.remove(crashedEndpoint); + crashedHostMap.put(crashedEndpoint, crashedAgentHost); + ITransport transport = hostTransportMap.remove(hostId); + crashedHostTransportMap.put(hostId, transport); + } + public void integrate(String hostId) { network.integrate(hostTransportMap.get(hostId)); } @@ -138,6 +153,8 @@ private HostEndpoint loadStore(String storeId, AgentHostOptions options) { public void shutdown() { disposables.dispose(); hostEndpointMap.keySet().forEach(this::stopHost); + crashedHostTransportMap.keySet().forEach(hostId -> + crashedHostMap.remove(crashedHostEndpointMap.get(hostId)).close()); } public IAgentHost getHost(String hostId) { diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java index 9bc5aa25f..369cd9415 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; -import org.apache.bifromq.basecluster.annotation.StoreCfg; -import org.apache.bifromq.basecluster.annotation.StoreCfgs; -import org.apache.bifromq.basecrdt.store.CRDTStoreOptions; import java.lang.reflect.Method; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.annotation.StoreCfg; +import org.apache.bifromq.basecluster.annotation.StoreCfgs; +import org.apache.bifromq.basecrdt.store.CRDTStoreOptions; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -42,14 +42,16 @@ public void createClusterByAnnotation(Method testMethod) { if (storeMgr != null) { if (storeCfgs != null) { for (StoreCfg cfg : storeCfgs.stores()) { - storeMgr.newHost(cfg.id(), build(cfg)); + storeMgr.registerHost(cfg.id(), build(cfg)); + storeMgr.startHost(cfg.id()); if (cfg.isSeed()) { seedStoreId = cfg.id(); } } } if (storeCfg != null) { - storeMgr.newHost(storeCfg.id(), build(storeCfg)); + storeMgr.registerHost(storeCfg.id(), build(storeCfg)); + storeMgr.startHost(storeCfg.id()); } if (seedStoreId != null && storeCfgs != null) { for (StoreCfg cfg : storeCfgs.stores()) { From fab126720371a583a6744e24ea501497c4754530 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Fri, 29 Aug 2025 15:12:38 +0800 Subject: [PATCH 15/20] base-crdt: metering delta send rate and throughput correctly during anti-entropy --- .../apache/bifromq/basecrdt/store/AntiEntropy.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java index f2d0c6468..6e2a39f1d 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java +++ 
b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java @@ -204,7 +204,6 @@ private void run() { .setVer(HLC.INST.get()) .build(); lastSentHasReplacement = false; - deltaMsgBytesCounter.increment(currentDelta.getSerializedSize()); send(currentDelta); } else { // Calculate delta @@ -245,7 +244,7 @@ private void run() { private void send(DeltaMessage deltaMessage) { log.trace("Local[{}] send delta to neighbor[{}]:\n{}", toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(deltaMessage)); - neighborMessageSubject.onNext(new NeighborMessage(deltaMessage, neighborAddr)); + emit(deltaMessage); // Schedule timer task for resend scheduleResend(deltaMessage); } @@ -265,9 +264,7 @@ private void resend(DeltaMessage toResend) { if (currentDelta == toResend) { log.trace("Local[{}] resend delta to neighbor[{}]:\n{}", toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(toResend)); - deltaMsgCounter.increment(); - deltaMsgBytesCounter.increment(currentDelta.getSerializedSize()); - neighborMessageSubject.onNext(new NeighborMessage(currentDelta, neighborAddr)); + emit(currentDelta); if (resendCount++ < 10) { scheduleResend(toResend); } else { @@ -288,4 +285,10 @@ private void resend(DeltaMessage toResend) { private long resendDelay() { return ThreadLocalRandom.current().nextLong(500, 2000) * (resendCount + 1); } + + private void emit(DeltaMessage delta) { + deltaMsgCounter.increment(); + deltaMsgBytesCounter.increment(delta.getSerializedSize()); + neighborMessageSubject.onNext(new NeighborMessage(delta, neighborAddr)); + } } From 8ff32b323cbc83a47db89ace73ea83a1142dc669 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Fri, 29 Aug 2025 15:27:57 +0800 Subject: [PATCH 16/20] optimize balancer's log output --- .../balance/KVStoreBalanceController.java | 40 +++++++++++-------- .../balance/KVStoreBalanceControllerTest.java | 2 +- .../bifromq/basekv/balance/StoreBalancer.java | 3 +- build/build-bifromq-starter/conf/log4j2.xml | 2 +- 4 
files changed, 28 insertions(+), 19 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index afe9c4f76..b169da6a5 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -31,6 +31,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -137,7 +138,8 @@ public void start(String localStoreId) { if (state.compareAndSet(State.Init, State.Started)) { this.localStoreId = localStoreId; statesReporter = metaService.balancerStatesReporter(storeClient.clusterId(), localStoreId); - log = MDCLogger.getLogger("balancer.logger", "clusterId", storeClient.clusterId(), "storeId", localStoreId); + log = MDCLogger.getLogger("balancer.logger", + "clusterId", storeClient.clusterId(), "storeId", localStoreId, "balancer", "CONTROLLER"); for (IStoreBalancerFactory factory : builtinBalancerFactories) { StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId); @@ -146,12 +148,12 @@ public void start(String localStoreId) { } for (IStoreBalancerFactory factory : customBalancerFactories) { String balancerFactoryFQN = factory.getClass().getName(); - log.info("Create balancer from factory: {}", balancerFactoryFQN); StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId); + log.info("Create balancer[{}] from factory: {}", balancer.getClass().getName(), balancerFactoryFQN); if (balancer instanceof RangeBootstrapBalancer || balancer instanceof 
RedundantRangeRemovalBalancer || balancer instanceof UnreachableReplicaRemovalBalancer) { - log.warn("{} should not be created from custom balancer factory", + log.warn("Builtin balancer[{}] should not be created from custom balancer factory", balancer.getClass().getSimpleName()); continue; } @@ -163,10 +165,12 @@ public void start(String localStoreId) { log.info("BalancerController start"); disposables.add(statesProposal.expectedBalancerStates() .subscribe(currentExpected -> { + log.trace("Expected balancer states changed: {}", currentExpected); this.expectedBalancerStates = currentExpected; trigger(); })); disposables.add(storeClient.describe().subscribe(descriptors -> { + log.trace("Landscape changed: {}", descriptors); this.landscape = descriptors; trimRangeHistory(descriptors); trigger(); @@ -176,8 +180,10 @@ public void start(String localStoreId) { for (Map.Entry entry : balancers.entrySet()) { String balancerFacClassFQN = entry.getKey(); StoreBalancerState balancerState = entry.getValue(); - statesReporter.reportBalancerState(balancerFacClassFQN, - balancerState.disabled.get(), balancerState.loadRules.get()); + if (!balancerState.isBuiltin) { + statesReporter.reportBalancerState(balancerFacClassFQN, + balancerState.disabled.get(), balancerState.loadRules.get()); + } } })); } @@ -217,7 +223,7 @@ private void updateAndBalance() { Set landscape = this.landscape; if (landscape == null || landscape.isEmpty()) { scheduling.set(false); - if (this.landscape != landscape) { + if (!Objects.equals(this.landscape, landscape)) { trigger(); } return; @@ -234,7 +240,8 @@ private void updateAndBalance() { Struct loadRules = balancerState.loadRules.get(); boolean needReport = false; if (balancerState.disabled.get() != disable) { - log.info("Balancer[{}] is {}", balancerFacClassFQN, disable ? "disabled" : "enabled"); + log.info("Balancer[{}] is {}", balancerState.balancer.getClass().getSimpleName(), + disable ? 
"disabled" : "enabled"); balancerState.disabled.set(disable); needReport = true; } @@ -250,7 +257,7 @@ private void updateAndBalance() { needReport = true; } else { log.warn("Balancer[{}] load rules not valid: {}", - balancerFacClassFQN, expectedLoadRules); + balancerState.balancer.getClass().getSimpleName(), expectedLoadRules); } } if (needReport) { @@ -263,7 +270,7 @@ private void updateAndBalance() { } balancerState.balancer.update(landscape); } catch (Throwable e) { - log.error("Balancer[{}] update failed", balancerFacClassFQN, e); + log.error("Balancer[{}] update failed", balancerState.balancer.getClass().getSimpleName(), e); } } balance(expectedBalancerState, landscape); @@ -273,7 +280,7 @@ private void scheduleRetry(Map expected, Set landscape, Duration delay) { task = executor.schedule(() -> { - if (expected != this.expectedBalancerStates || landscape != this.landscape) { + if (!Objects.equals(expected, this.expectedBalancerStates) || landscape != this.landscape) { // retry is preemptive return; } @@ -291,6 +298,7 @@ private void balance(final Map expected, String balancerFactoryName = entry.getKey(); StoreBalancerState fromBalancerState = entry.getValue(); StoreBalancer fromBalancer = fromBalancerState.balancer; + String balancerName = fromBalancer.getClass().getSimpleName(); if (fromBalancerState.disabled.get()) { continue; } @@ -300,9 +308,8 @@ private void balance(final Map expected, case BalanceNow -> { BalanceCommand commandToRun = ((BalanceNow) result).command; if (!isStaleCommand(commandToRun)) { - log.info("Balancer[{}] command run: {}", balancerFactoryName, commandToRun); - String balancerName = fromBalancer.getClass().getSimpleName(); String cmdName = commandToRun.getClass().getSimpleName(); + log.info("Balancer[{}] command run: {}", balancerName, commandToRun); Sample start = Timer.start(); runCommand(commandToRun) .whenCompleteAsync((success, e) -> { @@ -313,7 +320,7 @@ private void balance(final Map expected, 
metrics.cmdFailedCounter.increment(); } else { log.info("Balancer[{}] command run result[{}]: {}", - balancerFactoryName, success, commandToRun); + balancerName, success, commandToRun); if (success) { metrics.cmdSucceedCounter.increment(); start.stop(metrics.cmdRunTimer); @@ -323,7 +330,8 @@ private void balance(final Map expected, } scheduling.set(false); if (success) { - if (this.landscape != landscape || this.expectedBalancerStates != expected) { + if (!Objects.equals(this.landscape, landscape) + || !Objects.equals(this.expectedBalancerStates, expected)) { trigger(); } } else { @@ -342,12 +350,12 @@ private void balance(final Map expected, } } } catch (Throwable e) { - log.warn("Balancer[{}] unexpected error", balancerFactoryName, e); + log.warn("Balancer[{}] unexpected error", balancerName, e); } } // no command to run scheduling.set(false); - if (this.landscape != landscape || this.expectedBalancerStates != expected) { + if (!Objects.equals(this.landscape, landscape) || !Objects.equals(this.expectedBalancerStates, expected)) { trigger(); } else if (delay != null) { // if some balancers are in the progress of generating balance command, wait for a while diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java index 0a7ba3335..dc44b4be4 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java @@ -406,7 +406,7 @@ public void testInvalidRules() { public void testRefreshSignal() { reset(statesReporter); refreshSignal.onNext(System.currentTimeMillis()); - verify(statesReporter, times(4)) + verify(statesReporter, times(1)) .reportBalancerState(anyString(), 
anyBoolean(), any(Struct.class)); verify(statesReporter, times(1)) .reportBalancerState(eq(balancerFactory.getClass().getName()), diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java index c29a07839..13d6af6ad 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java @@ -40,7 +40,8 @@ public abstract class StoreBalancer { * @param localStoreId the id of the store which the balancer is responsible for */ public StoreBalancer(String clusterId, String localStoreId) { - this.log = MDCLogger.getLogger("balancer.logger", "clusterId", clusterId, "storeId", localStoreId); + this.log = MDCLogger.getLogger("balancer.logger", + "clusterId", clusterId, "storeId", localStoreId, "balancer", this.getClass().getSimpleName()); this.clusterId = clusterId; this.localStoreId = localStoreId; } diff --git a/build/build-bifromq-starter/conf/log4j2.xml b/build/build-bifromq-starter/conf/log4j2.xml index a646e40ab..c3fd5569e 100644 --- a/build/build-bifromq-starter/conf/log4j2.xml +++ b/build/build-bifromq-starter/conf/log4j2.xml @@ -41,7 +41,7 @@ %d{yyyy-MM-dd HH:mm:ss.SSS} %5p [%t] --- [%F:%L] %m - [id=%X{id},term=%X{term},state=%X{state},leader=%X{leader},f=%X{first},l=%X{last}],c=%X{commit},cfg=%X{config}]%n ]]> From 91dea9bd825d570e7e78df372fdda3a5dae9feba Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Fri, 29 Aug 2025 15:29:37 +0800 Subject: [PATCH 17/20] Improved housekeeping logic for crdt-based metadata service --- base-kv/base-kv-meta-service/pom.xml | 8 ++++ .../metaservice/BaseKVLandscapeCRDT.java | 44 ++++++++++++++++- .../metaservice/BaseKVLandscapeObserver.java | 5 ++ .../metaservice/BaseKVLandscapeReporter.java | 32 ++++--------- 
.../BaseKVStoreBalancerStatesCRDT.java | 47 ++++++++++++++++++- .../BaseKVStoreBalancerStatesObserver.java | 5 ++ ...BaseKVStoreBalancerStatesProposalCRDT.java | 14 +++++- .../BaseKVStoreBalancerStatesProposer.java | 5 ++ .../BaseKVStoreBalancerStatesReporter.java | 31 ++++-------- .../metaservice/IBaseKVLandscapeCRDT.java | 7 +++ .../IBaseKVStoreBalancerStatesCRDT.java | 1 + ...BaseKVStoreBalancerStatesProposalCRDT.java | 1 + .../BaseKVLandscapeReportTest.java | 5 +- base-rpc/base-rpc-traffic-governor/pom.xml | 16 +++++++ .../RPCServiceTrafficManager.java | 17 ++++++- 15 files changed, 182 insertions(+), 56 deletions(-) diff --git a/base-kv/base-kv-meta-service/pom.xml b/base-kv/base-kv-meta-service/pom.xml index 97e4c3ac3..55d6f75a6 100644 --- a/base-kv/base-kv-meta-service/pom.xml +++ b/base-kv/base-kv-meta-service/pom.xml @@ -39,6 +39,14 @@ org.apache.bifromq base-kv-type-proto + + org.apache.bifromq + base-logger + + + org.apache.bifromq + base-util + io.reactivex.rxjava3 rxjava diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java index 3b9422aad..0cbde7bd7 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java @@ -37,7 +37,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -47,21 +47,34 @@ import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; 
import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVLandscapeCRDT implements IBaseKVLandscapeCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; private final IORMap landscapeORMap; private final BehaviorSubject> landscapeSubject = BehaviorSubject.create(); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVLandscapeCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVLandscapeCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.landscapeORMap = crdtService.host(toLandscapeURI(clusterId)); disposable.add(landscapeORMap.inflation() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(this::buildLandscape) .subscribe(landscapeSubject::onNext)); + disposable.add(Observable.combineLatest(landscape(), aliveReplicas(), (StoreDescriptorAndReplicas::new)) + .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) + .subscribe(this::houseKeep)); + } + + @Override + public String clusterId() { + return clusterId; } @Override @@ -129,4 +142,31 @@ private Optional buildLandscape(IMVReg mvReg) { l.sort((a, b) -> Long.compareUnsigned(b.getHlc(), a.getHlc())); return Optional.ofNullable(l.isEmpty() ? 
null : l.get(0)); } + + private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) { + Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap; + Set aliveReplicas = storeDescriptorAndReplicas.replicaIds; + for (StoreKey storeKey : storedDescriptors.keySet()) { + if (!aliveReplicas.contains(storeKey.getReplicaId()) + && shouldClean(aliveReplicas, storeKey.getReplicaId())) { + log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId()); + removeDescriptor(storeKey); + } + } + } + + private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { + // Choose cleaner deterministically from the identical aliveReplicas set across nodes. + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveReplicas) + .build(); + ByteString cleaner = hash.get(failedReplicas); + return cleaner != null && cleaner.equals(landscapeORMap.id().getId()); + } + + private record StoreDescriptorAndReplicas(Map descriptorMap, + Set replicaIds) { + } } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java index 6f529b78a..57bde6ba3 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java @@ -26,13 +26,17 @@ import java.util.Map; import java.util.Optional; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver { + private final Logger log; private final BehaviorSubject> landscapeSubject = 
BehaviorSubject.create(); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVLandscapeObserver(IBaseKVLandscapeCRDT landscapeCRDT) { + this.log = MDCLogger.getLogger(BaseKVLandscapeObserver.class, "clusterId", landscapeCRDT.clusterId()); disposable.add(landscapeCRDT.landscape() .map(descriptorMap -> { Map descriptorMapByStoreId = new HashMap<>(); @@ -42,6 +46,7 @@ class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver { } return v.getHlc() > value.getHlc() ? v : value; })); + log.debug("Landscape changed: {}", descriptorMapByStoreId); return descriptorMapByStoreId; }) .subscribe(landscapeSubject::onNext)); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java index 0b1cb7610..903474fef 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java @@ -19,39 +19,38 @@ package org.apache.bifromq.basekv.metaservice; -import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVLandscapeReporter implements IBaseKVLandscapeReporter { + private final Logger log; private final String storeId; private final IBaseKVLandscapeCRDT landscapeCRDT; private final CompositeDisposable disposable = new CompositeDisposable(); private volatile 
KVRangeStoreDescriptor latestDescriptor; BaseKVLandscapeReporter(String storeId, IBaseKVLandscapeCRDT landscapeCRDT) { + this.log = MDCLogger.getLogger(BaseKVLandscapeReporter.class, "clusterId", landscapeCRDT.clusterId(), + "storeId", storeId); this.storeId = storeId; this.landscapeCRDT = landscapeCRDT; - disposable.add(Observable.combineLatest( - landscapeCRDT.landscape(), - landscapeCRDT.aliveReplicas(), - (StoreDescriptorAndReplicas::new)) + disposable.add(landscapeCRDT.landscape() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) - .subscribe(this::houseKeep)); + .subscribe(this::afterInflation)); } @Override public CompletableFuture report(KVRangeStoreDescriptor descriptor) { Optional descriptorOnCRDT = landscapeCRDT.getStoreDescriptor(descriptor.getId()); if (descriptorOnCRDT.isEmpty() || !descriptorOnCRDT.get().equals(descriptor)) { + this.latestDescriptor = descriptor; return landscapeCRDT.setStoreDescriptor(descriptor); } return CompletableFuture.completedFuture(null); @@ -68,24 +67,13 @@ public void stop() { disposable.dispose(); } - private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) { - Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap; - Set aliveReplicas = storeDescriptorAndReplicas.replicaIds; - for (StoreKey storeKey : storedDescriptors.keySet()) { - if (!aliveReplicas.contains(storeKey.getReplicaId())) { - log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId()); - landscapeCRDT.removeDescriptor(storeKey); - } - } + private void afterInflation(Map storedDescriptors) { if (!storedDescriptors.containsKey(landscapeCRDT.toDescriptorKey(storeId))) { KVRangeStoreDescriptor latestDescriptor = this.latestDescriptor; if (latestDescriptor != null) { + log.debug("Rectify missing store descriptor"); landscapeCRDT.setStoreDescriptor(latestDescriptor); } } } - - private record StoreDescriptorAndReplicas(Map descriptorMap, - Set replicaIds) { - } } diff --git 
a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java index c1d485956..9cbec5f55 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java @@ -39,7 +39,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -50,9 +50,12 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; // key: storeId, value: Map of balancerClassFQN -> BalancerState private final IORMap balancerStatesByStoreORMap; @@ -61,12 +64,25 @@ class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT { private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.balancerStatesByStoreORMap = crdtService.host(toBalancerStateURI(clusterId)); disposable.add(balancerStatesByStoreORMap.inflation() 
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(this::buildBalancerStateSnapshots) .subscribe(balancerStatesSubject::onNext)); + disposable.add(Observable.combineLatest( + this.currentBalancerStates(), + this.aliveReplicas(), + (StateSnapshotsAndReplicas::new)) + .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) + .subscribe(this::houseKeep)); + } + + @Override + public String clusterId() { + return clusterId; } @Override @@ -161,4 +177,31 @@ private Map> buildBalancerStateSnap })); return currentBalancerStates; } + + private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) { + Map> observed = stateSnapshotsAndReplicas.observed; + Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds; + for (StoreKey storeKey : observed.keySet()) { + if (!aliveReplicas.contains(storeKey.getReplicaId()) + && shouldClean(aliveReplicas, storeKey.getReplicaId())) { + log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId()); + this.removeStore(storeKey); + } + } + } + + private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { + // Choose cleaner deterministically from the identical aliveReplicas set across nodes. 
+ RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveReplicas) + .build(); + ByteString cleaner = hash.get(failedReplicas); + return cleaner != null && cleaner.equals(balancerStatesByStoreORMap.id().getId()); + } + + private record StateSnapshotsAndReplicas(Map> observed, + Set replicaIds) { + } } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java index abe81fb12..b9fa55de0 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java @@ -27,13 +27,17 @@ import java.util.HashMap; import java.util.Map; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObserver { + private final Logger log; private final BehaviorSubject>> currentBalancerStatesSubject = BehaviorSubject.createDefault(emptyMap()); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesObserver(IBaseKVStoreBalancerStatesCRDT statesCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesObserver.class, "clusterId", statesCRDT.clusterId()); disposable.add(statesCRDT.currentBalancerStates() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(statesMap -> { @@ -49,6 +53,7 @@ class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObs } return balancerStates; })); + log.debug("Current balancer states changed: {}", currentStates); 
return currentStates; }) .subscribe(currentBalancerStatesSubject::onNext)); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java index 069b5be38..767dd7e06 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java @@ -36,7 +36,6 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -45,9 +44,12 @@ import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerStatesProposalCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; // key: balancerClassFQN, value: BalancerState private final IORMap expectedBalancerStatesORMap; @@ -56,6 +58,8 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesProposalCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposalCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.expectedBalancerStatesORMap = 
crdtService.host(toBalancerStateProposalURI(clusterId)); disposable.add(expectedBalancerStatesORMap.inflation() @@ -64,6 +68,11 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState .subscribe(expectedBalancerStatesSubject::onNext)); } + @Override + public String clusterId() { + return clusterId; + } + public Observable> expectedBalancerStates() { return expectedBalancerStatesSubject.distinctUntilChanged(); } @@ -115,6 +124,7 @@ private Map buildExpectedBalancerStateSnapshots(l balancerStateOpt.ifPresent(stateSnapshot -> balancerStatesMap.put(balancerClassFQN, stateSnapshot)); }); + log.debug("Expected balancer states changed: {}", balancerStatesMap); return balancerStatesMap; } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java index 20238789e..fb12538c3 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java @@ -24,11 +24,15 @@ import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVStoreBalancerStatesProposer implements IBaseKVStoreBalancerStatesProposer { + private final Logger log; private final IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT; BaseKVStoreBalancerStatesProposer(IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposer.class, "clusterId", proposalCRDT.clusterId()); this.proposalCRDT = proposalCRDT; } @@ -76,6 +80,7 @@ public void stop() { private CompletableFuture 
proposeBalancerState(String balancerFactoryClass, BalancerStateSnapshot state) { + log.debug("Propose balancer state: balancerClass={}, state={}", balancerFactoryClass, state); CompletableFuture resultFuture = new CompletableFuture<>(); long now = state.getHlc(); proposalCRDT.expectedBalancerStates() diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java index 7a5cf549d..a2531f52c 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java @@ -19,34 +19,32 @@ package org.apache.bifromq.basekv.metaservice; -import com.google.protobuf.ByteString; import com.google.protobuf.Struct; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; import java.util.Map; -import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesReporter implements IBaseKVStoreBalancerStatesReporter { + private final Logger log; private final String storeId; private final IBaseKVStoreBalancerStatesCRDT statesCRDT; private final CompositeDisposable disposable = new CompositeDisposable(); private final Map latestState = new ConcurrentHashMap<>(); BaseKVStoreBalancerStatesReporter(String storeId, IBaseKVStoreBalancerStatesCRDT statesCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesReporter.class, "clusterId", statesCRDT.clusterId(), + 
"storeId", storeId); this.storeId = storeId; this.statesCRDT = statesCRDT; - disposable.add(Observable.combineLatest( - statesCRDT.currentBalancerStates(), - statesCRDT.aliveReplicas(), - (StateSnapshotsAndReplicas::new)) + disposable.add(statesCRDT.currentBalancerStates() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) - .subscribe(this::houseKeep)); + .subscribe(this::afterInflation)); } @Override @@ -75,26 +73,15 @@ public void stop() { disposable.dispose(); } - private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) { - Map> observed = stateSnapshotsAndReplicas.observed; - Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds; - for (StoreKey storeKey : observed.keySet()) { - if (!aliveReplicas.contains(storeKey.getReplicaId())) { - log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId()); - statesCRDT.removeStore(storeKey); - } - } + private void afterInflation(Map> observed) { if (!observed.containsKey(statesCRDT.toDescriptorKey(storeId))) { + log.debug("Rectify missing store balancer states"); latestState.forEach((balancerClassFQN, balancerState) -> statesCRDT.setStoreBalancerState(storeId, balancerClassFQN, balancerState.enable(), balancerState.loadRules())); } } - private record StateSnapshotsAndReplicas(Map> observed, - Set replicaIds) { - } - private record BalancerState(boolean enable, Struct loadRules) { } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java index 721f7c33e..4fc27eb26 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java @@ -32,12 +32,19 @@ * The interface of a BaseKV landscape CRDT. 
*/ public interface IBaseKVLandscapeCRDT { + /** + * The id of base-kv cluster. + * @return the cluster id + */ + String clusterId(); + /** * A signal to refresh the landscape CRDT. * * @return the observable of the signal */ Observable refreshSignal(); + /** * Get the observable of alive replicas of landscape CRDT. * diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java index 99960bef0..9038a307a 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java @@ -32,6 +32,7 @@ * The interface of a BaseKV store balancer states CRDT. */ public interface IBaseKVStoreBalancerStatesCRDT { + String clusterId(); Observable refuteSignal(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java index 3ca2d36e2..4bde0de7a 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java @@ -30,6 +30,7 @@ * The interface of a BaseKV store balancer states CRDT. 
*/ public interface IBaseKVStoreBalancerStatesProposalCRDT { + String clusterId(); Observable> expectedBalancerStates(); diff --git a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java index f28fbea46..87aaf741f 100644 --- a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java +++ b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java @@ -21,9 +21,7 @@ import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import java.util.Collections; import java.util.Map; import org.apache.bifromq.basecluster.AgentHostOptions; import org.apache.bifromq.basecluster.IAgentHost; @@ -80,7 +78,6 @@ public void stop() { await().until(() -> observer.getStoreDescriptor(descriptor.getId()).isPresent()); reporter.stop(); - assertEquals(Collections.emptyMap(), observer.landscape().blockingFirst()); - assertTrue(observer.getStoreDescriptor(descriptor.getId()).isEmpty()); + await().until(() -> observer.landscape().blockingFirst().isEmpty()); } } diff --git a/base-rpc/base-rpc-traffic-governor/pom.xml b/base-rpc/base-rpc-traffic-governor/pom.xml index 7e3e47a08..78932bdb9 100644 --- a/base-rpc/base-rpc-traffic-governor/pom.xml +++ b/base-rpc/base-rpc-traffic-governor/pom.xml @@ -38,10 +38,26 @@ org.apache.bifromq base-rpc-grpc-inproc + org.awaitility awaitility + + org.apache.logging.log4j + log4j-api + test + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + diff --git a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java 
b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java index e83a6098c..b0724d0c6 100644 --- a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java +++ b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java @@ -37,6 +37,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.baserpc.proto.RPCServer; @@ -130,8 +131,8 @@ private Set refreshAliveServerList(Map announ for (RPCServer server : announcedServers.values()) { if (aliveAnnouncers.contains(server.getAnnouncerId())) { aliveServers.add(build(server)); - } else { - // this is a side effect: revoke the announcement made by dead announcer + } else if (shouldClean(aliveAnnouncers, server.getAnnouncerId())) { + // revoke the announcement made by dead announcer log.debug("Remove not alive server announcement: {}", server.getId()); revoke(server.getId()); } @@ -139,6 +140,18 @@ private Set refreshAliveServerList(Map announ return aliveServers; } + private boolean shouldClean(Set aliveAnnouncers, ByteString failedAnnouncer) { + aliveAnnouncers.add(id()); + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveAnnouncers) + .build(); + ByteString cleaner = hash.get(failedAnnouncer); + return cleaner.equals(id()); + } + + private ServerEndpoint build(RPCServer server) { return new ServerEndpoint(server.getAgentHostId(), server.getId(), From cfc1e0392cf8f1939f03aa9b76021554a4492017 Mon Sep 17 00:00:00 2001 From: 
Yonny Hao Date: Fri, 5 Sep 2025 10:48:18 +0800 Subject: [PATCH 18/20] Improved the built-in balancers efficiency --- .../balance/KVStoreBalanceController.java | 10 +- .../RedundantRangeRemovalBalancerFactory.java | 10 +- .../balance/impl/RangeBootstrapBalancer.java | 3 + .../impl/RedundantRangeRemovalBalancer.java | 114 ++++++++++++++---- .../RedundantRangeRemovalBalancerTest.java | 47 ++++++-- .../balance/command/RecoveryCommand.java | 4 +- .../basekv/balance/util/CommandUtil.java | 30 +++-- .../basekv/balance/util/CommandUtilTest.java | 30 ++--- 8 files changed, 178 insertions(+), 70 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index b169da6a5..5f73f98e5 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -121,7 +121,7 @@ public KVStoreBalanceController(IBaseKVMetaService metaService, this.customBalancerFactories = Lists.newArrayList(factories); this.builtinBalancerFactories = Lists.newArrayList( new RangeBootstrapBalancerFactory(bootstrapDelay), - new RedundantRangeRemovalBalancerFactory(), + new RedundantRangeRemovalBalancerFactory(zombieProbeDelay), new UnreachableReplicaRemovalBalancerFactory(zombieProbeDelay)); this.statesProposal = metaService.balancerStatesProposal(storeClient.clusterId()); this.balancers = new HashMap<>(); @@ -181,6 +181,7 @@ public void start(String localStoreId) { String balancerFacClassFQN = entry.getKey(); StoreBalancerState balancerState = entry.getValue(); if (!balancerState.isBuiltin) { + log.debug("Report balancer state for {}", balancerFacClassFQN); statesReporter.reportBalancerState(balancerFacClassFQN, 
balancerState.disabled.get(), balancerState.loadRules.get()); } @@ -214,6 +215,10 @@ public void stop() { private void trigger() { if (state.get() == State.Started && scheduling.compareAndSet(false, true)) { long jitter = ThreadLocalRandom.current().nextLong(0, retryDelay.toMillis()); + if (task != null && !task.isDone()) { + log.trace("Cancel scheduled balance task"); + task.cancel(true); + } task = executor.schedule(this::updateAndBalance, jitter, TimeUnit.MILLISECONDS); } } @@ -279,9 +284,11 @@ private void updateAndBalance() { private void scheduleRetry(Map expected, Set landscape, Duration delay) { + log.debug("Retry balance after {}s", delay.toSeconds()); task = executor.schedule(() -> { if (!Objects.equals(expected, this.expectedBalancerStates) || landscape != this.landscape) { // retry is preemptive + log.trace("Balance retry is preempted"); return; } if (scheduling.compareAndSet(false, true)) { @@ -295,7 +302,6 @@ private void balance(final Map expected, metricsManager.scheduleCount.increment(); Duration delay = null; for (Map.Entry entry : balancers.entrySet()) { - String balancerFactoryName = entry.getKey(); StoreBalancerState fromBalancerState = entry.getValue(); StoreBalancer fromBalancer = fromBalancerState.balancer; String balancerName = fromBalancer.getClass().getSimpleName(); diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java index 4eff0bed3..3bcf0197a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java @@ -19,14 +19,22 @@ package org.apache.bifromq.basekv.balance; +import java.time.Duration; +import 
org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.balance.impl.RedundantRangeRemovalBalancer; /** * Builtin balancer for redundant range removal. */ class RedundantRangeRemovalBalancerFactory implements IStoreBalancerFactory { + private final Duration delay; + + RedundantRangeRemovalBalancerFactory(Duration delay) { + this.delay = delay; + } + @Override public StoreBalancer newBalancer(String clusterId, String localStoreId) { - return new RedundantRangeRemovalBalancer(clusterId, localStoreId); + return new RedundantRangeRemovalBalancer(clusterId, localStoreId, delay, HLC.INST::getPhysical); } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java index 6011e4d08..c9d979d36 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java @@ -100,6 +100,9 @@ public void update(Set landscape) { KVRangeIdUtil.toString(rangeId)); bootstrapTrigger.set(new BootstrapTrigger(rangeId, FULL_BOUNDARY, randomSuspicionTimeout())); } + } else if (bootstrapTrigger.get() != null) { + log.debug("Effective epoch found: {}, cancel any pending bootstrap", effectiveEpoch.get().epoch()); + bootstrapTrigger.set(null); } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java index 38f86834a..0ae68e33a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java +++ 
b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java @@ -24,6 +24,7 @@ import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch; import com.google.common.collect.Sets; +import java.time.Duration; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; @@ -33,9 +34,15 @@ import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.balance.AwaitBalance; +import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.NoNeedBalance; import org.apache.bifromq.basekv.balance.StoreBalancer; +import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; @@ -58,7 +65,9 @@ * caution.

*/ public class RedundantRangeRemovalBalancer extends StoreBalancer { - private volatile NavigableMap> latest = Collections.emptyNavigableMap(); + private final Supplier millisSource; + private final long suspicionDurationMillis; + private final AtomicReference pendingQuitCommand = new AtomicReference<>(); /** * Constructor of StoreBalancer. @@ -66,23 +75,60 @@ public class RedundantRangeRemovalBalancer extends StoreBalancer { * @param clusterId the id of the BaseKV cluster which the store belongs to * @param localStoreId the id of the store which the balancer is responsible for */ - public RedundantRangeRemovalBalancer(String clusterId, String localStoreId) { + public RedundantRangeRemovalBalancer(String clusterId, + String localStoreId, + Duration suspicionDuration, + Supplier millisSource) { super(clusterId, localStoreId); + this.suspicionDurationMillis = suspicionDuration.toMillis(); + this.millisSource = millisSource; } @Override public void update(Set landscape) { - latest = organizeByEpoch(landscape); + NavigableMap> landscapeByEpoch = organizeByEpoch(landscape); + if (landscapeByEpoch.isEmpty()) { + pendingQuitCommand.set(null); + return; + } + boolean scheduled = cleanupRedundantEpoch(landscapeByEpoch); + if (scheduled) { + return; + } + Map.Entry> oldestEntry = landscapeByEpoch.firstEntry(); + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue()); + scheduled = cleanupIdConflictRange(effectiveEpoch); + if (scheduled) { + return; + } + scheduled = cleanupBoundaryConflictRange(effectiveEpoch); + if (!scheduled) { + if (pendingQuitCommand.get() != null) { + log.debug("No redundant range found, clear pending quit command"); + pendingQuitCommand.set(null); + } + } } @Override public BalanceResult balance() { - if (latest.isEmpty()) { - return NoNeedBalance.INSTANCE; + PendingQuitCommand current = pendingQuitCommand.get(); + if (current != null) { + long nowMillis = millisSource.get(); + if (nowMillis > 
current.triggerTime) { + pendingQuitCommand.set(null); + return BalanceNow.of(current.quitCmd); + } else { + return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis)); + } } - if (latest.size() > 1) { + return NoNeedBalance.INSTANCE; + } + + private boolean cleanupRedundantEpoch(NavigableMap> landscapeByEpoch) { + if (landscapeByEpoch.size() > 1) { // deal with epoch-conflict ranges - Set storeDescriptors = latest.lastEntry().getValue(); + Set storeDescriptors = landscapeByEpoch.lastEntry().getValue(); for (KVRangeStoreDescriptor storeDescriptor : storeDescriptors) { if (!storeDescriptor.getId().equals(localStoreId)) { continue; @@ -91,34 +137,42 @@ public BalanceResult balance() { if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) { continue; } - log.debug("Remove Epoch-Conflict range: {} in store {}", - KVRangeIdUtil.toString(rangeDescriptor.getId()), - storeDescriptor.getId()); - return quit(localStoreId, rangeDescriptor); + log.debug("Schedule command to remove epoch-conflict range: id={}, boundary={}", + KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout())); + return true; } } - return NoNeedBalance.INSTANCE; } - Map.Entry> oldestEntry = latest.firstEntry(); - Map> conflictingRanges = findConflictingRanges(oldestEntry.getValue()); + return false; + } + + private boolean cleanupIdConflictRange(EffectiveEpoch effectiveEpoch) { + Map> conflictingRanges = + findConflictingRanges(effectiveEpoch.storeDescriptors()); if (!conflictingRanges.isEmpty()) { // deal with id-conflict ranges for (KVRangeId rangeId : conflictingRanges.keySet()) { NavigableSet leaderRanges = conflictingRanges.get(rangeId); for (LeaderRange leaderRange : leaderRanges) { if (!leaderRange.ownerStoreDescriptor().getId().equals(localStoreId)) { - return NoNeedBalance.INSTANCE; + return false; } - log.warn("Remove Id-Conflict range: {} in 
store {}", + log.warn("Schedule command to remove id-conflict range: id={}, boundary={}", KVRangeIdUtil.toString(leaderRange.descriptor().getId()), - leaderRange.ownerStoreDescriptor().getId()); - return quit(localStoreId, leaderRange.descriptor()); + leaderRange.descriptor().getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, leaderRange.descriptor()), randomSuspicionTimeout())); + return true; } } - return NoNeedBalance.INSTANCE; } + return false; + } + + private boolean cleanupBoundaryConflictRange(EffectiveEpoch effectiveEpoch) { // deal with boundary-conflict ranges - EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue()); NavigableMap effectiveLeaders = getEffectiveRoute(effectiveEpoch).leaderRanges(); for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) { if (!storeDescriptor.getId().equals(localStoreId)) { @@ -131,14 +185,15 @@ public BalanceResult balance() { Boundary boundary = rangeDescriptor.getBoundary(); LeaderRange leaderRange = effectiveLeaders.get(boundary); if (leaderRange == null || !leaderRange.descriptor().getId().equals(rangeDescriptor.getId())) { - log.warn("Remove Boundary-Conflict range: {} in store {}", - KVRangeIdUtil.toString(rangeDescriptor.getId()), - storeDescriptor.getId()); - return quit(localStoreId, rangeDescriptor); + log.warn("Schedule command to remove boundary-conflict range: id={}, boundary={}", + KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout())); + return true; } } } - return NoNeedBalance.INSTANCE; + return false; } private Map> findConflictingRanges( @@ -184,4 +239,13 @@ private boolean isDisjoint(ClusterConfig firstConfig, ClusterConfig secondConfig return Collections.disjoint(firstVoters, secondVoters) && Collections.disjoint(firstNextVoters, secondNextVoters); } + + private 
long randomSuspicionTimeout() { + return millisSource.get() + + ThreadLocalRandom.current().nextLong(suspicionDurationMillis, suspicionDurationMillis * 2); + } + + private record PendingQuitCommand(BalanceCommand quitCmd, long triggerTime) { + + } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java index 05ba95a7a..00871e263 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java @@ -23,9 +23,11 @@ import static org.testng.Assert.assertSame; import com.google.protobuf.ByteString; +import java.time.Duration; import java.util.Collections; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -34,20 +36,22 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class RedundantRangeRemovalBalancerTest { - private final String clusterId = "testCluster"; private final String localStoreId = "localStore"; private RedundantRangeRemovalBalancer balancer; + private AtomicLong mockTime; @BeforeMethod public void setUp() { - balancer = new 
RedundantRangeRemovalBalancer(clusterId, localStoreId); + mockTime = new AtomicLong(0L); // Start time at 0 + balancer = new RedundantRangeRemovalBalancer(clusterId, localStoreId, Duration.ofSeconds(1), mockTime::get); } @Test @@ -56,6 +60,7 @@ public void noRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -85,6 +90,7 @@ public void removeRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -99,6 +105,7 @@ public void removeRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder() .setId(kvRangeId2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("n")) @@ -121,6 +128,11 @@ public void removeRangeInRedundantEpoch() { balancer.update(storeDescriptors); BalanceResult command = balancer.balance(); + // first returns AwaitBalance due to suspicion delay + assertEquals(command.type(), BalanceResultType.AwaitBalance); + // advance mock time beyond the max suspicion window (2s) + mockTime.set(3000L); + command = balancer.balance(); assertEquals(command.type(), BalanceResultType.BalanceNow); ChangeConfigCommand changeConfigCommand = (ChangeConfigCommand) ((BalanceNow) command).command; @@ -137,6 +149,7 @@ public void noLocalLeaderRangeInRedundantEpoch() { .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) .setEndKey(ByteString.copyFromUtf8("m")) @@ -150,6 +163,7 @@ public void 
noLocalLeaderRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder() .setId(kvRangeId2) .setRole(RaftNodeStatus.Follower) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("n")) @@ -187,6 +201,7 @@ public void removeRedundantEffectiveRange() { .setId(kvRangeId1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(config) .build(); @@ -194,6 +209,7 @@ public void removeRedundantEffectiveRange() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(config) .build(); @@ -209,7 +225,10 @@ public void removeRedundantEffectiveRange() { balancer.update(storeDescriptors); BalanceResult result = balancer.balance(); - + // first returns AwaitBalance due to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + mockTime.set(3000L); + result = balancer.balance(); assertEquals(result.type(), BalanceResultType.BalanceNow); ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; @@ -226,6 +245,7 @@ public void ignoreNonLocalStore() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -259,6 +279,7 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() { KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() @@ -289,6 +310,10 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() { balancer.update(Set.of(localStoreDesc, peerStoreDesc)); BalanceResult result = balancer.balance(); + // first returns AwaitBalance due 
to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + mockTime.set(3000L); + result = balancer.balance(); assertEquals(result.type(), BalanceResultType.BalanceNow); ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; @@ -311,6 +336,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() .addVoters(localStoreId) @@ -321,6 +347,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() .addVoters(peerStoreId) @@ -344,7 +371,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { } @Test - public void idConflictButVotersOverlap_shouldNotDelete() { + public void idConflictButVotersOverlapShouldNotDelete() { String peerStoreId = "peer"; KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); Boundary boundary = Boundary.newBuilder() @@ -352,7 +379,10 @@ public void idConflictButVotersOverlap_shouldNotDelete() { .setEndKey(ByteString.copyFromUtf8("z")).build(); KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder() - .setId(kvRangeId).setRole(RaftNodeStatus.Leader).setVer(1) + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() .addVoters(localStoreId) @@ -360,7 +390,10 @@ public void idConflictButVotersOverlap_shouldNotDelete() { .build(); KVRangeDescriptor peerRange = KVRangeDescriptor.newBuilder() - .setId(kvRangeId).setRole(RaftNodeStatus.Leader).setVer(1) + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() 
.addVoters(localStoreId) @@ -381,4 +414,4 @@ public void idConflictButVotersOverlap_shouldNotDelete() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.NoNeedBalance); } -} \ No newline at end of file +} diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java index 2c36c3f38..99fc0701a 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java @@ -22,6 +22,7 @@ import lombok.Getter; import lombok.Setter; import lombok.experimental.SuperBuilder; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Getter @Setter @@ -35,6 +36,7 @@ public CommandType type() { @Override public String toString() { - return String.format("RecoveryCommand{toStore=%s}", getToStore()); + return String.format("RecoveryCommand{toStore=%s, kvRangeId=%s}", + getToStore(), KVRangeIdUtil.toString(getKvRangeId())); } } diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java index 125ff4208..dadf83ba9 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.balance.util; @@ -23,8 +23,14 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey; import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableMap; +import java.util.Set; import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.balance.command.BootstrapCommand; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; @@ -37,14 +43,6 @@ import org.apache.bifromq.basekv.utils.EffectiveRoute; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.basekv.utils.LeaderRange; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableMap; -import java.util.Set; /** * Utility class for generating balance commands. 
@@ -57,24 +55,24 @@ public class CommandUtil { * @param rangeDescriptor the range descriptor of the range which the balancer is responsible for * @return the generated ChangeConfigCommand */ - public static BalanceResult quit(String localStoreId, KVRangeDescriptor rangeDescriptor) { + public static BalanceCommand quit(String localStoreId, KVRangeDescriptor rangeDescriptor) { ClusterConfig config = rangeDescriptor.getConfig(); if (config.getVotersCount() > 1 || config.getLearnersCount() > 0) { - return BalanceNow.of(ChangeConfigCommand.builder() + return ChangeConfigCommand.builder() .toStore(localStoreId) .kvRangeId(rangeDescriptor.getId()) .expectedVer(rangeDescriptor.getVer()) .voters(Set.of(localStoreId)) .learners(Collections.emptySet()) - .build()); + .build(); } else { - return BalanceNow.of(ChangeConfigCommand.builder() + return ChangeConfigCommand.builder() .toStore(localStoreId) .kvRangeId(rangeDescriptor.getId()) .expectedVer(rangeDescriptor.getVer()) .voters(Collections.emptySet()) .learners(Collections.emptySet()) - .build()); + .build(); } } diff --git a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java index 2b06a80a7..ae41eafa1 100644 --- a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java +++ b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java @@ -19,17 +19,19 @@ package org.apache.bifromq.basekv.balance.util; +import static com.google.protobuf.ByteString.copyFromUtf8; import static org.apache.bifromq.basekv.balance.util.CommandUtil.diffBy; import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import static com.google.protobuf.ByteString.copyFromUtf8; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import 
static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; -import org.apache.bifromq.basekv.balance.BalanceResultType; +import com.google.protobuf.ByteString; +import java.util.Collections; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.balance.command.BootstrapCommand; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; @@ -44,11 +46,6 @@ import org.apache.bifromq.basekv.utils.EffectiveRoute; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.basekv.utils.LeaderRange; -import com.google.protobuf.ByteString; -import java.util.Collections; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeMap; import org.testng.annotations.Test; public class CommandUtilTest { @@ -65,10 +62,9 @@ public void quitWithMultipleVoters() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); @@ -89,10 +85,9 @@ public void quitWithLearners() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; 
assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); @@ -112,10 +107,9 @@ public void quitWithSingleVoterNoLearners() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); From 7fbf1a8219dfc8609e3bf2eaefea5da8301ea29b Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Fri, 5 Sep 2025 10:51:41 +0800 Subject: [PATCH 19/20] Exclude ranges in terminated states from effective route --- .../balance/impl/RangeLeaderBalancerTest.java | 14 +- .../balance/impl/ReplicaCntBalancerTest.java | 59 +++- .../bifromq/basekv/utils/DescriptorUtil.java | 31 +- .../basekv/utils/DescriptorUtilTest.java | 274 +++++++++++++++++- 4 files changed, 350 insertions(+), 28 deletions(-) diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java index 1b1aebccd..ab1348c72 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.balance.impl; @@ -25,6 +25,8 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import java.util.Set; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -34,10 +36,9 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import com.google.protobuf.ByteString; -import java.util.Set; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -58,6 +59,7 @@ public void noEffectiveRouteNoBalanceNeeded() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary( Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).setEndKey(ByteString.copyFromUtf8("z")) .build()) @@ -139,6 +141,7 @@ public void balanceToOtherNoLeaderStore() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(toBoundary(null, ByteString.copyFromUtf8("z"))) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build()) .build(); @@ -148,6 +151,7 @@ public void balanceToOtherNoLeaderStore() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null)) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build()) .build(); @@ -161,6 +165,7 @@ public void 
balanceToOtherNoLeaderStore() { KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder() .setId(kvRangeId3) .setRole(RaftNodeStatus.Follower) + .setState(State.StateType.Normal) .setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null)) .setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build()) .build(); @@ -185,6 +190,7 @@ public void transferLeadership() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build()) .build(); @@ -193,6 +199,7 @@ public void transferLeadership() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build()) .build(); @@ -205,6 +212,7 @@ public void transferLeadership() { KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder() .setId(kvRangeId3) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build()) .build(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java index f5f7e1a55..1b5b7fc1f 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java +++ 
b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java @@ -40,6 +40,7 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.utils.EffectiveRoute; @@ -69,6 +70,7 @@ public void balanceToAddVoter() { .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -82,6 +84,7 @@ public void balanceToAddVoter() { .setId(kvRangeId2) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -127,6 +130,7 @@ public void balanceToAddLearner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -169,6 +173,7 @@ public void balanceToRemoveLearner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -200,6 +205,7 @@ public void promoteLearnersToVoters() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -210,6 +216,7 @@ public void promoteLearnersToVoters() { .setId(kvRangeId) .setRole(RaftNodeStatus.Follower) .setVer(1) + 
.setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -247,6 +254,7 @@ public void balanceToAddAllRestLearners() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -298,6 +306,7 @@ public void balanceVoterCount() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setVer(1) + .setState(State.StateType.Normal) .setRole(RaftNodeStatus.Leader) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() @@ -309,6 +318,7 @@ public void balanceVoterCount() { .setId(kvRangeId2) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -344,6 +354,7 @@ public void balanceLearnerCount() { .setId(kvRangeId1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -360,6 +371,7 @@ public void balanceLearnerCount() { .setId(kvRangeId2) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")) .setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() @@ -378,6 +390,7 @@ public void balanceLearnerCount() { .setId(kvRangeId3) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s3") @@ -412,6 +425,7 @@ public void generateCorrectClusterConfig() { 
.setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -454,6 +468,7 @@ public void removeDeadVoterAndBackfillEvenIfCountEqualsExpected() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -528,6 +543,7 @@ public void learnersMinusOneUsesLiveMinusVotersAndSanitizes() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -597,6 +613,7 @@ public void learnersMinusOnePreferPromoteLearnersToFillVoters() { .setId(rid) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -638,6 +655,7 @@ public void noChangeWhenLiveLessThanExpectedAndNoDeadVoter() { .setId(rid) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -662,6 +680,7 @@ public void neverRemoveLeaderWhenShrinkingVoters() { .setId(rid) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("leader") @@ -722,6 +741,7 @@ public void fixedLearnerCountRemovesDeadAndBackfills() { .setId(rid) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -754,8 +774,16 @@ public void zeroLearnersTargetClearsLearners() { KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); KVRangeDescriptor range = KVRangeDescriptor.newBuilder() - 
.setId(rid).setRole(RaftNodeStatus.Leader).setVer(1).setBoundary(FULL_BOUNDARY) - .setConfig(ClusterConfig.newBuilder().addVoters("s1").addLearners("s2").addLearners("s3").build()) + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("s2") + .addLearners("s3") + .build()) .build(); KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); @@ -776,9 +804,15 @@ public void learnersMinusOneWithAllLiveAsVotersMakesLearnersEmpty() { KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); KVRangeDescriptor range = KVRangeDescriptor.newBuilder() - .setId(rid).setRole(RaftNodeStatus.Leader).setVer(1).setBoundary(FULL_BOUNDARY) + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() - .addVoters("s1").addVoters("s2").addVoters("s3") + .addVoters("s1") + .addVoters("s2") + .addVoters("s3") .addLearners("ghost") // should be sanitized away .build()) .build(); @@ -805,6 +839,7 @@ public void balanceVoterCountPrefersZeroCountStoreFirst() { .setId(r1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) .build(); @@ -812,6 +847,7 @@ public void balanceVoterCountPrefersZeroCountStoreFirst() { .setId(r2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("m")) .setEndKey(ByteString.copyFromUtf8("z")) @@ -823,7 +859,9 @@ public void balanceVoterCountPrefersZeroCountStoreFirst() { KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB") .addRanges(KVRangeDescriptor.newBuilder() .setId(r3) 
- .setVer(1).setRole(RaftNodeStatus.Leader) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("z")) .build()) @@ -851,6 +889,7 @@ public void balanceVoterCountDoesOnlyOneChangePerRound() { .setId(r1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) .build(); @@ -858,6 +897,7 @@ public void balanceVoterCountDoesOnlyOneChangePerRound() { .setId(r2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) .build(); @@ -879,7 +919,10 @@ public void balanceVoterCountDoesOnlyOneChangePerRound() { public void balanceVoterCountSkipsTargetsAlreadyInVotersOrLearners() { KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() - .setId(r1).setVer(1).setRole(RaftNodeStatus.Leader) + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("sA") @@ -889,7 +932,9 @@ public void balanceVoterCountSkipsTargetsAlreadyInVotersOrLearners() { KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() .setId(KVRangeId.newBuilder().setEpoch(1).setId(2).build()) - .setVer(1).setRole(RaftNodeStatus.Leader) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("sA") diff --git 
a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java index c39045fa6..44abd56e4 100644 --- a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java +++ b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.utils; @@ -22,10 +22,6 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKey; import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import com.google.protobuf.ByteString; import java.util.Comparator; import java.util.HashMap; @@ -37,6 +33,10 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; /** * Utilities for processing descriptor. 
@@ -100,14 +100,21 @@ public static EffectiveRoute getEffectiveRoute(EffectiveEpoch effectiveEpoch) { for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) { for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) { if (rangeDescriptor.getRole() == RaftNodeStatus.Leader) { - ByteString startKey = startKey(rangeDescriptor.getBoundary()); - if (startKey == null) { - firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); - continue; + switch (rangeDescriptor.getState()) { + case Normal, ConfigChanging, PreparedMerging, WaitingForMerge -> { + ByteString startKey = startKey(rangeDescriptor.getBoundary()); + if (startKey == null) { + firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); + continue; + } + sortedLeaderRanges.computeIfAbsent(startKey, + k -> new TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId()))) + .add(new LeaderRange(rangeDescriptor, storeDescriptor)); + } + default -> { + // skip other states + } } - sortedLeaderRanges.computeIfAbsent(startKey, - k -> new TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId()))) - .add(new LeaderRange(rangeDescriptor, storeDescriptor)); } } } diff --git a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java index 3b724b5af..40b19a232 100644 --- a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java +++ b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java @@ -19,16 +19,15 @@ package org.apache.bifromq.basekv.utils; +import static org.apache.bifromq.basekv.proto.State.StateType.Merged; +import static org.apache.bifromq.basekv.proto.State.StateType.Normal; +import static org.apache.bifromq.basekv.proto.State.StateType.PreparedMerging; +import static 
org.apache.bifromq.basekv.proto.State.StateType.Removed; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch; import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.HashSet; @@ -37,6 +36,11 @@ import java.util.NavigableMap; import java.util.Set; import java.util.stream.Collectors; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.testng.annotations.Test; public class DescriptorUtilTest { @@ -460,6 +464,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() .setId(id1) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder1.build()) .build(); @@ -470,6 +475,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() .setId(id2) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder2.build()) .build(); @@ -479,6 +485,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder() .setId(id3) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder3.build()) .build(); @@ -517,11 +524,13 @@ public void getEffectiveRouteSelectsSmallestVer() { KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() .setId(id1) 
.setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundary) .build(); KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() .setId(id2) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundary) .build(); @@ -568,4 +577,257 @@ public void getEffectiveRouteWithNoLeaders() { assertTrue(routeMap.isEmpty()); } -} \ No newline at end of file + + @Test + public void getEffectiveRouteFiltersByState() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + KVRangeId id4 = KVRangeId.newBuilder().setEpoch(1).setId(4).build(); + + // Allowed states + KVRangeDescriptor rNormal = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + KVRangeDescriptor rPreparedMerging = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("b")) + .setEndKey(ByteString.copyFromUtf8("m")) + .build()) + .setState(PreparedMerging) + .build(); + + // Disallowed states + KVRangeDescriptor rMerged = KVRangeDescriptor.newBuilder() + .setId(id3) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Merged) + .build(); + KVRangeDescriptor rRemoved = KVRangeDescriptor.newBuilder() + .setId(id4) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Removed) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(rNormal) + .addRanges(rPreparedMerging) + .addRanges(rMerged) + .addRanges(rRemoved) + 
.build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + EffectiveRoute effectiveRoute = DescriptorUtil.getEffectiveRoute(effectiveEpoch); + NavigableMap routeMap = effectiveRoute.leaderRanges(); + + // Only two allowed ranges should be present + assertEquals(routeMap.size(), 2); + List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList(); + assertTrue(ids.contains(1L)); + assertTrue(ids.contains(2L)); + } + + @Test + public void getEffectiveRoutePrefersNullStartKeyAsFirst() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + + // First range without startKey (should be chosen as first) + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setEndKey(ByteString.copyFromUtf8("m")) + .build()) + .setState(Normal) + .build(); + + // Second range with explicit startKey + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r2) + .addRanges(r1) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + + assertEquals(routeMap.firstEntry().getValue().descriptor().getId(), id1); + } + + @Test + public void getEffectiveRouteStopsAtNullEndKey() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = 
KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + // Tail range with null endKey + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + // An extra range that should never be reached after tail + KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder() + .setId(id3) + .setRole(RaftNodeStatus.Leader) + .setBoundary( + Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("c")).setEndKey(ByteString.copyFromUtf8("d")) + .build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r1) + .addRanges(r2) + .addRanges(r3) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + + // Should stop at r2 (endKey null) + assertEquals(routeMap.size(), 2); + List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList(); + assertTrue(ids.contains(1L)); + assertTrue(ids.contains(2L)); + } + + @Test + public void getEffectiveRouteAllowsGapsByCeilingStartKey() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setEndKey(ByteString.copyFromUtf8("b")) + .build()) 
+ .setState(Normal) + .build(); + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("c")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r1) + .addRanges(r2) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + assertEquals(routeMap.size(), 2); + } + + @Test + public void organizeByEpochRetainsStoresWithoutRangesInEpoch() { + // store1 has epoch 1 & 2 ranges, store2 has only epoch 2 + KVRangeId id11 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id21 = KVRangeId.newBuilder().setEpoch(2).setId(1).build(); + KVRangeDescriptor r11 = KVRangeDescriptor.newBuilder() + .setId(id11) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .build(); + KVRangeDescriptor r21 = KVRangeDescriptor.newBuilder() + .setId(id21) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("n")).build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("store1").addRanges(r11).addRanges(r21) + .build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("store2").addRanges(r21).build(); + + Set set = new HashSet<>(); + set.add(s1); + set.add(s2); + + NavigableMap> byEpoch = DescriptorUtil.organizeByEpoch(set); + // Epoch 1 should contain both stores, with store2 having 0 ranges + assertTrue(byEpoch.containsKey(1L)); + Set epoch1 = byEpoch.get(1L); + assertEquals(epoch1.size(), 2); + for (KVRangeStoreDescriptor d : epoch1) { + if (d.getId().equals("store1")) { + 
assertEquals(d.getRangesCount(), 1); + assertEquals(d.getRanges(0).getId().getEpoch(), 1L); + } else if (d.getId().equals("store2")) { + assertEquals(d.getRangesCount(), 0); + } + } + } + + @Test + public void getEffectiveEpochOldestSelectionWithMixedStores() { + // store1 has epoch 2, store2 has epoch 3; no epoch 1 present => pick epoch 2 + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(2).setId(1).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(3).setId(1).build(); + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder().setId(id2).build(); + KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder().setId(id3).build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(r2).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(r3).build(); + Set set = new HashSet<>(); + set.add(s1); + set.add(s2); + + Set result = DescriptorUtil.getEffectiveEpoch(set).get().storeDescriptors(); + assertEquals(result.size(), 2); + // All descriptors in effective epoch must be epoch 2 versions of both stores, with s2 having 0 ranges + for (KVRangeStoreDescriptor d : result) { + if (d.getId().equals("s1")) { + assertEquals(d.getRangesCount(), 1); + assertEquals(d.getRanges(0).getId().getEpoch(), 2L); + } else if (d.getId().equals("s2")) { + assertEquals(d.getRangesCount(), 0); + } + } + } +} From 91723a2293b55deb84a5054fc493cd16db4b7970 Mon Sep 17 00:00:00 2001 From: Yonny Hao Date: Mon, 15 Sep 2025 15:07:30 +0800 Subject: [PATCH 20/20] Enable manually triggered cov build --- .github/workflows/build-cov.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-cov.yaml b/.github/workflows/build-cov.yaml index 5fa51550e..bd0c4b8e6 100644 --- a/.github/workflows/build-cov.yaml +++ b/.github/workflows/build-cov.yaml @@ -1,6 +1,7 @@ name: Cov-Build on: + workflow_dispatch: pull_request: branches: - 'main'