diff --git a/.github/workflows/build-cov.yaml b/.github/workflows/build-cov.yaml
index 5fa51550e..bd0c4b8e6 100644
--- a/.github/workflows/build-cov.yaml
+++ b/.github/workflows/build-cov.yaml
@@ -1,6 +1,7 @@
name: Cov-Build
on:
+ workflow_dispatch:
pull_request:
branches:
- 'main'
diff --git a/base-cluster/pom.xml b/base-cluster/pom.xml
index 34c978411..1bcc08082 100644
--- a/base-cluster/pom.xml
+++ b/base-cluster/pom.xml
@@ -33,6 +33,10 @@
org.apache.bifromq
base-env-provider
+
+ org.apache.bifromq
+ base-util
+
org.apache.bifromq
base-hlc
diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java
index 1633b3cf5..d74230271 100644
--- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java
+++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java
@@ -14,14 +14,31 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecluster;
-import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI;
import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Strings;
+import com.google.protobuf.ByteString;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics;
+import io.reactivex.rxjava3.core.Observable;
+import io.reactivex.rxjava3.core.Scheduler;
+import io.reactivex.rxjava3.disposables.CompositeDisposable;
+import io.reactivex.rxjava3.schedulers.Schedulers;
+import java.net.InetSocketAddress;
+import java.time.Duration;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.atomic.AtomicReference;
+import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint;
import org.apache.bifromq.basecluster.fd.FailureDetector;
import org.apache.bifromq.basecluster.fd.IFailureDetector;
@@ -43,23 +60,6 @@
import org.apache.bifromq.basecrdt.store.ICRDTStore;
import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage;
import org.apache.bifromq.baseenv.EnvProvider;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Strings;
-import com.google.protobuf.ByteString;
-import io.micrometer.core.instrument.Metrics;
-import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics;
-import io.reactivex.rxjava3.core.Observable;
-import io.reactivex.rxjava3.core.Scheduler;
-import io.reactivex.rxjava3.disposables.CompositeDisposable;
-import io.reactivex.rxjava3.schedulers.Schedulers;
-import java.net.InetSocketAddress;
-import java.time.Duration;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.atomic.AtomicReference;
-import lombok.extern.slf4j.Slf4j;
@Slf4j
final class AgentHost implements IAgentHost {
@@ -173,6 +173,11 @@ public Observable
+
+ org.awaitility
+ awaitility
+
org.apache.logging.log4j
log4j-api
diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java
index 72b13ae93..25eb0c825 100644
--- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java
+++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java
@@ -73,7 +73,7 @@ class CRDTCluster> {
this.store = store;
this.agentHost = agentHost;
replicaId = generate(uri);
- log = MDCLogger.getLogger(CRDTCluster.class, "replica", print(replicaId));
+ log = MDCLogger.getLogger(CRDTCluster.class, "store", store.id(), "replica", print(replicaId));
membershipAgent = agentHost.host(replicaId.getUri());
endpoint = AgentMemberAddr.newBuilder()
.setName(AgentUtil.toAgentMemberName(replicaId))
diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java
index 94213d86e..e74ee85d8 100644
--- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java
+++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java
@@ -115,6 +115,11 @@ public Observable> aliveCRDTs() {
});
}
+ @Override
+ public Observable refreshSignal() {
+ return agentHost.refuteSignal();
+ }
+
private CompletableFuture stopHostingInternal(String uri) {
return hostedCRDT.remove(uri).close();
}
diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java
index 14b1d72db..97375ff37 100644
--- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java
+++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java
@@ -14,20 +14,20 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.service;
-import org.apache.bifromq.basecluster.IAgentHost;
-import org.apache.bifromq.basecrdt.core.api.ICRDTOperation;
-import org.apache.bifromq.basecrdt.core.api.ICausalCRDT;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.protobuf.ByteString;
import io.reactivex.rxjava3.core.Observable;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import lombok.NonNull;
+import org.apache.bifromq.basecluster.IAgentHost;
+import org.apache.bifromq.basecrdt.core.api.ICRDTOperation;
+import org.apache.bifromq.basecrdt.core.api.ICausalCRDT;
+import org.apache.bifromq.basecrdt.proto.Replica;
/**
* The CRDT service with decentralized membership management based on base-cluster.
@@ -89,6 +89,13 @@ static ICRDTService newInstance(IAgentHost agentHost, @NonNull CRDTServiceOption
*/
Observable> aliveCRDTs();
+ /**
+ * A signal to refresh the CRDT replica hosted in the service.
+ *
+ * @return an observable that emits a refresh signal
+ */
+ Observable refreshSignal();
+
/**
* Stop the store.
*/
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java
index d3662f976..0d91e8410 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java
@@ -19,21 +19,22 @@
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.AWORSetOperation;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IAWORSet;
import org.apache.bifromq.basecrdt.core.api.IAWORSetInflater;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class AWORSetInflater extends CausalCRDTInflater implements IAWORSetInflater {
- AWORSetInflater(Replica replica,
+ AWORSetInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java
index 4bef5345e..06c7e5810 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java
@@ -19,21 +19,22 @@
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CCounterOperation;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.ICCounter;
import org.apache.bifromq.basecrdt.core.api.ICCounterInflater;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class CCounterInflater extends CausalCRDTInflater implements ICCounterInflater {
- CCounterInflater(Replica replica,
+ CCounterInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java
index f38d8bd02..ba9e912c5 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java
@@ -73,11 +73,13 @@ abstract class CausalCRDTInflater new AWORSetInflater(replicaId, lattice, executor, inflationInterval, tags);
- case rworset -> new RWORSetInflater(replicaId, lattice, executor, inflationInterval, tags);
- case ormap -> new ORMapInflater(replicaId, lattice, executor, inflationInterval, tags);
- case cctr -> new CCounterInflater(replicaId, lattice, executor, inflationInterval, tags);
- case dwflag -> new DWFlagInflater(replicaId, lattice, executor, inflationInterval, tags);
- case ewflag -> new EWFlagInflater(replicaId, lattice, executor, inflationInterval, tags);
- case mvreg -> new MVRegInflater(replicaId, lattice, executor, inflationInterval, tags);
+ case aworset -> new AWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case rworset -> new RWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case ormap -> new ORMapInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case cctr -> new CCounterInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case dwflag -> new DWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case ewflag -> new EWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
+ case mvreg -> new MVRegInflater(storeId, replicaId, lattice, executor, inflationInterval, tags);
};
}
}
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java
index bd8a654ea..af8276cca 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java
@@ -19,21 +19,22 @@
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.DWFlagOperation;
import org.apache.bifromq.basecrdt.core.api.IDWFlag;
import org.apache.bifromq.basecrdt.core.api.IDWFlagInflater;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class DWFlagInflater extends CausalCRDTInflater implements IDWFlagInflater {
- DWFlagInflater(Replica replica,
+ DWFlagInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java
index e48b94642..dbcd80958 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java
@@ -19,18 +19,18 @@
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.EWFlagOperation;
import org.apache.bifromq.basecrdt.core.api.IEWFlag;
import org.apache.bifromq.basecrdt.core.api.IEWFlagInflater;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class EWFlagInflater extends CausalCRDTInflater implements IEWFlagInflater {
- EWFlagInflater(Replica replica, IReplicaStateLattice stateLattice,
+ EWFlagInflater(String storeId, Replica replica, IReplicaStateLattice stateLattice,
ScheduledExecutorService executor, Duration inflationInterval, String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java
index d12fa3209..0b42d7c54 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
@@ -69,9 +69,10 @@ class InMemReplicaStateLattice implements IReplicaStateLattice {
private final Duration historyExpire;
private final long maxCompactionDuration;
- InMemReplicaStateLattice(Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) {
+ InMemReplicaStateLattice(String storeId, Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) {
this.ownerReplica = ownerReplica;
- this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class, "replica", print(ownerReplica));
+ this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class,
+ "store", storeId, "replica", print(ownerReplica));
this.historyExpire = historyExpire;
this.maxCompactionDuration = maxCompactionTime.toNanos();
}
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java
index 3044da2f4..497844b6d 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java
@@ -14,26 +14,27 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IMVReg;
import org.apache.bifromq.basecrdt.core.api.IMVRegInflater;
import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class MVRegInflater extends CausalCRDTInflater implements IMVRegInflater {
- MVRegInflater(Replica replica,
+ MVRegInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java
index 137b62343..aa8bec0b6 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java
@@ -14,26 +14,27 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IORMap;
import org.apache.bifromq.basecrdt.core.api.IORMapInflater;
import org.apache.bifromq.basecrdt.core.api.ORMapOperation;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class ORMapInflater extends CausalCRDTInflater implements IORMapInflater {
- ORMapInflater(Replica replica,
+ ORMapInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java
index 8698aa25c..c842a58a4 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java
@@ -14,26 +14,27 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
+import java.time.Duration;
+import java.util.concurrent.ScheduledExecutorService;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IRWORSet;
import org.apache.bifromq.basecrdt.core.api.IRWORSetInflater;
import org.apache.bifromq.basecrdt.core.api.RWORSetOperation;
import org.apache.bifromq.basecrdt.proto.Replica;
-import java.time.Duration;
-import java.util.concurrent.ScheduledExecutorService;
class RWORSetInflater extends CausalCRDTInflater implements IRWORSetInflater {
- RWORSetInflater(Replica replica,
+ RWORSetInflater(String storeId,
+ Replica replica,
IReplicaStateLattice stateLattice,
ScheduledExecutorService executor,
Duration inflationInterval,
String... tags) {
- super(replica, stateLattice, executor, inflationInterval, tags);
+ super(storeId, replica, stateLattice, executor, inflationInterval, tags);
}
@Override
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java
index dff26bf68..6e2a39f1d 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java
@@ -65,6 +65,9 @@ final class AntiEntropy {
private long currentNeighborVer;
private long currentInflationTs;
private DeltaMessage currentDelta = null;
+ // track if the last sent delta contains replacements (i.e., real diff),
+ // so that after ACK we can proactively continue to drain remaining deltas
+ private boolean lastSentHasReplacement = false;
AntiEntropy(String storeId,
ByteString localAddr,
@@ -110,31 +113,51 @@ void updateObservedNeighborHistory(long ver,
}
void handleAck(AckMessage ack) {
- if (canceled.get() || !running.get()) {
+ if (canceled.get()) {
return;
}
synchronized (this) {
- if (!running.get() || currentDelta == null) {
- return;
- }
- if (ack.getSeqNo() != currentDelta.getSeqNo()) {
+ // Case 1: Matched ACK for in-flight delta
+ if (running.get() && currentDelta != null && ack.getSeqNo() == currentDelta.getSeqNo()) {
+ // currentDelta has been ack'ed
+ currentDelta = null;
+ if (resendTask != null) {
+ resendTask.cancel(false);
+ }
+ // reset resend counter after a successful ack to avoid inflated backoff
+ resendCount = 0;
+ if (ack.getVer() > neighborVer) {
+ // got the neighbor's newer history
+ neighborVer = ack.getVer();
+ neighborLatticeIndex = to(ack.getLatticeEventsList());
+ neighborHistoryIndex = to(ack.getHistoryEventsList());
+ }
+ running.set(false);
+ // Proactively continue if:
+ // - probe success (currentNeighborVer==0), or
+ // - local inflation happened, or
+ // - neighbor's version advanced since we computed delta, or
+ // - we just sent a batch of replacements and may have more to drain
+ if (currentNeighborVer == 0
+ || lastInflationTs != currentInflationTs
+ || ack.getVer() > currentNeighborVer
+ || lastSentHasReplacement) {
+ scheduleRun();
+ }
+ // clear the flag after scheduling decision
+ lastSentHasReplacement = false;
return;
}
- // currentDelta has been ack'ed
- currentDelta = null;
- if (resendTask != null) {
- resendTask.cancel(false);
- }
+
+ // Case 2: Late or unmatched ACK. Use it to advance neighbor index if it's newer.
if (ack.getVer() > neighborVer) {
- // got newer neighbor's history
neighborVer = ack.getVer();
neighborLatticeIndex = to(ack.getLatticeEventsList());
neighborHistoryIndex = to(ack.getHistoryEventsList());
- }
- running.set(false);
- // if there are new inflation happened or probe success, restart the task
- if (currentNeighborVer == 0 || lastInflationTs != currentInflationTs) {
- scheduleRun();
+ // try to schedule a run if we are not currently running
+ if (!running.get()) {
+ scheduleRun();
+ }
}
}
}
@@ -180,6 +203,7 @@ private void run() {
.addAllHistoryEvents(to(crdtInflater.historyEvents()))
.setVer(HLC.INST.get())
.build();
+ lastSentHasReplacement = false;
send(currentDelta);
} else {
// Calculate delta
@@ -200,6 +224,7 @@ private void run() {
.addAllHistoryEvents(to(crdtInflater.historyEvents()))
.setVer(HLC.INST.get())
.build();
+ lastSentHasReplacement = true;
send(currentDelta);
} else {
currentDelta = null;
@@ -219,7 +244,7 @@ private void run() {
private void send(DeltaMessage deltaMessage) {
log.trace("Local[{}] send delta to neighbor[{}]:\n{}",
toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(deltaMessage));
- neighborMessageSubject.onNext(new NeighborMessage(deltaMessage, neighborAddr));
+ emit(deltaMessage);
// Schedule timer task for resend
scheduleResend(deltaMessage);
}
@@ -239,9 +264,7 @@ private void resend(DeltaMessage toResend) {
if (currentDelta == toResend) {
log.trace("Local[{}] resend delta to neighbor[{}]:\n{}",
toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(toResend));
- deltaMsgCounter.increment(1D);
- deltaMsgBytesCounter.increment(currentDelta.getSerializedSize());
- neighborMessageSubject.onNext(new NeighborMessage(currentDelta, neighborAddr));
+ emit(currentDelta);
if (resendCount++ < 10) {
scheduleResend(toResend);
} else {
@@ -262,4 +285,10 @@ private void resend(DeltaMessage toResend) {
private long resendDelay() {
return ThreadLocalRandom.current().nextLong(500, 2000) * (resendCount + 1);
}
+
+ private void emit(DeltaMessage delta) {
+ deltaMsgCounter.increment();
+ deltaMsgBytesCounter.increment(delta.getSerializedSize());
+ neighborMessageSubject.onNext(new NeighborMessage(delta, neighborAddr));
+ }
}
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java
index e942ddaf6..2ed3cd52c 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java
@@ -99,7 +99,17 @@ Observable neighborMessages() {
CompletableFuture receive(DeltaMessage delta, ByteString sender) {
log.trace("Local[{}] receive delta[{}] from addr[{}]:\n{}",
toPrintable(localAddr), delta.getSeqNo(), toPrintable(sender), toPrintable(delta));
- metricManager.receiveDeltaNum.increment(1D);
+ return handleDelta(delta, sender).thenApply(ack -> {
+ metricManager.sendAckNum.increment();
+ metricManager.sendAckBytes.increment(ack.getSerializedSize());
+ log.trace("Local[{}] send ack[{}] to addr[{}]:\n{}",
+ toPrintable(localAddr), ack.getSeqNo(), toPrintable(sender), toPrintable(ack));
+ return ack;
+ });
+ }
+
+ private CompletableFuture handleDelta(DeltaMessage delta, ByteString sender) {
+ metricManager.receiveDeltaNum.increment();
metricManager.receiveDeltaBytes.increment(delta.getSerializedSize());
AntiEntropy neighborAntiEntropy = neighborMap.get(sender);
if (neighborAntiEntropy != null) {
@@ -124,7 +134,7 @@ CompletableFuture receive(DeltaMessage delta, ByteString sender) {
}
void receive(AckMessage ack, ByteString neighborAddr) {
- metricManager.receiveAckNum.increment(1D);
+ metricManager.receiveAckNum.increment();
metricManager.receiveAckBytes.increment(ack.getSerializedSize());
AntiEntropy neighborAntiEntropy = neighborMap.get(neighborAddr);
if (neighborAntiEntropy != null) {
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java
index 9201d72c1..7a639809c 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java
@@ -72,6 +72,7 @@ public CRDTStore(CRDTStoreOptions options) {
storeExecutor = options.storeExecutor();
String[] tags = new String[] {"store.id", storeId};
inflaterFactory = new CausalCRDTInflaterFactory(
+ options.id(),
options.inflationInterval(),
options.orHistoryExpireTime(),
options.maxCompactionTime(),
diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java
index 4fc6aa7b4..a7672009e 100644
--- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java
+++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java
@@ -38,33 +38,39 @@ public static String print(Replica replica) {
}
public static Supplier toPrintable(Replica replica) {
- return () -> replica.getUri() + "-" + BaseEncoding.base32().encode(replica.getId().toByteArray());
+ return () -> replica.getUri() + "-" + replica.hashCode();
}
- public static String toPrintable(DeltaMessage delta) {
- try {
- return JsonFormat.printer().print(delta);
- } catch (Exception e) {
- // ignore
- return delta.toString();
- }
+ public static Supplier toPrintable(DeltaMessage delta) {
+ return () -> {
+ try {
+ return JsonFormat.printer().print(delta);
+ } catch (Exception e) {
+ // ignore
+ return delta.toString();
+ }
+ };
}
- public static String toPrintable(AckMessage ack) {
- try {
- return JsonFormat.printer().print(ack);
- } catch (Exception e) {
- // ignore
- return ack.toString();
- }
+ public static Supplier toPrintable(AckMessage ack) {
+ return () -> {
+ try {
+ return JsonFormat.printer().print(ack);
+ } catch (Exception e) {
+ // ignore
+ return ack.toString();
+ }
+ };
}
- public static String toPrintable(CRDTStoreMessage ack) {
- try {
- return JsonFormat.printer().print(ack);
- } catch (Exception e) {
- // ignore
- return ack.toString();
- }
+ public static Supplier toPrintable(CRDTStoreMessage ack) {
+ return () -> {
+ try {
+ return JsonFormat.printer().print(ack);
+ } catch (Exception e) {
+ // ignore
+ return ack.toString();
+ }
+ };
}
}
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java
index 398c4a70a..6adbba88f 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java
@@ -14,20 +14,20 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.benchmark;
import static com.google.protobuf.UnsafeByteOperations.unsafeWrap;
-import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.time.Duration;
import java.util.concurrent.Executors;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.runner.Runner;
@@ -42,7 +42,7 @@ public abstract class CRDTBenchmarkTemplate {
@Setup
public void setup() throws IOException {
- inflaterFactory = new CausalCRDTInflaterFactory(
+ inflaterFactory = new CausalCRDTInflaterFactory("testStoreId",
Duration.ofMillis(200), Duration.ofSeconds(20), Duration.ofMillis(200),
Executors.newSingleThreadScheduledExecutor());
doSetup();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java
index c8e7d12e5..98293ad94 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java
@@ -25,12 +25,12 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.core.api.AWORSetOperation;
-import org.apache.bifromq.basecrdt.core.api.IAWORSet;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import java.time.Duration;
+import org.apache.bifromq.basecrdt.core.api.AWORSetOperation;
+import org.apache.bifromq.basecrdt.core.api.IAWORSet;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
public class AWORSetTest extends CRDTTest {
@@ -48,9 +48,9 @@ public class AWORSetTest extends CRDTTest {
@Test
public void testOperation() {
- AWORSetInflater aworSetInflater =
- new AWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000),
- executor, Duration.ofMillis(100));
+ AWORSetInflater aworSetInflater = new AWORSetInflater("testStore", leftReplica,
+ newStateLattice(leftReplica, 1000),
+ executor, Duration.ofMillis(100));
IAWORSet aworSet = aworSetInflater.getCRDT();
assertEquals(aworSet.id(), leftReplica);
@@ -82,11 +82,11 @@ public void testOperation() {
@Test
public void testJoin() {
- AWORSetInflater leftInflater = new AWORSetInflater(leftReplica,
+ AWORSetInflater leftInflater = new AWORSetInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IAWORSet left = leftInflater.getCRDT();
- AWORSetInflater rightInflater = new AWORSetInflater(rightReplica,
+ AWORSetInflater rightInflater = new AWORSetInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100));
IAWORSet right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java
index e0ffbf94f..6352f320a 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java
@@ -24,14 +24,14 @@
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.core.api.CCounterOperation;
-import org.apache.bifromq.basecrdt.core.api.ICCounter;
-import org.apache.bifromq.basecrdt.proto.Replacement;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.protobuf.ByteString;
import io.reactivex.rxjava3.observers.TestObserver;
import java.time.Duration;
import java.util.Optional;
+import org.apache.bifromq.basecrdt.core.api.CCounterOperation;
+import org.apache.bifromq.basecrdt.core.api.ICCounter;
+import org.apache.bifromq.basecrdt.proto.Replacement;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
public class CCounterTest extends CRDTTest {
@@ -46,7 +46,7 @@ public class CCounterTest extends CRDTTest {
@Test
public void testOperation() {
- CCounterInflater cctrInflater = new CCounterInflater(leftReplica,
+ CCounterInflater cctrInflater = new CCounterInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
ICCounter cctr = cctrInflater.getCRDT();
assertEquals(cctr.id(), leftReplica);
@@ -73,11 +73,11 @@ public void testOperation() {
@Test
public void testJoin() {
- CCounterInflater leftInflater = new CCounterInflater(leftReplica,
+ CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100));
ICCounter left = leftInflater.getCRDT();
- CCounterInflater rightInflater = new CCounterInflater(rightReplica,
+ CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100));
ICCounter right = rightInflater.getCRDT();
@@ -101,11 +101,11 @@ public void testJoin() {
@Test
public void testZeroOut() {
- CCounterInflater leftInflater = new CCounterInflater(leftReplica,
+ CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100));
ICCounter left = leftInflater.getCRDT();
- CCounterInflater rightInflater = new CCounterInflater(rightReplica,
+ CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100));
ICCounter right = rightInflater.getCRDT();
@@ -129,11 +129,11 @@ public void testZeroOut() {
@Test
public void testZeroOutInBatch() {
- CCounterInflater leftInflater = new CCounterInflater(leftReplica,
+ CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100));
ICCounter left = leftInflater.getCRDT();
- CCounterInflater rightInflater = new CCounterInflater(rightReplica,
+ CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100));
ICCounter right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java
index 1bd623184..5510283ee 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java
@@ -19,8 +19,6 @@
package org.apache.bifromq.basecrdt.core.internal;
-import org.apache.bifromq.basecrdt.proto.Replacement;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.common.util.concurrent.MoreExecutors;
import java.time.Duration;
import java.util.Optional;
@@ -28,6 +26,8 @@
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import org.apache.bifromq.basecrdt.proto.Replacement;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
@@ -45,7 +45,7 @@ public void tearDown() {
}
protected IReplicaStateLattice newStateLattice(Replica ownerReplica, long historyDurationInMS) {
- return new InMemReplicaStateLattice(ownerReplica,
+ return new InMemReplicaStateLattice("storeId", ownerReplica,
Duration.ofMillis(historyDurationInMS),
Duration.ofMillis(200));
}
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java
index 6f645d657..8a15a977a 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
@@ -25,12 +25,12 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.core.api.DWFlagOperation;
-import org.apache.bifromq.basecrdt.core.api.IDWFlag;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.protobuf.ByteString;
import java.time.Duration;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.basecrdt.core.api.DWFlagOperation;
+import org.apache.bifromq.basecrdt.core.api.IDWFlag;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
@Slf4j
@@ -46,7 +46,7 @@ public class DWFlagTest extends CRDTTest {
@Test
public void testOperation() {
- DWFlagInflater dwFlagInflater = new DWFlagInflater(leftReplica,
+ DWFlagInflater dwFlagInflater = new DWFlagInflater("testStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IDWFlag dwFlag = dwFlagInflater.getCRDT();
assertEquals(dwFlag.id(), leftReplica);
@@ -65,12 +65,12 @@ public void testOperation() {
@Test
public void testJoin() {
- DWFlagInflater leftInflater = new DWFlagInflater(leftReplica,
+ DWFlagInflater leftInflater = new DWFlagInflater("store1", leftReplica,
newStateLattice(leftReplica, 1000000),
executor, Duration.ofMillis(100));
IDWFlag left = leftInflater.getCRDT();
- DWFlagInflater rightInflater = new DWFlagInflater(rightReplica,
+ DWFlagInflater rightInflater = new DWFlagInflater("store2", rightReplica,
newStateLattice(rightReplica, 1000000),
executor, Duration.ofMillis(100));
IDWFlag right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java
index 86bdf261a..8e6f216e6 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
@@ -25,13 +25,13 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.core.api.EWFlagOperation;
-import org.apache.bifromq.basecrdt.core.api.IEWFlag;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.protobuf.ByteString;
import io.reactivex.rxjava3.observers.TestObserver;
import java.time.Duration;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.basecrdt.core.api.EWFlagOperation;
+import org.apache.bifromq.basecrdt.core.api.IEWFlag;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
@Slf4j
@@ -47,7 +47,7 @@ public class EWFlagTest extends CRDTTest {
@Test
public void testOperation() {
- EWFlagInflater ewFlagInflater = new EWFlagInflater(leftReplica,
+ EWFlagInflater ewFlagInflater = new EWFlagInflater("testStore", leftReplica,
newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IEWFlag ewFlag = ewFlagInflater.getCRDT();
@@ -67,11 +67,11 @@ public void testOperation() {
@Test
public void testJoin() {
- EWFlagInflater leftInflater = new EWFlagInflater(leftReplica,
+ EWFlagInflater leftInflater = new EWFlagInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000000), executor, Duration.ofMillis(100));
IEWFlag left = leftInflater.getCRDT();
- EWFlagInflater rightInflater = new EWFlagInflater(rightReplica,
+ EWFlagInflater rightInflater = new EWFlagInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 1000000), executor, Duration.ofMillis(100));
IEWFlag right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java
index 3ce126904..6c0c91805 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java
@@ -14,46 +14,47 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
-import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering;
-import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot;
-import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement;
-import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements;
-import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.protobuf.ByteString.copyFromUtf8;
import static java.util.Collections.emptyMap;
import static java.util.Collections.singleton;
+import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering;
+import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot;
+import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement;
+import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements;
+import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.proto.Replacement;
-import org.apache.bifromq.basecrdt.proto.Replica;
-import org.apache.bifromq.basecrdt.proto.StateLattice;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.Set;
+import org.apache.bifromq.basecrdt.proto.Replacement;
+import org.apache.bifromq.basecrdt.proto.Replica;
+import org.apache.bifromq.basecrdt.proto.StateLattice;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
public class InMemReplicaStateLatticeTest {
- private InMemReplicaStateLattice testLattice;
private final Replica ownerReplica = Replica.newBuilder().setId(copyFromUtf8("Owner")).build();
private final ByteString replicaA = copyFromUtf8("A");
private final ByteString replicaB = copyFromUtf8("B");
+ private InMemReplicaStateLattice testLattice;
@BeforeMethod
public void setup() {
- testLattice = new InMemReplicaStateLattice(ownerReplica, Duration.ofMillis(1000), Duration.ofMillis(200));
+ testLattice = new InMemReplicaStateLattice("storeId", ownerReplica, Duration.ofMillis(1000),
+ Duration.ofMillis(200));
assertFalse(testLattice.lattices().hasNext());
}
@@ -495,7 +496,7 @@ public void testCompact6() throws InterruptedException {
}
@Test
- public void compact7() throws InterruptedException {
+ public void testCompact7() throws InterruptedException {
Set states = newHashSet(
replacement(dot(replicaA, 4, singleDot(replicaA, 4)), dot(replicaA, 3), dot(replicaA, 1)),
replacement(dot(replicaA, 2), dot(replicaA, 1)));
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java
index 3f18b2793..cb99c3ea6 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java
@@ -19,19 +19,19 @@
package org.apache.bifromq.basecrdt.core.internal;
+import static java.util.Collections.emptyIterator;
import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI;
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg;
-import static java.util.Collections.emptyIterator;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
-import org.apache.bifromq.basecrdt.core.api.IMVReg;
-import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import java.time.Duration;
+import org.apache.bifromq.basecrdt.core.api.IMVReg;
+import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
public class MVRegTest extends CRDTTest {
@@ -49,7 +49,7 @@ public class MVRegTest extends CRDTTest {
@Test
public void testOperation() {
- MVRegInflater mvRegInflater = new MVRegInflater(leftReplica,
+ MVRegInflater mvRegInflater = new MVRegInflater("storeId", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IMVReg mvReg = mvRegInflater.getCRDT();
assertEquals(mvReg.id(), leftReplica);
@@ -67,11 +67,11 @@ public void testOperation() {
@Test
public void testJoin() {
- MVRegInflater leftInflater = new MVRegInflater(leftReplica, newStateLattice(leftReplica, 10000),
+ MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 10000),
executor, Duration.ofMillis(100));
IMVReg left = leftInflater.getCRDT();
- MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 10000),
+ MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 10000),
executor, Duration.ofMillis(100));
IMVReg right = rightInflater.getCRDT();
@@ -96,11 +96,11 @@ public void testJoin() {
@Test
public void testJoin1() throws InterruptedException {
- MVRegInflater leftInflater = new MVRegInflater(leftReplica, newStateLattice(leftReplica, 1000),
+ MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IMVReg left = leftInflater.getCRDT();
- MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 1000),
+ MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000),
executor, Duration.ofMillis(100));
IMVReg right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java
index 54afb219a..fd5c5ecc3 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java
@@ -19,15 +19,23 @@
package org.apache.bifromq.basecrdt.core.internal;
+import static java.util.Collections.emptySet;
import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI;
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg;
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.ormap;
-import static java.util.Collections.emptySet;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertTrue;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.google.protobuf.ByteString;
+import io.reactivex.rxjava3.disposables.Disposable;
+import java.time.Duration;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basecrdt.core.api.AWORSetOperation;
import org.apache.bifromq.basecrdt.core.api.CCounterOperation;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
@@ -44,14 +52,6 @@
import org.apache.bifromq.basecrdt.core.api.ORMapOperation;
import org.apache.bifromq.basecrdt.core.api.RWORSetOperation;
import org.apache.bifromq.basecrdt.proto.Replica;
-import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
-import com.google.protobuf.ByteString;
-import io.reactivex.rxjava3.disposables.Disposable;
-import java.time.Duration;
-import java.util.List;
-import java.util.concurrent.atomic.AtomicInteger;
-import lombok.extern.slf4j.Slf4j;
import org.testng.annotations.Test;
@Slf4j
@@ -76,7 +76,7 @@ public class ORMapTest extends CRDTTest {
@Test
public void testOperation() {
- ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000),
+ ORMapInflater orMapInflater = new ORMapInflater("storeId", leftReplica, newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IORMap ormap = orMapInflater.getCRDT();
assertEquals(ormap.id(), leftReplica);
@@ -191,11 +191,11 @@ public void testOperation() {
@Test
public void testJoin() {
- ORMapInflater leftInflater = new ORMapInflater(leftReplica,
+ ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IORMap leftMap = leftInflater.getCRDT();
- ORMapInflater rightInflater = new ORMapInflater(rightReplica,
+ ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100));
IORMap rightMap = rightInflater.getCRDT();
@@ -286,11 +286,11 @@ public void testJoin() {
@Test
public void testJoinAfterCompaction() throws InterruptedException {
- ORMapInflater leftInflater = new ORMapInflater(leftReplica,
+ ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IORMap leftMap = leftInflater.getCRDT();
- ORMapInflater rightInflater = new ORMapInflater(rightReplica,
+ ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 100), executor, Duration.ofMillis(100));
IORMap rightMap = rightInflater.getCRDT();
@@ -318,7 +318,7 @@ public void testJoinAfterCompaction() throws InterruptedException {
@Test
public void testSubCRDTGC() {
- ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000),
+ ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IORMap orMap = orMapInflater.getCRDT();
@@ -344,7 +344,7 @@ public void testSubCRDTGC() {
@Test
public void testInflationSubscriptionWhenGC() {
- ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000),
+ ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IORMap orMap = orMapInflater.getCRDT();
AtomicInteger inflationCount = new AtomicInteger();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java
index 4d3320501..e677b8dda 100644
--- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basecrdt.core.internal;
@@ -25,12 +25,12 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basecrdt.core.api.IRWORSet;
-import org.apache.bifromq.basecrdt.core.api.RWORSetOperation;
-import org.apache.bifromq.basecrdt.proto.Replica;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import java.time.Duration;
+import org.apache.bifromq.basecrdt.core.api.IRWORSet;
+import org.apache.bifromq.basecrdt.core.api.RWORSetOperation;
+import org.apache.bifromq.basecrdt.proto.Replica;
import org.testng.annotations.Test;
public class RWORSetTest extends CRDTTest {
@@ -49,7 +49,7 @@ public class RWORSetTest extends CRDTTest {
@Test
public void testOperation() {
RWORSetInflater rworSetInflater =
- new RWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000),
+ new RWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000),
executor, Duration.ofMillis(100));
IRWORSet rworSet = rworSetInflater.getCRDT();
assertEquals(rworSet.id(), leftReplica);
@@ -82,11 +82,11 @@ public void testOperation() {
@Test
public void testJoin() {
- RWORSetInflater leftInflater = new RWORSetInflater(leftReplica,
+ RWORSetInflater leftInflater = new RWORSetInflater("leftStore", leftReplica,
newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100));
IRWORSet left = leftInflater.getCRDT();
- RWORSetInflater rightInflater = new RWORSetInflater(rightReplica,
+ RWORSetInflater rightInflater = new RWORSetInflater("rightStore", rightReplica,
newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100));
IRWORSet right = rightInflater.getCRDT();
diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java
new file mode 100644
index 000000000..5d414a735
--- /dev/null
+++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.bifromq.basecrdt.store;
+
+import static org.awaitility.Awaitility.await;
+
+import com.google.common.collect.Sets;
+import com.google.protobuf.ByteString;
+import io.reactivex.rxjava3.core.Observable;
+import io.reactivex.rxjava3.schedulers.Schedulers;
+import io.reactivex.rxjava3.subjects.PublishSubject;
+import io.reactivex.rxjava3.subjects.Subject;
+import java.time.Duration;
+import java.util.Collections;
+import org.apache.bifromq.basecrdt.core.api.CRDTURI;
+import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
+import org.apache.bifromq.basecrdt.core.api.IMVReg;
+import org.apache.bifromq.basecrdt.core.api.IORMap;
+import org.apache.bifromq.basecrdt.core.api.MVRegOperation;
+import org.apache.bifromq.basecrdt.core.api.ORMapOperation;
+import org.apache.bifromq.basecrdt.proto.Replica;
+import org.apache.bifromq.basecrdt.store.compressor.GzipCompressor;
+import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage;
+import org.apache.bifromq.basecrdt.store.proto.MessagePayload;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Integration tests verifying that two CRDT stores still converge when anti-entropy ACK messages are dropped
+ * or delivered late.
+ */
+public class AntiEntropyResilienceTest {
+    private ICRDTStore storeA;
+    private ICRDTStore storeB;
+    private Subject<CRDTStoreMessage> chAB;
+    private Subject<CRDTStoreMessage> chBA;
+
+    /**
+     * Builds the options shared by every store created in these tests.
+     */
+    private static CRDTStoreOptions newStoreOptions() {
+        return CRDTStoreOptions.builder()
+            .inflationInterval(Duration.ofMillis(50))
+            .maxEventsInDelta(16)
+            .build();
+    }
+
+    /**
+     * Checks whether the MVReg stored under {@code key} in {@code ormap} currently reads {@code expected}.
+     */
+    private static boolean holdsValue(IORMap ormap, ByteString key, ByteString expected) {
+        IMVReg reg = ormap.getMVReg(key);
+        ByteString read = Sets.newHashSet(reg.read()).stream().findFirst().orElse(ByteString.EMPTY);
+        return expected.equals(read);
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void teardown() {
+        if (storeA != null) {
+            storeA.stop();
+            storeA = null;
+        }
+        if (storeB != null) {
+            storeB.stop();
+            storeB = null;
+        }
+    }
+
+    /**
+     * Verifies that a value written on store A reaches store B even though the first ACK observed on the
+     * A-to-B channel is dropped.
+     */
+    @Test(groups = "integration")
+    public void testConvergeWithDroppedAckOnce() {
+        storeA = ICRDTStore.newInstance(newStoreOptions());
+        storeB = ICRDTStore.newInstance(newStoreOptions());
+
+        chAB = PublishSubject.<CRDTStoreMessage>create().toSerialized();
+        chBA = PublishSubject.<CRDTStoreMessage>create().toSerialized();
+
+        // Interpose on store B's inbound channel (A->B) and drop the first ACK seen there to exercise the
+        // resend/late-ack path.
+        // NOTE(review): ACKs for A's writes presumably travel B->A; confirm filtering A->B is intended.
+        GzipCompressor compressor = new GzipCompressor();
+        final boolean[] firstAckDropped = {false};
+
+        // Start stores with the interposed channels
+        storeA.start(chBA);
+        storeB.start(chAB
+            .flatMap(msg -> {
+                // inspect payload; if it's ACK and first time, drop it once
+                MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg);
+                if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && !firstAckDropped[0]) {
+                    firstAckDropped[0] = true;
+                    // drop this ack
+                    return Observable.empty();
+                }
+                return Observable.just(msg);
+            }));
+
+        storeA.storeMessages()
+            .observeOn(Schedulers.single())
+            .subscribe(chAB::onNext);
+        storeB.storeMessages()
+            .observeOn(Schedulers.single())
+            .subscribe(chBA::onNext);
+
+        String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test");
+        // Build replicas
+        Replica rA = ReplicaIdGenerator.generate(uri);
+        Replica rB = ReplicaIdGenerator.generate(uri);
+        ByteString addrA = ByteString.copyFromUtf8("A");
+        ByteString addrB = ByteString.copyFromUtf8("B");
+
+        // Host replicas
+        IORMap ormapA = storeA.host(rA, addrA);
+        IORMap ormapB = storeB.host(rB, addrB);
+
+        // Join neighbors
+        storeA.join(rA, Collections.singleton(addrB));
+        storeB.join(rB, Collections.singleton(addrA));
+
+        // Write a value from A
+        ByteString key = ByteString.copyFromUtf8("k");
+        ByteString val = ByteString.copyFromUtf8("v1");
+        ormapA.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join();
+
+        await().until(() -> holdsValue(ormapB, key, val));
+    }
+
+    /**
+     * Verifies that a value written on store C reaches store D even though the first ACK sent from D to C is
+     * withheld and only delivered after the second DELTA from C has been seen.
+     */
+    @Test(groups = "integration")
+    public void testConvergeWithLateUnmatchedAck() {
+        ICRDTStore storeC = ICRDTStore.newInstance(newStoreOptions());
+        ICRDTStore storeD = ICRDTStore.newInstance(newStoreOptions());
+        // Stop both stores even when the await below times out, so a failing run does not leak them.
+        try {
+            Subject<CRDTStoreMessage> cToD = PublishSubject.<CRDTStoreMessage>create().toSerialized();
+            Subject<CRDTStoreMessage> dToC = PublishSubject.<CRDTStoreMessage>create().toSerialized();
+
+            GzipCompressor compressor = new GzipCompressor();
+            final CRDTStoreMessage[] delayedAck = {null};
+            // Tracked separately from delayedAck: once the buffer is emptied for redelivery, the redelivered
+            // ACK passes through the same filter and must not be captured a second time.
+            final boolean[] firstAckBuffered = {false};
+            final int[] deltaCountFromC = {0};
+
+            // Wire inbound with logic: buffer first ACK from D->C, only deliver after second DELTA from C
+            storeC.start(dToC
+                .flatMap(msg -> {
+                    MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg);
+                    if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && !firstAckBuffered[0]) {
+                        firstAckBuffered[0] = true;
+                        delayedAck[0] = msg; // buffer first ACK
+                        return Observable.empty();
+                    }
+                    return Observable.just(msg);
+                }));
+            storeD.start(cToD
+                .flatMap(msg -> {
+                    MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg);
+                    if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.DELTA) {
+                        deltaCountFromC[0]++;
+                        if (deltaCountFromC[0] >= 2 && delayedAck[0] != null) {
+                            CRDTStoreMessage ack = delayedAck[0];
+                            delayedAck[0] = null;
+                            dToC.onNext(ack);
+                        }
+                    }
+                    return Observable.just(msg);
+                }));
+
+            storeC.storeMessages().observeOn(Schedulers.single()).subscribe(cToD::onNext);
+            storeD.storeMessages().observeOn(Schedulers.single()).subscribe(dToC::onNext);
+
+            // Host replicas
+            String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test-late-ack");
+            Replica rC = ReplicaIdGenerator.generate(uri);
+            Replica rD = ReplicaIdGenerator.generate(uri);
+            ByteString addrC = ByteString.copyFromUtf8("C");
+            ByteString addrD = ByteString.copyFromUtf8("D");
+            IORMap ormapC = storeC.host(rC, addrC);
+            IORMap ormapD = storeD.host(rD, addrD);
+            storeC.join(rC, Collections.singleton(addrD));
+            storeD.join(rD, Collections.singleton(addrC));
+
+            // Write on C
+            ByteString key = ByteString.copyFromUtf8("k2");
+            ByteString val = ByteString.copyFromUtf8("v2");
+            ormapC.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join();
+
+            // Await convergence on D even though first ACK is delivered late and unmatched
+            await().until(() -> holdsValue(ormapD, key, val));
+        } finally {
+            try {
+                storeC.stop();
+            } finally {
+                storeD.stop();
+            }
+        }
+    }
+}
diff --git a/base-kv/base-kv-meta-service/pom.xml b/base-kv/base-kv-meta-service/pom.xml
index 97e4c3ac3..55d6f75a6 100644
--- a/base-kv/base-kv-meta-service/pom.xml
+++ b/base-kv/base-kv-meta-service/pom.xml
@@ -39,6 +39,14 @@
org.apache.bifromq
base-kv-type-proto
+
+ org.apache.bifromq
+ base-logger
+
+
+ org.apache.bifromq
+ base-util
+
io.reactivex.rxjava3
rxjava
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java
index cc6523180..0cbde7bd7 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java
@@ -37,7 +37,7 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
-import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.base.util.RendezvousHash;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IMVReg;
import org.apache.bifromq.basecrdt.core.api.IORMap;
@@ -47,21 +47,39 @@
import org.apache.bifromq.basecrdt.service.ICRDTService;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
import org.apache.bifromq.basekv.proto.StoreKey;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
-@Slf4j
class BaseKVLandscapeCRDT implements IBaseKVLandscapeCRDT {
+ private final String clusterId;
+ private final Logger log;
private final ICRDTService crdtService;
private final IORMap landscapeORMap;
private final BehaviorSubject> landscapeSubject = BehaviorSubject.create();
private final CompositeDisposable disposable = new CompositeDisposable();
BaseKVLandscapeCRDT(String clusterId, ICRDTService crdtService) {
+ this.clusterId = clusterId;
+ this.log = MDCLogger.getLogger(BaseKVLandscapeCRDT.class, "clusterId", clusterId);
this.crdtService = crdtService;
this.landscapeORMap = crdtService.host(toLandscapeURI(clusterId));
disposable.add(landscapeORMap.inflation()
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
.map(this::buildLandscape)
.subscribe(landscapeSubject::onNext));
+ disposable.add(Observable.combineLatest(landscape(), aliveReplicas(), (StoreDescriptorAndReplicas::new))
+ .observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
+ .subscribe(this::houseKeep));
+ }
+
+ @Override
+ public String clusterId() {
+ return clusterId;
+ }
+
+ @Override
+ public Observable refreshSignal() {
+ return crdtService.refreshSignal();
}
public Observable> aliveReplicas() {
@@ -124,4 +142,31 @@ private Optional buildLandscape(IMVReg mvReg) {
l.sort((a, b) -> Long.compareUnsigned(b.getHlc(), a.getHlc()));
return Optional.ofNullable(l.isEmpty() ? null : l.get(0));
}
+
+ private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) { // runs for each combined (landscape, alive replicas) emission; removes descriptors whose owning replica is gone
+ Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap;
+ Set aliveReplicas = storeDescriptorAndReplicas.replicaIds;
+ for (StoreKey storeKey : storedDescriptors.keySet()) {
+ if (!aliveReplicas.contains(storeKey.getReplicaId()) // the descriptor's replica is missing from the alive set ...
+ && shouldClean(aliveReplicas, storeKey.getReplicaId())) { // ... and shouldClean selected this instance to do the removal
+ log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId());
+ removeDescriptor(storeKey); // the call's result is not inspected here
+ }
+ }
+ }
+
+ private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { // failedReplicas is the id of ONE dead replica (houseKeep passes storeKey.getReplicaId()), despite the plural name
+ // Choose cleaner deterministically from the identical aliveReplicas set across nodes.
+ RendezvousHash hash = RendezvousHash.builder() // NOTE(review): rebuilt on every call; houseKeep could build it once per pass
+ .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodes(aliveReplicas)
+ .build();
+ ByteString cleaner = hash.get(failedReplicas); // presumably one member of aliveReplicas chosen for this key, or null if there are none - confirm against RendezvousHash
+ return cleaner != null && cleaner.equals(landscapeORMap.id().getId()); // true only when the chosen cleaner is the id of the locally hosted landscape ORMap
+ }
+
+ private record StoreDescriptorAndReplicas(Map descriptorMap,
+ Set replicaIds) {
+ }
}
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java
index 6f529b78a..57bde6ba3 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java
@@ -26,13 +26,17 @@
import java.util.Map;
import java.util.Optional;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver {
+ private final Logger log;
private final BehaviorSubject> landscapeSubject =
BehaviorSubject.create();
private final CompositeDisposable disposable = new CompositeDisposable();
BaseKVLandscapeObserver(IBaseKVLandscapeCRDT landscapeCRDT) {
+ this.log = MDCLogger.getLogger(BaseKVLandscapeObserver.class, "clusterId", landscapeCRDT.clusterId());
disposable.add(landscapeCRDT.landscape()
.map(descriptorMap -> {
Map descriptorMapByStoreId = new HashMap<>();
@@ -42,6 +46,7 @@ class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver {
}
return v.getHlc() > value.getHlc() ? v : value;
}));
+ log.debug("Landscape changed: {}", descriptorMapByStoreId);
return descriptorMapByStoreId;
})
.subscribe(landscapeSubject::onNext));
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java
index 3c45e0a4c..903474fef 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java
@@ -19,68 +19,61 @@
package org.apache.bifromq.basekv.metaservice;
-import com.google.protobuf.ByteString;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.disposables.CompositeDisposable;
import java.util.Map;
import java.util.Optional;
-import java.util.Set;
import java.util.concurrent.CompletableFuture;
-import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
import org.apache.bifromq.basekv.proto.StoreKey;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
-@Slf4j
class BaseKVLandscapeReporter implements IBaseKVLandscapeReporter {
+ private final Logger log;
private final String storeId;
private final IBaseKVLandscapeCRDT landscapeCRDT;
private final CompositeDisposable disposable = new CompositeDisposable();
private volatile KVRangeStoreDescriptor latestDescriptor;
BaseKVLandscapeReporter(String storeId, IBaseKVLandscapeCRDT landscapeCRDT) {
+ this.log = MDCLogger.getLogger(BaseKVLandscapeReporter.class, "clusterId", landscapeCRDT.clusterId(),
+ "storeId", storeId);
this.storeId = storeId;
this.landscapeCRDT = landscapeCRDT;
- disposable.add(Observable.combineLatest(
- landscapeCRDT.landscape(),
- landscapeCRDT.aliveReplicas(),
- (StoreDescriptorAndReplicas::new))
+ disposable.add(landscapeCRDT.landscape()
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
- .subscribe(this::houseKeep));
+ .subscribe(this::afterInflation));
}
@Override
public CompletableFuture report(KVRangeStoreDescriptor descriptor) {
Optional descriptorOnCRDT = landscapeCRDT.getStoreDescriptor(descriptor.getId());
if (descriptorOnCRDT.isEmpty() || !descriptorOnCRDT.get().equals(descriptor)) {
+ this.latestDescriptor = descriptor; // remembered so afterInflation can re-publish it if this store's entry later disappears from the landscape
return landscapeCRDT.setStoreDescriptor(descriptor);
}
return CompletableFuture.completedFuture(null);
}
+ @Override
+ public Observable refreshSignal() {
+ return landscapeCRDT.refreshSignal();
+ }
+
@Override
public void stop() {
landscapeCRDT.removeDescriptor(storeId).join();
disposable.dispose();
}
- private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) {
- Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap;
- Set aliveReplicas = storeDescriptorAndReplicas.replicaIds;
- for (StoreKey storeKey : storedDescriptors.keySet()) {
- if (!aliveReplicas.contains(storeKey.getReplicaId())) {
- log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId());
- landscapeCRDT.removeDescriptor(storeKey);
- }
- }
+ private void afterInflation(Map storedDescriptors) { // invoked for each landscape emission; re-publishes this store's last reported descriptor when its key is absent
if (!storedDescriptors.containsKey(landscapeCRDT.toDescriptorKey(storeId))) {
KVRangeStoreDescriptor latestDescriptor = this.latestDescriptor;
if (latestDescriptor != null) {
+ log.debug("Rectify missing store descriptor"); // logged only when there is a remembered descriptor to write back
landscapeCRDT.setStoreDescriptor(latestDescriptor);
}
}
}
-
- private record StoreDescriptorAndReplicas(Map descriptorMap,
- Set replicaIds) {
- }
}
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java
index f541c9d62..9cbec5f55 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java
@@ -39,7 +39,7 @@
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
-import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.base.util.RendezvousHash;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IMVReg;
import org.apache.bifromq.basecrdt.core.api.IORMap;
@@ -50,9 +50,12 @@
import org.apache.bifromq.basehlc.HLC;
import org.apache.bifromq.basekv.proto.BalancerStateSnapshot;
import org.apache.bifromq.basekv.proto.StoreKey;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
-@Slf4j
class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT {
+ private final String clusterId;
+ private final Logger log;
private final ICRDTService crdtService;
// key: storeId, value: Map of balancerClassFQN -> BalancerState
private final IORMap balancerStatesByStoreORMap;
@@ -61,12 +64,30 @@ class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT {
private final CompositeDisposable disposable = new CompositeDisposable();
BaseKVStoreBalancerStatesCRDT(String clusterId, ICRDTService crdtService) {
+ this.clusterId = clusterId;
+ this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesCRDT.class, "clusterId", clusterId);
this.crdtService = crdtService;
this.balancerStatesByStoreORMap = crdtService.host(toBalancerStateURI(clusterId));
disposable.add(balancerStatesByStoreORMap.inflation()
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
.map(this::buildBalancerStateSnapshots)
.subscribe(balancerStatesSubject::onNext));
+ disposable.add(Observable.combineLatest(
+ this.currentBalancerStates(),
+ this.aliveReplicas(),
+ (StateSnapshotsAndReplicas::new))
+ .observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
+ .subscribe(this::houseKeep));
+ }
+
+ @Override
+ public String clusterId() {
+ return clusterId;
+ }
+
+ @Override
+ public Observable refuteSignal() { // NOTE(review): "refute" looks like a typo for "refresh"; a rename must change the interface declaration and its callers in the same commit
+ return crdtService.refreshSignal(); // plain delegation to the CRDT service's refresh signal
}
public Observable> aliveReplicas() {
@@ -156,4 +177,31 @@ private Map> buildBalancerStateSnap
}));
return currentBalancerStates;
}
+
+ private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) { // runs for each combined (balancer states, alive replicas) emission; removes state of stores whose replica is gone
+ Map> observed = stateSnapshotsAndReplicas.observed;
+ Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds;
+ for (StoreKey storeKey : observed.keySet()) {
+ if (!aliveReplicas.contains(storeKey.getReplicaId()) // the store's replica is missing from the alive set ...
+ && shouldClean(aliveReplicas, storeKey.getReplicaId())) { // ... and shouldClean selected this instance to do the removal
+ log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId());
+ this.removeStore(storeKey); // the call's result is not inspected here
+ }
+ }
+ }
+
+ private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { // failedReplicas is the id of ONE dead replica (houseKeep passes storeKey.getReplicaId()), despite the plural name
+ // Choose cleaner deterministically from the identical aliveReplicas set across nodes.
+ RendezvousHash hash = RendezvousHash.builder() // NOTE(review): same construction as BaseKVLandscapeCRDT.shouldClean; rebuilt on every call
+ .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodes(aliveReplicas)
+ .build();
+ ByteString cleaner = hash.get(failedReplicas); // presumably one member of aliveReplicas chosen for this key, or null if there are none - confirm against RendezvousHash
+ return cleaner != null && cleaner.equals(balancerStatesByStoreORMap.id().getId()); // true only when the chosen cleaner is the id of the locally hosted balancer-states ORMap
+ }
+
+ private record StateSnapshotsAndReplicas(Map> observed,
+ Set replicaIds) {
+ }
}
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java
index abe81fb12..b9fa55de0 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java
@@ -27,13 +27,17 @@
import java.util.HashMap;
import java.util.Map;
import org.apache.bifromq.basekv.proto.BalancerStateSnapshot;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObserver {
+ private final Logger log;
private final BehaviorSubject>> currentBalancerStatesSubject =
BehaviorSubject.createDefault(emptyMap());
private final CompositeDisposable disposable = new CompositeDisposable();
BaseKVStoreBalancerStatesObserver(IBaseKVStoreBalancerStatesCRDT statesCRDT) {
+ this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesObserver.class, "clusterId", statesCRDT.clusterId());
disposable.add(statesCRDT.currentBalancerStates()
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
.map(statesMap -> {
@@ -49,6 +53,7 @@ class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObs
}
return balancerStates;
}));
+ log.debug("Current balancer states changed: {}", currentStates);
return currentStates;
})
.subscribe(currentBalancerStatesSubject::onNext));
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java
index 069b5be38..767dd7e06 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java
@@ -36,7 +36,6 @@
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
-import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basecrdt.core.api.CausalCRDTType;
import org.apache.bifromq.basecrdt.core.api.IMVReg;
import org.apache.bifromq.basecrdt.core.api.IORMap;
@@ -45,9 +44,12 @@
import org.apache.bifromq.basecrdt.service.ICRDTService;
import org.apache.bifromq.basehlc.HLC;
import org.apache.bifromq.basekv.proto.BalancerStateSnapshot;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
-@Slf4j
class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerStatesProposalCRDT {
+ private final String clusterId;
+ private final Logger log;
private final ICRDTService crdtService;
// key: balancerClassFQN, value: BalancerState
private final IORMap expectedBalancerStatesORMap;
@@ -56,6 +58,8 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState
private final CompositeDisposable disposable = new CompositeDisposable();
BaseKVStoreBalancerStatesProposalCRDT(String clusterId, ICRDTService crdtService) {
+ this.clusterId = clusterId;
+ this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposalCRDT.class, "clusterId", clusterId);
this.crdtService = crdtService;
this.expectedBalancerStatesORMap = crdtService.host(toBalancerStateProposalURI(clusterId));
disposable.add(expectedBalancerStatesORMap.inflation()
@@ -64,6 +68,11 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState
.subscribe(expectedBalancerStatesSubject::onNext));
}
+ @Override
+ public String clusterId() {
+ return clusterId;
+ }
+
public Observable> expectedBalancerStates() {
return expectedBalancerStatesSubject.distinctUntilChanged();
}
@@ -115,6 +124,7 @@ private Map buildExpectedBalancerStateSnapshots(l
balancerStateOpt.ifPresent(stateSnapshot -> balancerStatesMap.put(balancerClassFQN,
stateSnapshot));
});
+ log.debug("Expected balancer states changed: {}", balancerStatesMap);
return balancerStatesMap;
}
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java
index 20238789e..fb12538c3 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java
@@ -24,11 +24,15 @@
import java.util.concurrent.CompletableFuture;
import org.apache.bifromq.basehlc.HLC;
import org.apache.bifromq.basekv.proto.BalancerStateSnapshot;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
class BaseKVStoreBalancerStatesProposer implements IBaseKVStoreBalancerStatesProposer {
+ private final Logger log;
private final IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT;
BaseKVStoreBalancerStatesProposer(IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT) {
+ this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposer.class, "clusterId", proposalCRDT.clusterId());
this.proposalCRDT = proposalCRDT;
}
@@ -76,6 +80,7 @@ public void stop() {
private CompletableFuture proposeBalancerState(String balancerFactoryClass,
BalancerStateSnapshot state) {
+ log.debug("Propose balancer state: balancerClass={}, state={}", balancerFactoryClass, state);
CompletableFuture resultFuture = new CompletableFuture<>();
long now = state.getHlc();
proposalCRDT.expectedBalancerStates()
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java
index c4a192d0a..a2531f52c 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java
@@ -19,34 +19,32 @@
package org.apache.bifromq.basekv.metaservice;
-import com.google.protobuf.ByteString;
import com.google.protobuf.Struct;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.disposables.CompositeDisposable;
import java.util.Map;
-import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
-import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basekv.proto.BalancerStateSnapshot;
import org.apache.bifromq.basekv.proto.StoreKey;
+import org.apache.bifromq.logger.MDCLogger;
+import org.slf4j.Logger;
-@Slf4j
class BaseKVStoreBalancerStatesReporter implements IBaseKVStoreBalancerStatesReporter {
+ private final Logger log;
private final String storeId;
private final IBaseKVStoreBalancerStatesCRDT statesCRDT;
private final CompositeDisposable disposable = new CompositeDisposable();
private final Map latestState = new ConcurrentHashMap<>();
BaseKVStoreBalancerStatesReporter(String storeId, IBaseKVStoreBalancerStatesCRDT statesCRDT) {
+ this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesReporter.class, "clusterId", statesCRDT.clusterId(),
+ "storeId", storeId);
this.storeId = storeId;
this.statesCRDT = statesCRDT;
- disposable.add(Observable.combineLatest(
- statesCRDT.currentBalancerStates(),
- statesCRDT.aliveReplicas(),
- (StateSnapshotsAndReplicas::new))
+ disposable.add(statesCRDT.currentBalancerStates()
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER)
- .subscribe(this::houseKeep));
+ .subscribe(this::afterInflation));
}
@Override
@@ -64,32 +62,26 @@ public CompletableFuture reportBalancerState(String balancerFactoryClassFQ
return CompletableFuture.completedFuture(null);
}
+ @Override
+ public Observable refreshSignal() {
+ return statesCRDT.refuteSignal();
+ }
+
@Override
public void stop() {
statesCRDT.removeStore(storeId).join();
disposable.dispose();
}
- private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) {
- Map> observed = stateSnapshotsAndReplicas.observed;
- Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds;
- for (StoreKey storeKey : observed.keySet()) {
- if (!aliveReplicas.contains(storeKey.getReplicaId())) {
- log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId());
- statesCRDT.removeStore(storeKey);
- }
- }
+ private void afterInflation(Map> observed) { // invoked for each balancer-states emission; re-publishes this store's remembered states when its key is absent
if (!observed.containsKey(statesCRDT.toDescriptorKey(storeId))) {
+ log.debug("Rectify missing store balancer states"); // NOTE(review): logged even when latestState is empty and nothing is written back
latestState.forEach((balancerClassFQN, balancerState) ->
statesCRDT.setStoreBalancerState(storeId, balancerClassFQN,
balancerState.enable(), balancerState.loadRules()));
}
}
- private record StateSnapshotsAndReplicas(Map> observed,
- Set replicaIds) {
- }
-
private record BalancerState(boolean enable, Struct loadRules) {
}
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java
index 1a58c7e94..4fc27eb26 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java
@@ -32,6 +32,19 @@
* The interface of a BaseKV landscape CRDT.
*/
public interface IBaseKVLandscapeCRDT {
+ /**
+ * The id of the base-kv cluster.
+ * @return the cluster id
+ */
+ String clusterId();
+
+ /**
+ * A signal to refresh the landscape CRDT.
+ *
+ * @return the observable of the signal
+ */
+ Observable refreshSignal();
+
/**
* Get the observable of alive replicas of landscape CRDT.
*
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java
index 5bbdf5b7e..7b39b0c9b 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java
@@ -19,6 +19,7 @@
package org.apache.bifromq.basekv.metaservice;
+import io.reactivex.rxjava3.core.Observable;
import java.util.concurrent.CompletableFuture;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
@@ -33,6 +34,13 @@ public interface IBaseKVLandscapeReporter {
*/
CompletableFuture report(KVRangeStoreDescriptor descriptor);
+ /**
+ * A signal to refresh the landscape reporter's state.
+ *
+ * @return an observable that emits a timestamp when the reporter should refresh its state
+ */
+ Observable refreshSignal();
+
/**
* Stop the reporter.
*/
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java
index a0e44b963..9038a307a 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java
@@ -32,6 +32,10 @@
* The interface of a BaseKV store balancer states CRDT.
*/
public interface IBaseKVStoreBalancerStatesCRDT {
+ String clusterId();
+
+ Observable refuteSignal(); // NOTE(review): likely meant "refreshSignal" - the implementation returns crdtService.refreshSignal() and the reporter exposes it as refreshSignal()
+
Observable> aliveReplicas();
Observable>> currentBalancerStates();
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java
index 3ca2d36e2..4bde0de7a 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java
@@ -30,6 +30,7 @@
* The interface of a BaseKV store balancer states CRDT.
*/
public interface IBaseKVStoreBalancerStatesProposalCRDT {
+ String clusterId();
Observable> expectedBalancerStates();
diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java
index c4ca82670..9e6891f16 100644
--- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java
+++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java
@@ -20,6 +20,7 @@
package org.apache.bifromq.basekv.metaservice;
import com.google.protobuf.Struct;
+import io.reactivex.rxjava3.core.Observable;
import java.util.concurrent.CompletableFuture;
/**
@@ -36,6 +37,13 @@ public interface IBaseKVStoreBalancerStatesReporter {
*/
CompletableFuture reportBalancerState(String balancerFactoryClassFQN, boolean disable, Struct loadRules);
+ /**
+ * A signal to refresh the reporter's state.
+ *
+ * @return an observable that emits a timestamp when the reporter should refresh its state
+ */
+ Observable refreshSignal();
+
/**
* Stop the reporter.
*/
diff --git a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java
index f28fbea46..87aaf741f 100644
--- a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java
+++ b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java
@@ -21,9 +21,7 @@
import static org.awaitility.Awaitility.await;
import static org.testng.Assert.assertEquals;
-import static org.testng.Assert.assertTrue;
-import java.util.Collections;
import java.util.Map;
import org.apache.bifromq.basecluster.AgentHostOptions;
import org.apache.bifromq.basecluster.IAgentHost;
@@ -80,7 +78,6 @@ public void stop() {
await().until(() -> observer.getStoreDescriptor(descriptor.getId()).isPresent());
reporter.stop();
- assertEquals(Collections.emptyMap(), observer.landscape().blockingFirst());
- assertTrue(observer.getStoreDescriptor(descriptor.getId()).isEmpty());
+ await().until(() -> observer.landscape().blockingFirst().isEmpty());
}
}
diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java
index 62bf66661..c488260e1 100644
--- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java
+++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java
@@ -14,11 +14,21 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.raft;
+import com.google.protobuf.ByteString;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.concurrent.CompletableFuture;
import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException;
import org.apache.bifromq.basekv.raft.exception.DropProposalException;
import org.apache.bifromq.basekv.raft.exception.LeaderTransferException;
@@ -39,16 +49,6 @@
import org.apache.bifromq.basekv.raft.proto.RequestVote;
import org.apache.bifromq.basekv.raft.proto.Snapshot;
import org.apache.bifromq.basekv.raft.proto.Voting;
-import com.google.protobuf.ByteString;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.Optional;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.concurrent.CompletableFuture;
class RaftNodeStateFollower extends RaftNodeState {
private final TreeMap stabilizingIndexes = new TreeMap<>(Long::compareTo);
@@ -387,7 +387,7 @@ void changeClusterConfig(String correlateId,
void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex, CompletableFuture onDone) {
if (currentISSRequest == null) {
log.debug("Snapshot installation request not found");
- onDone.completeExceptionally(new SnapshotException("No snapshot installation request"));
+ onDone.completeExceptionally(SnapshotException.noSnapshot());
return;
}
InstallSnapshot iss = currentISSRequest;
@@ -398,7 +398,7 @@ void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex
onDone.completeExceptionally(ex);
} else {
log.debug("Obsolete snapshot installation");
- onDone.completeExceptionally(new SnapshotException("Obsolete snapshot installed by FSM"));
+ onDone.completeExceptionally(SnapshotException.obsolete());
}
return;
}
diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java
index ec397ed25..3f4bdf66d 100644
--- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java
+++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java
@@ -14,17 +14,47 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.raft.exception;
+/**
+ * Exception thrown during snapshot operations in the Raft protocol.
+ * This exception can indicate that a snapshot is obsolete or has other issues.
+ */
public class SnapshotException extends RuntimeException {
- public SnapshotException(String message) {
+ private SnapshotException(String message) { // private: instances are obtained through the static factories below
super(message);
}
- public SnapshotException(Throwable e) {
+ private SnapshotException(Throwable e) { // NOTE(review): no caller inside this class, and being private it cannot be called from outside - confirm it is still needed
super(e);
}
+
+ public static ObsoleteSnapshotException obsolete() { // factory for the "obsolete snapshot" failure
+ return new ObsoleteSnapshotException();
+ }
+
+ public static NoSnapshotException noSnapshot() { // factory for the "no snapshot" failure
+ return new NoSnapshotException();
+ }
+
+ /**
+ * Exception indicating that no snapshot is available for installation.
+ */
+ public static class NoSnapshotException extends SnapshotException {
+ private NoSnapshotException() {
+ super("No snapshot available");
+ }
+ }
+
+ /**
+ * Exception indicating that the installed snapshot has been obsoleted by a newer snapshot.
+ */
+ public static class ObsoleteSnapshotException extends SnapshotException {
+ private ObsoleteSnapshotException() {
+ super("The installed snapshot has been obsoleted by a newer snapshot");
+ }
+ }
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java
index 5e511d11a..5f73f98e5 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java
@@ -31,6 +31,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
@@ -120,7 +121,7 @@ public KVStoreBalanceController(IBaseKVMetaService metaService,
this.customBalancerFactories = Lists.newArrayList(factories);
this.builtinBalancerFactories = Lists.newArrayList(
new RangeBootstrapBalancerFactory(bootstrapDelay),
- new RedundantRangeRemovalBalancerFactory(),
+ new RedundantRangeRemovalBalancerFactory(zombieProbeDelay),
new UnreachableReplicaRemovalBalancerFactory(zombieProbeDelay));
this.statesProposal = metaService.balancerStatesProposal(storeClient.clusterId());
this.balancers = new HashMap<>();
@@ -137,7 +138,8 @@ public void start(String localStoreId) {
if (state.compareAndSet(State.Init, State.Started)) {
this.localStoreId = localStoreId;
statesReporter = metaService.balancerStatesReporter(storeClient.clusterId(), localStoreId);
- log = MDCLogger.getLogger("balancer.logger", "clusterId", storeClient.clusterId(), "storeId", localStoreId);
+ log = MDCLogger.getLogger("balancer.logger",
+ "clusterId", storeClient.clusterId(), "storeId", localStoreId, "balancer", "CONTROLLER");
for (IStoreBalancerFactory factory : builtinBalancerFactories) {
StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId);
@@ -146,12 +148,12 @@ public void start(String localStoreId) {
}
for (IStoreBalancerFactory factory : customBalancerFactories) {
String balancerFactoryFQN = factory.getClass().getName();
- log.info("Create balancer from factory: {}", balancerFactoryFQN);
StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId);
+ log.info("Create balancer[{}] from factory: {}", balancer.getClass().getName(), balancerFactoryFQN);
if (balancer instanceof RangeBootstrapBalancer
|| balancer instanceof RedundantRangeRemovalBalancer
|| balancer instanceof UnreachableReplicaRemovalBalancer) {
- log.warn("{} should not be created from custom balancer factory",
+ log.warn("Builtin balancer[{}] should not be created from custom balancer factory",
balancer.getClass().getSimpleName());
continue;
}
@@ -163,14 +165,28 @@ public void start(String localStoreId) {
log.info("BalancerController start");
disposables.add(statesProposal.expectedBalancerStates()
.subscribe(currentExpected -> {
+ log.trace("Expected balancer states changed: {}", currentExpected);
this.expectedBalancerStates = currentExpected;
trigger();
}));
disposables.add(storeClient.describe().subscribe(descriptors -> {
+ log.trace("Landscape changed: {}", descriptors);
this.landscape = descriptors;
trimRangeHistory(descriptors);
trigger();
}));
+ disposables.add(statesReporter.refreshSignal()
+ .subscribe(ts -> {
+ for (Map.Entry entry : balancers.entrySet()) {
+ String balancerFacClassFQN = entry.getKey();
+ StoreBalancerState balancerState = entry.getValue();
+ if (!balancerState.isBuiltin) {
+ log.debug("Report balancer state for {}", balancerFacClassFQN);
+ statesReporter.reportBalancerState(balancerFacClassFQN,
+ balancerState.disabled.get(), balancerState.loadRules.get());
+ }
+ }
+ }));
}
}
@@ -199,6 +215,10 @@ public void stop() {
+ /**
+ * Schedules a balance round if the controller is started and no round is currently being scheduled.
+ *
+ * <p>The round runs {@code updateAndBalance} after a random jitter in {@code [0, retryDelay)} milliseconds;
+ * a previously scheduled task that has not finished yet is cancelled first.
+ */
private void trigger() {
if (state.get() == State.Started && scheduling.compareAndSet(false, true)) {
+ // NOTE(review): ThreadLocalRandom#nextLong(origin, bound) requires bound > origin, so this assumes
+ // retryDelay is at least 1ms - confirm it is validated elsewhere.
long jitter = ThreadLocalRandom.current().nextLong(0, retryDelay.toMillis());
+ if (task != null && !task.isDone()) {
+ // replace the pending task (e.g. a delayed retry) rather than letting both run
+ log.trace("Cancel scheduled balance task");
+ task.cancel(true);
+ }
task = executor.schedule(this::updateAndBalance, jitter, TimeUnit.MILLISECONDS);
}
}
@@ -208,6 +228,9 @@ private void updateAndBalance() {
Set landscape = this.landscape;
if (landscape == null || landscape.isEmpty()) {
scheduling.set(false);
+ if (!Objects.equals(this.landscape, landscape)) {
+ trigger();
+ }
return;
}
for (Map.Entry entry : balancers.entrySet()) {
@@ -222,18 +245,25 @@ private void updateAndBalance() {
Struct loadRules = balancerState.loadRules.get();
boolean needReport = false;
if (balancerState.disabled.get() != disable) {
- log.info("Balancer[{}] is {}", balancerFacClassFQN, disable ? "disabled" : "enabled");
+ log.info("Balancer[{}] is {}", balancerState.balancer.getClass().getSimpleName(),
+ disable ? "disabled" : "enabled");
balancerState.disabled.set(disable);
needReport = true;
}
- Struct expectedLoadRules = expectedState.getLoadRules();
- if (!loadRules.equals(expectedLoadRules)
- && balancerState.balancer.validate(expectedLoadRules)) {
- loadRules = expectedLoadRules;
- // report the balancer state
- balancerState.loadRules.set(expectedLoadRules);
- balancerState.balancer.update(expectedLoadRules);
- needReport = true;
+ Struct expectedLoadRules = loadRules.toBuilder()
+ .mergeFrom(expectedState.getLoadRules())
+ .build();
+ if (!loadRules.equals(expectedLoadRules)) {
+ if (balancerState.balancer.validate(expectedLoadRules)) {
+ loadRules = expectedLoadRules;
+ // report the balancer state
+ balancerState.loadRules.set(expectedLoadRules);
+ balancerState.balancer.update(expectedLoadRules);
+ needReport = true;
+ } else {
+ log.warn("Balancer[{}] load rules not valid: {}",
+ balancerState.balancer.getClass().getSimpleName(), expectedLoadRules);
+ }
}
if (needReport) {
statesReporter.reportBalancerState(balancerFacClassFQN, disable, loadRules);
@@ -245,7 +275,7 @@ private void updateAndBalance() {
}
balancerState.balancer.update(landscape);
} catch (Throwable e) {
- log.error("Balancer[{}] update failed", balancerFacClassFQN, e);
+ log.error("Balancer[{}] update failed", balancerState.balancer.getClass().getSimpleName(), e);
}
}
balance(expectedBalancerState, landscape);
@@ -254,9 +284,11 @@ private void updateAndBalance() {
private void scheduleRetry(Map expected,
Set landscape,
Duration delay) {
+ // log in milliseconds: balancers may ask for sub-second waits, which toSeconds() would truncate to 0
+ log.debug("Retry balance after {}ms", delay.toMillis());
task = executor.schedule(() -> {
- if (expected != this.expectedBalancerStates || landscape != this.landscape) {
+ // Compare both inputs by value, consistent with the checks in balance(): the retry is stale only
+ // when the inputs really changed. A reference check on the landscape would drop the retry when an
+ // equal landscape was re-published under a new reference, although balance() saw no change.
+ if (!Objects.equals(expected, this.expectedBalancerStates)
+ || !Objects.equals(landscape, this.landscape)) {
// retry is preemptive
+ log.trace("Balance retry is preempted");
return;
}
if (scheduling.compareAndSet(false, true)) {
@@ -268,11 +300,11 @@ private void scheduleRetry(Map expected,
private void balance(final Map expected,
final Set landscape) {
metricsManager.scheduleCount.increment();
- Duration delay = Duration.ZERO;
+ Duration delay = null;
for (Map.Entry entry : balancers.entrySet()) {
- String balancerFactoryName = entry.getKey();
StoreBalancerState fromBalancerState = entry.getValue();
StoreBalancer fromBalancer = fromBalancerState.balancer;
+ String balancerName = fromBalancer.getClass().getSimpleName();
if (fromBalancerState.disabled.get()) {
continue;
}
@@ -282,9 +314,8 @@ private void balance(final Map expected,
case BalanceNow -> {
BalanceCommand commandToRun = ((BalanceNow>) result).command;
if (!isStaleCommand(commandToRun)) {
- log.info("Balancer[{}] command run: {}", balancerFactoryName, commandToRun);
- String balancerName = fromBalancer.getClass().getSimpleName();
String cmdName = commandToRun.getClass().getSimpleName();
+ log.info("Balancer[{}] command run: {}", balancerName, commandToRun);
Sample start = Timer.start();
runCommand(commandToRun)
.whenCompleteAsync((success, e) -> {
@@ -295,7 +326,7 @@ private void balance(final Map expected,
metrics.cmdFailedCounter.increment();
} else {
log.info("Balancer[{}] command run result[{}]: {}",
- balancerFactoryName, success, commandToRun);
+ balancerName, success, commandToRun);
if (success) {
metrics.cmdSucceedCounter.increment();
start.stop(metrics.cmdRunTimer);
@@ -305,7 +336,8 @@ private void balance(final Map expected,
}
scheduling.set(false);
if (success) {
- if (this.landscape != landscape || this.expectedBalancerStates != expected) {
+ if (!Objects.equals(this.landscape, landscape)
+ || !Objects.equals(this.expectedBalancerStates, expected)) {
trigger();
}
} else {
@@ -317,21 +349,21 @@ private void balance(final Map expected,
}
case AwaitBalance -> {
Duration await = ((AwaitBalance) result).await;
- delay = await.toNanos() > delay.toNanos() ? await : delay;
+ delay = delay != null ? (await.toNanos() < delay.toNanos() ? await : delay) : await;
}
default -> {
// do nothing
}
}
} catch (Throwable e) {
- log.warn("Balancer[{}] unexpected error", balancerFactoryName, e);
+ log.warn("Balancer[{}] unexpected error", balancerName, e);
}
}
// no command to run
scheduling.set(false);
- if (this.landscape != landscape || this.expectedBalancerStates != expected) {
+ if (!Objects.equals(this.landscape, landscape) || !Objects.equals(this.expectedBalancerStates, expected)) {
trigger();
- } else if (!delay.isZero()) {
+ } else if (delay != null) {
// if some balancers are in the progress of generating balance command, wait for a while
scheduleRetry(expected, landscape, delay);
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java
index 4eff0bed3..3bcf0197a 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java
@@ -19,14 +19,22 @@
package org.apache.bifromq.basekv.balance;
+import java.time.Duration;
+import org.apache.bifromq.basehlc.HLC;
import org.apache.bifromq.basekv.balance.impl.RedundantRangeRemovalBalancer;
/**
* Builtin balancer for redundant range removal.
*/
class RedundantRangeRemovalBalancerFactory implements IStoreBalancerFactory {
+ // handed to every created balancer as its suspicion duration
+ private final Duration delay;
+
+ /**
+ * Creates the factory.
+ *
+ * @param delay the suspicion duration passed to each {@link RedundantRangeRemovalBalancer} it creates
+ */
+ RedundantRangeRemovalBalancerFactory(Duration delay) {
+ this.delay = delay;
+ }
+
@Override
public StoreBalancer newBalancer(String clusterId, String localStoreId) {
- return new RedundantRangeRemovalBalancer(clusterId, localStoreId);
+ // HLC.INST::getPhysical is supplied as the balancer's millisSource
+ return new RedundantRangeRemovalBalancer(clusterId, localStoreId, delay, HLC.INST::getPhysical);
}
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java
index 38b7eebec..c9d979d36 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java
@@ -22,7 +22,14 @@
import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY;
import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch;
+import java.time.Duration;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
import org.apache.bifromq.basehlc.HLC;
+import org.apache.bifromq.basekv.balance.AwaitBalance;
import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
import org.apache.bifromq.basekv.balance.NoNeedBalance;
@@ -33,12 +40,6 @@
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
import org.apache.bifromq.basekv.utils.EffectiveEpoch;
import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
-import java.time.Duration;
-import java.util.Optional;
-import java.util.Set;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Supplier;
/**
* RangeBootstrapBalancer is a specialized StoreBalancer designed to handle the bootstrap process of creating the
@@ -49,6 +50,7 @@ public class RangeBootstrapBalancer extends StoreBalancer {
private final Supplier millisSource;
private final long suspicionDurationMillis;
private final AtomicReference bootstrapTrigger = new AtomicReference<>();
+
/**
* Constructor of StoreBalancer.
*
@@ -98,19 +100,27 @@ public void update(Set landscape) {
KVRangeIdUtil.toString(rangeId));
bootstrapTrigger.set(new BootstrapTrigger(rangeId, FULL_BOUNDARY, randomSuspicionTimeout()));
}
+ } else if (bootstrapTrigger.get() != null) {
+ log.debug("Effective epoch found: {}, cancel any pending bootstrap", effectiveEpoch.get().epoch());
+ bootstrapTrigger.set(null);
}
}
+ /**
+ * Decides what to do with the pending bootstrap trigger, if any.
+ *
+ * @return {@link BalanceNow} carrying a {@link BootstrapCommand} once the trigger time has passed,
+ * {@link AwaitBalance} with the remaining time while it has not, or {@link NoNeedBalance} when nothing
+ * is pending
+ */
@Override
public BalanceResult balance() {
BootstrapTrigger current = bootstrapTrigger.get();
- if (current != null && millisSource.get() > current.triggerTime) {
- bootstrapTrigger.set(null);
- return BalanceNow.of(BootstrapCommand.builder()
- .toStore(localStoreId)
- .kvRangeId(current.id)
- .boundary(current.boundary)
- .build());
+ if (current != null) {
+ long nowMillis = millisSource.get();
+ if (nowMillis > current.triggerTime) {
+ // fire once: clear the trigger before handing out the command
+ bootstrapTrigger.set(null);
+ return BalanceNow.of(BootstrapCommand.builder()
+ .toStore(localStoreId)
+ .kvRangeId(current.id)
+ .boundary(current.boundary)
+ .build());
+ } else {
+ // not due yet: report the remaining time to wait
+ return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis));
+ }
}
return NoNeedBalance.INSTANCE;
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java
index b9642fe4e..7089ef3cb 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java
@@ -26,9 +26,12 @@
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
+import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
+import java.util.Set;
import org.apache.bifromq.basekv.proto.Boundary;
import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
@@ -131,6 +134,10 @@ protected Map doGenerate(Struct loadRules,
KVRangeDescriptor rangeDescriptor = leaderRange.descriptor();
KVRangeStoreDescriptor storeDescriptor = landscape.get(leaderRange.ownerStoreDescriptor().getId());
ClusterConfig clusterConfig = rangeDescriptor.getConfig();
+ if (containsDeadMember(clusterConfig, landscape.keySet())) {
+ // shortcut when config contains dead members
+ return Collections.emptyMap();
+ }
Optional splitHintOpt = rangeDescriptor
.getHintsList()
.stream()
@@ -170,4 +177,13 @@ && compareEndKeys(splitHint.getSplitKey(), endKey(boundary)) < 0) {
}
return expectedRangeLayout;
}
+
+    /**
+     * Checks whether the cluster config references a member that is not in the live set.
+     *
+     * @param clusterConfig the config whose voters, learners, next voters and next learners are checked
+     * @param live          the ids regarded as live
+     * @return true if at least one member of the config is missing from {@code live}
+     */
+    private boolean containsDeadMember(ClusterConfig clusterConfig, Set<String> live) {
+        Set<String> members = new HashSet<>(clusterConfig.getVotersList());
+        members.addAll(clusterConfig.getLearnersList());
+        members.addAll(clusterConfig.getNextVotersList());
+        members.addAll(clusterConfig.getNextLearnersList());
+        // a member is dead exactly when the live set does not cover every configured member
+        return !live.containsAll(members);
+    }
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java
index bedf16fbd..0ae68e33a 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java
@@ -23,21 +23,31 @@
import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveRoute;
import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch;
+import com.google.common.collect.Sets;
+import java.time.Duration;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.NavigableMap;
+import java.util.NavigableSet;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.function.Supplier;
+import org.apache.bifromq.basekv.balance.AwaitBalance;
+import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
import org.apache.bifromq.basekv.balance.NoNeedBalance;
import org.apache.bifromq.basekv.balance.StoreBalancer;
+import org.apache.bifromq.basekv.balance.command.BalanceCommand;
import org.apache.bifromq.basekv.proto.Boundary;
import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
import org.apache.bifromq.basekv.proto.KVRangeId;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.raft.proto.ClusterConfig;
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.apache.bifromq.basekv.utils.EffectiveEpoch;
import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
@@ -55,7 +65,9 @@
* caution.
*/
public class RedundantRangeRemovalBalancer extends StoreBalancer {
- private volatile NavigableMap> latest = Collections.emptyNavigableMap();
+ private final Supplier millisSource;
+ private final long suspicionDurationMillis;
+ private final AtomicReference pendingQuitCommand = new AtomicReference<>();
/**
* Constructor of StoreBalancer.
@@ -63,23 +75,60 @@ public class RedundantRangeRemovalBalancer extends StoreBalancer {
* @param clusterId the id of the BaseKV cluster which the store belongs to
* @param localStoreId the id of the store which the balancer is responsible for
+ * @param suspicionDuration the base delay before a scheduled quit command becomes due; the actual delay is
+ * randomized between one and two times this duration
+ * @param millisSource the supplier of the current time in milliseconds
*/
- public RedundantRangeRemovalBalancer(String clusterId, String localStoreId) {
+ public RedundantRangeRemovalBalancer(String clusterId,
+ String localStoreId,
+ Duration suspicionDuration,
+ Supplier millisSource) {
super(clusterId, localStoreId);
+ this.suspicionDurationMillis = suspicionDuration.toMillis();
+ this.millisSource = millisSource;
}
+ /**
+ * Re-evaluates the landscape and schedules, replaces, or clears the pending quit command.
+ *
+ * <p>The checks run in order - redundant epoch, id conflict, boundary conflict - and stop at the first one
+ * that schedules a command. The id and boundary checks only look at the first entry of the landscape
+ * organized by epoch. When nothing is scheduled, any pending command is cleared.
+ *
+ * <p>NOTE(review): every check that finds a redundant range stores a new command with a freshly randomized
+ * trigger time, so a pending command's deadline appears to move forward on each landscape update -
+ * confirm this is intended.
+ */
@Override
public void update(Set landscape) {
- latest = organizeByEpoch(landscape);
+ NavigableMap> landscapeByEpoch = organizeByEpoch(landscape);
+ if (landscapeByEpoch.isEmpty()) {
+ // nothing known about the cluster: drop whatever was pending
+ pendingQuitCommand.set(null);
+ return;
+ }
+ boolean scheduled = cleanupRedundantEpoch(landscapeByEpoch);
+ if (scheduled) {
+ return;
+ }
+ Map.Entry> oldestEntry = landscapeByEpoch.firstEntry();
+ EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue());
+ scheduled = cleanupIdConflictRange(effectiveEpoch);
+ if (scheduled) {
+ return;
+ }
+ scheduled = cleanupBoundaryConflictRange(effectiveEpoch);
+ if (!scheduled) {
+ if (pendingQuitCommand.get() != null) {
+ log.debug("No redundant range found, clear pending quit command");
+ pendingQuitCommand.set(null);
+ }
+ }
}
+ /**
+ * Decides what to do with the pending quit command, if any.
+ *
+ * @return {@link BalanceNow} carrying the pending quit command once its trigger time has passed,
+ * {@link AwaitBalance} with the remaining time while it has not, or {@link NoNeedBalance} when nothing
+ * is pending
+ */
@Override
public BalanceResult balance() {
- if (latest.isEmpty()) {
- return NoNeedBalance.INSTANCE;
+ PendingQuitCommand current = pendingQuitCommand.get();
+ if (current != null) {
+ long nowMillis = millisSource.get();
+ if (nowMillis > current.triggerTime) {
+ // fire once: clear the pending command before handing it out
+ pendingQuitCommand.set(null);
+ return BalanceNow.of(current.quitCmd);
+ } else {
+ // not due yet: report the remaining time to wait
+ return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis));
+ }
}
- if (latest.size() > 1) {
+ return NoNeedBalance.INSTANCE;
+ }
+
+ private boolean cleanupRedundantEpoch(NavigableMap> landscapeByEpoch) {
+ if (landscapeByEpoch.size() > 1) {
// deal with epoch-conflict ranges
- Set storeDescriptors = latest.lastEntry().getValue();
+ Set storeDescriptors = landscapeByEpoch.lastEntry().getValue();
for (KVRangeStoreDescriptor storeDescriptor : storeDescriptors) {
if (!storeDescriptor.getId().equals(localStoreId)) {
continue;
@@ -88,34 +137,42 @@ public BalanceResult balance() {
if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) {
continue;
}
- log.debug("Remove Epoch-Conflict range: {} in store {}",
- KVRangeIdUtil.toString(rangeDescriptor.getId()),
- storeDescriptor.getId());
- return quit(localStoreId, rangeDescriptor);
+ log.debug("Schedule command to remove epoch-conflict range: id={}, boundary={}",
+ KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary());
+ pendingQuitCommand.set(
+ new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout()));
+ return true;
}
}
- return NoNeedBalance.INSTANCE;
}
- Map.Entry> oldestEntry = latest.firstEntry();
- Map> conflictingRanges = findConflictingRanges(oldestEntry.getValue());
+ return false;
+ }
+
+ /**
+ * Schedules a quit command when the local store leads an id-conflicting range.
+ *
+ * <p>Both loops below return during their first iteration, so only the first leader range of the first
+ * conflicting range id is examined: a command is scheduled if the local store owns it, otherwise nothing
+ * is scheduled.
+ *
+ * @param effectiveEpoch the epoch whose store descriptors are searched for conflicting leader ranges
+ * @return true if a quit command was scheduled
+ */
+ private boolean cleanupIdConflictRange(EffectiveEpoch effectiveEpoch) {
+ Map> conflictingRanges =
+ findConflictingRanges(effectiveEpoch.storeDescriptors());
if (!conflictingRanges.isEmpty()) {
// deal with id-conflict ranges
for (KVRangeId rangeId : conflictingRanges.keySet()) {
- SortedSet leaderRanges = conflictingRanges.get(rangeId);
+ NavigableSet leaderRanges = conflictingRanges.get(rangeId);
for (LeaderRange leaderRange : leaderRanges) {
if (!leaderRange.ownerStoreDescriptor().getId().equals(localStoreId)) {
- return NoNeedBalance.INSTANCE;
+ // the first leader range belongs to another store: nothing to schedule from here
+ return false;
}
- log.debug("Remove Id-Conflict range: {} in store {}",
+ log.warn("Schedule command to remove id-conflict range: id={}, boundary={}",
KVRangeIdUtil.toString(leaderRange.descriptor().getId()),
- leaderRange.ownerStoreDescriptor().getId());
- return quit(localStoreId, leaderRange.descriptor());
+ leaderRange.descriptor().getBoundary());
+ pendingQuitCommand.set(
+ new PendingQuitCommand(quit(localStoreId, leaderRange.descriptor()), randomSuspicionTimeout()));
+ return true;
}
}
- return NoNeedBalance.INSTANCE;
}
+ return false;
+ }
+
+ private boolean cleanupBoundaryConflictRange(EffectiveEpoch effectiveEpoch) {
// deal with boundary-conflict ranges
- EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue());
NavigableMap effectiveLeaders = getEffectiveRoute(effectiveEpoch).leaderRanges();
for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) {
if (!storeDescriptor.getId().equals(localStoreId)) {
@@ -128,19 +185,21 @@ public BalanceResult balance() {
Boundary boundary = rangeDescriptor.getBoundary();
LeaderRange leaderRange = effectiveLeaders.get(boundary);
if (leaderRange == null || !leaderRange.descriptor().getId().equals(rangeDescriptor.getId())) {
- log.debug("Remove Boundary-Conflict range: {} in store {}",
- KVRangeIdUtil.toString(rangeDescriptor.getId()),
- storeDescriptor.getId());
- return quit(localStoreId, rangeDescriptor);
+ log.warn("Schedule command to remove boundary-conflict range: id={}, boundary={}",
+ KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary());
+ pendingQuitCommand.set(
+ new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout()));
+ return true;
}
}
}
- return NoNeedBalance.INSTANCE;
+ return false;
}
- private Map> findConflictingRanges(Set effectiveEpoch) {
- Map> leaderRangesByRangeId = new HashMap<>();
- Map> conflictingRanges = new HashMap<>();
+ private Map> findConflictingRanges(
+ Set effectiveEpoch) {
+ Map> leaderRangesByRangeId = new HashMap<>();
+ Map> conflictingRanges = new HashMap<>();
for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch) {
for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) {
if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) {
@@ -151,12 +210,42 @@ private Map> findConflictingRanges(Set lr.ownerStoreDescriptor().getId(), String::compareTo)
.reversed()));
leaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor));
- if (leaderRanges.size() > 1) {
- // More than one leader for the same range, add to conflicting ranges
- conflictingRanges.put(rangeId, leaderRanges);
+ }
+ }
+ for (KVRangeId rangeId : leaderRangesByRangeId.keySet()) {
+ NavigableSet leaderRanges = leaderRangesByRangeId.get(rangeId);
+ LeaderRange firstLeaderRange = leaderRanges.first();
+ ClusterConfig firstLeaderClusterConfig = firstLeaderRange.descriptor().getConfig();
+ if (leaderRanges.size() > 1) {
+ NavigableSet restLeaderRanges = leaderRanges.tailSet(firstLeaderRange, false);
+ // check if rest leader ranges are conflicting: disjoint voter set
+ for (LeaderRange restLeaderRange : restLeaderRanges) {
+ ClusterConfig restLeaderClusterConfig = restLeaderRange.descriptor().getConfig();
+ if (isDisjoint(firstLeaderClusterConfig, restLeaderClusterConfig)) {
+ // if disjoint, add to conflicting ranges
+ conflictingRanges.put(rangeId, leaderRanges);
+ }
}
}
}
return conflictingRanges;
}
+
+    /**
+     * Tells whether two cluster configs share no voter.
+     *
+     * @param firstConfig  the first config
+     * @param secondConfig the second config
+     * @return true only if the voter lists are disjoint and the next-voter lists are disjoint as well
+     */
+    private boolean isDisjoint(ClusterConfig firstConfig, ClusterConfig secondConfig) {
+        Set<String> firstVoters = Sets.newHashSet(firstConfig.getVotersList());
+        if (!Collections.disjoint(firstVoters, secondConfig.getVotersList())) {
+            return false;
+        }
+        Set<String> firstNextVoters = Sets.newHashSet(firstConfig.getNextVotersList());
+        return Collections.disjoint(firstNextVoters, secondConfig.getNextVotersList());
+    }
+
+    /**
+     * Computes the time at which a newly scheduled quit command becomes due.
+     *
+     * @return the current time from {@code millisSource} plus a random delay in
+     *     {@code [suspicionDurationMillis, 2 * suspicionDurationMillis)}, or the current time itself when the
+     *     suspicion duration is not positive
+     */
+    private long randomSuspicionTimeout() {
+        long now = millisSource.get();
+        if (suspicionDurationMillis <= 0) {
+            // ThreadLocalRandom#nextLong(origin, bound) rejects origin >= bound, which is what a zero or
+            // negative duration would produce; fall back to "due as soon as the clock advances"
+            return now;
+        }
+        return now + ThreadLocalRandom.current().nextLong(suspicionDurationMillis, suspicionDurationMillis * 2);
+    }
+
+    /**
+     * A quit command waiting for its trigger time.
+     *
+     * @param quitCmd     the command handed out once it is due
+     * @param triggerTime the time, on the scale of {@code millisSource}, after which the command is due
+     */
+    private record PendingQuitCommand(BalanceCommand quitCmd, long triggerTime) {
+    }
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java
index dcb47face..7fabc17db 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java
@@ -19,14 +19,11 @@
package org.apache.bifromq.basekv.balance.impl;
-import static com.google.common.collect.Sets.difference;
-import static com.google.common.collect.Sets.union;
-
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
-import java.util.Collections;
+import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
@@ -76,6 +73,17 @@ public ReplicaCntBalancer(String clusterId,
Preconditions.checkArgument(validate(defaultLoadRules), "Invalid default load rules");
}
+    /**
+     * Assembles a cluster config from the given voters and learners.
+     *
+     * @param voters   the members added as voters
+     * @param learners the members added as learners
+     * @return the built config; no other field is set
+     */
+    private ClusterConfig buildConfig(Set<String> voters, Set<String> learners) {
+        ClusterConfig.Builder configBuilder = ClusterConfig.newBuilder();
+        configBuilder.addAllVoters(voters);
+        configBuilder.addAllLearners(learners);
+        return configBuilder.build();
+    }
+
+    /**
+     * Removes, in place, every element of {@code s} that is not contained in {@code live}.
+     *
+     * @param s    the set to prune; it is modified
+     * @param live the elements allowed to remain
+     */
+    private void sanitize(Set<String> s, Set<String> live) {
+        s.removeIf(member -> !live.contains(member));
+    }
+
@Override
public Struct initialLoadRules() {
return defaultLoadRules;
@@ -116,137 +124,190 @@ private boolean meetExpectedConfig(Struct loadRules,
Map landscape,
EffectiveRoute effectiveRoute,
Map expectedRangeLayout) {
- int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue();
- int expectedLearners = (int) loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue();
- // meeting goal one - meet the expected number of Voter replicas and learner replicas for each Range dynamically
+ final Set liveStores = landscape.keySet();
+ final int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue();
+ final int expectedLearners = (int) loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue();
+
+ if (liveStores.size() < expectedVoters) {
+ for (Map.Entry e : effectiveRoute.leaderRanges().entrySet()) {
+ ClusterConfig cc = e.getValue().descriptor().getConfig();
+ for (String v : cc.getVotersList()) {
+ if (!liveStores.contains(v)) {
+ // shortcut (e.g. rolling restart): fewer stores are live than expected voters and a current voter is not live
+ return true;
+ }
+ }
+ }
+ }
+
boolean meetingGoal = false;
+
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
- Boundary boundary = entry.getKey();
LeaderRange leaderRange = entry.getValue();
KVRangeDescriptor rangeDescriptor = leaderRange.descriptor();
ClusterConfig clusterConfig = rangeDescriptor.getConfig();
- if (meetingGoal) {
- expectedRangeLayout.put(boundary, clusterConfig);
- continue;
- }
+
+ // if a config change is still in progress, abort generation and wait for the next round,
+ // so that range config changes stay as linear as possible
if (clusterConfig.getNextVotersCount() > 0 || clusterConfig.getNextLearnersCount() > 0) {
- // if there is running config change process, abort generation
- expectedRangeLayout.put(boundary, clusterConfig);
- meetingGoal = true;
- continue;
+ expectedRangeLayout.clear();
+ // shortcut
+ return true;
}
- // voter count not meet expectation or exceeds actual store node amount
- Set voters = new HashSet<>(clusterConfig.getVotersList());
- Set learners = new HashSet<>(clusterConfig.getLearnersList());
- if (clusterConfig.getVotersCount() != expectedVoters || clusterConfig.getVotersCount() > landscape.size()) {
- if (clusterConfig.getVotersCount() < expectedVoters) {
- // add some voters from the least range count store
- List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream()
- .filter(e ->
- !learners.contains(e.getKey()) && !voters.contains(e.getKey()))
+
+ final Set voters = new HashSet<>(clusterConfig.getVotersList());
+ final Set learners = new HashSet<>(clusterConfig.getLearnersList());
+
+ // remove unreachable stores from voters and learners
+ sanitize(voters, liveStores);
+ sanitize(learners, liveStores);
+
+ Boundary boundary = entry.getKey();
+ int targetVoters = Math.min(expectedVoters, liveStores.size());
+ boolean needFix = voters.size() != targetVoters;
+ if (!meetingGoal && needFix) {
+ String leaderStore = leaderRange.ownerStoreDescriptor().getId();
+ if (voters.size() < targetVoters) {
+ if (!learners.isEmpty()) {
+ List learnerCandidates = landscape.entrySet().stream()
+ .filter(e -> learners.contains(e.getKey()))
+ .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount()))
+ .map(Map.Entry::getKey)
+ .toList();
+ for (String s : learnerCandidates) {
+ learners.remove(s); // promote learner -> voter
+ voters.add(s);
+ if (voters.size() == targetVoters) {
+ break;
+ }
+ }
+ }
+
+ if (voters.size() < targetVoters) {
+ List freeCandidates = landscape.entrySet().stream()
+ .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey()))
+ .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount()))
+ .map(Map.Entry::getKey)
+ .toList();
+ for (String s : freeCandidates) {
+ voters.add(s);
+ if (voters.size() == targetVoters) {
+ break;
+ }
+ }
+ }
+
+ if (expectedLearners == -1) {
+ Set newLearners = new HashSet<>(liveStores);
+ newLearners.removeAll(voters);
+ learners.clear();
+ learners.addAll(newLearners);
+ }
+ List candidates = landscape.entrySet().stream()
+ .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey()))
.sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount()))
.map(Map.Entry::getKey)
.toList();
- for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) {
- voters.add(aliveStoreId);
- if (voters.size() == expectedVoters) {
+ for (String s : candidates) {
+ voters.add(s);
+ if (voters.size() == targetVoters) {
break;
}
}
- } else {
- // remove some voters from the most range count store
- List aliveStoresSortedByRangeCountDesc = landscape.entrySet().stream()
+ } else { // voters.size() > targetVoters
+ List overloaded = landscape.entrySet().stream()
.sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount())
.map(Map.Entry::getKey)
.toList();
- for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) {
- if (!aliveStoreId.equals(leaderRange.ownerStoreDescriptor().getId())) {
- voters.remove(aliveStoreId);
+ for (String s : overloaded) {
+ if (!s.equals(leaderStore) && voters.contains(s)) {
+ voters.remove(s);
+ if (voters.size() == targetVoters) {
+ break;
+ }
}
- if (voters.size() == expectedVoters) {
- break;
+ }
+ if (voters.size() > targetVoters) {
+ for (String s : new ArrayList<>(voters)) {
+ if (!s.equals(leaderStore)) {
+ voters.remove(s);
+ if (voters.size() == targetVoters) {
+ break;
+ }
+ }
}
}
}
- // remove unreachable voters
- voters.removeIf(voter -> !landscape.containsKey(voter));
- ClusterConfig newConfig = ClusterConfig.newBuilder()
- .mergeFrom(clusterConfig)
- .clearVoters()
- .addAllVoters(voters)
- .build();
- if (!newConfig.equals(clusterConfig)) {
- meetingGoal = true;
- }
- expectedRangeLayout.put(boundary, newConfig);
+ expectedRangeLayout.put(boundary, buildConfig(voters, learners));
+ meetingGoal = true;
} else {
- expectedRangeLayout.put(boundary, clusterConfig);
+ expectedRangeLayout.put(boundary, buildConfig(voters, learners));
}
}
+
if (meetingGoal) {
return true;
}
- // voter count met the expectation, check learner count
+
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
- Boundary boundary = entry.getKey();
LeaderRange leaderRange = entry.getValue();
KVRangeDescriptor rangeDescriptor = leaderRange.descriptor();
ClusterConfig clusterConfig = rangeDescriptor.getConfig();
- if (meetingGoal) {
- expectedRangeLayout.put(boundary, clusterConfig);
- continue;
- }
+
Set voters = new HashSet<>(clusterConfig.getVotersList());
Set learners = new HashSet<>(clusterConfig.getLearnersList());
- if (expectedLearners == -1
- || clusterConfig.getLearnersCount() != expectedLearners
- || clusterConfig.getLearnersCount() > landscape.size()) {
- if (expectedLearners == -1) {
- Set newLearners = new HashSet<>(landscape.keySet());
- newLearners.removeAll(voters);
- learners.addAll(newLearners);
- } else {
- if (clusterConfig.getLearnersCount() < expectedLearners) {
- // add some learners from the least range count store
- List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream()
- .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount()))
- .map(Map.Entry::getKey)
- .toList();
- for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) {
- if (!voters.contains(aliveStoreId)) {
- learners.add(aliveStoreId);
- }
- if (learners.size() == expectedVoters) {
+ sanitize(voters, liveStores);
+ sanitize(learners, liveStores);
+
+ boolean changed = false;
+
+ if (expectedLearners == -1) {
+ // learners = live - voters
+ Set newLearners = new HashSet<>(liveStores);
+ newLearners.removeAll(voters);
+ if (!newLearners.equals(learners)) {
+ learners = newLearners;
+ changed = true;
+ }
+ } else {
+ int maxPossible = Math.max(0, liveStores.size() - voters.size());
+ int targetLearners = Math.min(expectedLearners, maxPossible);
+
+ if (learners.size() < targetLearners) {
+ List candidates = landscape.entrySet().stream()
+ .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount()))
+ .map(Map.Entry::getKey)
+ .toList();
+ for (String s : candidates) {
+ if (!voters.contains(s) && !learners.contains(s)) {
+ learners.add(s);
+ if (learners.size() == targetLearners) {
break;
}
}
- } else {
- // remove some learners from the most range count store
- List aliveStoresSortedByRangeCountDesc = landscape.entrySet().stream()
- .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount())
- .map(Map.Entry::getKey)
- .toList();
- for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) {
- learners.remove(aliveStoreId);
- if (learners.size() == expectedLearners) {
+ }
+ changed = true;
+ } else if (learners.size() > targetLearners) {
+ List overloaded = landscape.entrySet().stream()
+ .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount())
+ .map(Map.Entry::getKey)
+ .toList();
+ for (String s : overloaded) {
+ if (learners.contains(s)) {
+ learners.remove(s);
+ if (learners.size() == targetLearners) {
break;
}
}
}
+ changed = true;
}
- // remove unreachable learners
- learners.removeIf(learner -> !landscape.containsKey(learner));
- ClusterConfig newConfig = ClusterConfig.newBuilder()
- .mergeFrom(clusterConfig)
- .clearLearners()
- .addAllLearners(learners)
- .build();
- if (!newConfig.equals(clusterConfig)) {
- meetingGoal = true;
- }
- expectedRangeLayout.put(boundary, newConfig);
- } else {
- expectedRangeLayout.put(boundary, clusterConfig);
+ }
+
+ Boundary boundary = entry.getKey();
+ expectedRangeLayout.put(boundary, buildConfig(voters, learners));
+ if (!meetingGoal && changed) {
+ meetingGoal = true;
}
}
return meetingGoal;
@@ -255,133 +316,148 @@ private boolean meetExpectedConfig(Struct loadRules,
+ /**
+ * Plans at most one voter relocation to even out voter replicas across live stores.
+ *
+ * <p>Voters are counted per live store over all leader ranges. If the most and the least loaded stores differ by
+ * more than one voter, the first leader range that has a voter on a store holding the maximum count, and for
+ * which some store with at most floor(average) voters hosts neither a voter nor a learner of that range, gets
+ * that voter replaced by the underloaded store. Every other range is recorded with its current voters and
+ * learners restricted to live stores.
+ *
+ * @param landscape map whose key set is treated as the set of live store ids
+ * @param effectiveRoute source of the leader ranges to examine
+ * @param expectedRangeLayout output map from boundary to planned config; left untouched when the stores are
+ * already balanced, cleared when an imbalance exists but no move could be planned
+ * @return true if a move was planned, false otherwise
+ */
private boolean balanceVoterCount(Map landscape,
EffectiveRoute effectiveRoute,
Map expectedRangeLayout) {
- // goal one has met, meeting goal two - evenly distributed voter replicas across all stores
- boolean meetingGoal = false;
+ final Set liveStores = landscape.keySet();
Map storeVoterCount = new HashMap<>();
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
ClusterConfig config = entry.getValue().descriptor().getConfig();
- config.getVotersList()
+ // only voters hosted on live stores are counted
+ config.getVotersList().stream()
+ .filter(liveStores::contains)
.forEach(storeId -> storeVoterCount.put(storeId, storeVoterCount.getOrDefault(storeId, 0) + 1));
}
- landscape.keySet().forEach(storeId -> {
- if (!storeVoterCount.containsKey(storeId)) {
- storeVoterCount.put(storeId, 0);
- }
- });
- record StoreVoterCount(String storeId, int voterCount) {
- }
+ // live stores hosting no voter still take part in the balance with a count of zero
+ liveStores.forEach(s -> storeVoterCount.putIfAbsent(s, 0));
+
+ record StoreVoterCount(String storeId, int voterCount) {}
- SortedSet storeVoterCountSorted = new TreeSet<>(Comparator
- .comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId));
+ // ascending by voter count, ties broken by store id, so iteration order is deterministic
+ SortedSet storeVoterCountSorted = new TreeSet<>(
+ Comparator.comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId));
storeVoterCount.forEach(
(storeId, voterCount) -> storeVoterCountSorted.add(new StoreVoterCount(storeId, voterCount)));
+
double totalVoters = storeVoterCount.values().stream().mapToInt(Integer::intValue).sum();
- double targetVotersPerStore = totalVoters / landscape.size();
- int maxVotersPerStore = (int) Math.ceil(targetVotersPerStore);
+ // guard against division by zero when no store is live
+ double targetVotersPerStore = liveStores.isEmpty() ? 0 : totalVoters / liveStores.size();
int minVotersPerStore = (int) Math.floor(targetVotersPerStore);
- int globalMax = Collections.max(storeVoterCount.values());
- int globalMin = Collections.min(storeVoterCount.values());
+ int globalMax = storeVoterCount.values().stream().mapToInt(Integer::intValue).max().orElse(0);
+ int globalMin = storeVoterCount.values().stream().mapToInt(Integer::intValue).min().orElse(0);
+ // a spread of at most one voter is treated as balanced
if (globalMax - globalMin <= 1) {
return false;
}
+ boolean meetingGoal = false;
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
Boundary boundary = entry.getKey();
- LeaderRange leaderRange = entry.getValue();
- KVRangeDescriptor rangeDescriptor = leaderRange.descriptor();
- ClusterConfig clusterConfig = rangeDescriptor.getConfig();
- if (meetingGoal) {
- expectedRangeLayout.put(boundary, clusterConfig);
- continue;
- }
- // examine in sorted order to ensure the result is deterministic
- Set learners = Sets.newHashSet(clusterConfig.getLearnersList());
- SortedSet voterSorted = Sets.newTreeSet(clusterConfig.getVotersList());
- for (String voter : voterSorted) {
- if (storeVoterCount.get(voter) >= maxVotersPerStore) {
- // voter store has overloaded voters
- for (StoreVoterCount underloadedStore : storeVoterCountSorted) {
- // move to one underloaded store which is current not in the voter list
- if (storeVoterCount.get(underloadedStore.storeId) <= minVotersPerStore
- && !voterSorted.contains(underloadedStore.storeId)
- && !learners.contains(underloadedStore.storeId)) {
- meetingGoal = true;
- ClusterConfig newConfig = ClusterConfig.newBuilder()
- .addAllVoters(
- difference(union(voterSorted, Set.of(underloadedStore.storeId)), Set.of(voter)))
- .addAllLearners(learners)
- .build();
- expectedRangeLayout.put(boundary, newConfig);
- break;
+ LeaderRange lr = entry.getValue();
+ ClusterConfig cc = lr.descriptor().getConfig();
+
+ // drop replicas hosted on stores that are not live before planning
+ Set learners = Sets.newHashSet(cc.getLearnersList());
+ SortedSet voterSorted = Sets.newTreeSet(cc.getVotersList());
+ sanitize(learners, liveStores);
+ voterSorted.retainAll(liveStores);
+
+ // only one move is planned per invocation; once found, later ranges are recorded unchanged
+ if (!meetingGoal) {
+ meet:
+ for (String voter : new ArrayList<>(voterSorted)) {
+ int voters = storeVoterCount.getOrDefault(voter, 0);
+ // NOTE(review): the voter picked here is not compared with the range leader's store - confirm that
+ // moving the leader's own replica is acceptable for the command generated from this layout
+ if (voters == globalMax) {
+ for (StoreVoterCount under : storeVoterCountSorted) {
+ if (storeVoterCount.getOrDefault(under.storeId, 0) <= minVotersPerStore
+ && !voterSorted.contains(under.storeId)
+ && !learners.contains(under.storeId)) {
+ // move voter -> underloaded
+ Set newVoters = new HashSet<>(voterSorted);
+ newVoters.remove(voter);
+ newVoters.add(under.storeId);
+
+ expectedRangeLayout.put(boundary, buildConfig(newVoters, learners));
+ meetingGoal = true;
+ break meet;
+ }
}
}
}
+ if (!meetingGoal) {
+ expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners));
+ }
+ } else {
+ expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners));
}
}
+ // nothing could be moved: discard the entries recorded above
+ if (!meetingGoal) {
+ expectedRangeLayout.clear();
+ }
return meetingGoal;
}
- private boolean balanceLearnerCount(Map landscape,
- EffectiveRoute effectiveRoute,
- Map expectedRangeLayout) {
- boolean meetingGoal = false;
+ /**
+ * Plans at most one learner relocation to even out learner replicas across live stores.
+ *
+ * <p>Learners are counted per live store over all leader ranges. If the most and the least loaded stores differ
+ * by more than one learner, the first leader range that has a learner on a store holding the maximum count, and
+ * for which some store with fewer than floor(average) learners hosts neither a voter nor a learner of that
+ * range, gets that learner replaced by the underloaded store. Every other range is recorded with its current
+ * voters and learners restricted to live stores.
+ *
+ * <p>The method returns nothing; the outcome is visible only through {@code expectedRangeLayout}.
+ *
+ * @param landscape map whose key set is treated as the set of live store ids
+ * @param effectiveRoute source of the leader ranges to examine
+ * @param expectedRangeLayout output map from boundary to planned config; left untouched when the stores are
+ * already balanced, cleared when an imbalance exists but no move could be planned
+ */
+ private void balanceLearnerCount(Map landscape,
+ EffectiveRoute effectiveRoute,
+ Map expectedRangeLayout) {
+ final Set liveStores = landscape.keySet();
+
Map storeLearnerCount = new HashMap<>();
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
ClusterConfig config = entry.getValue().descriptor().getConfig();
- config.getLearnersList()
+ // only learners hosted on live stores are counted
+ config.getLearnersList().stream()
+ .filter(liveStores::contains)
.forEach(storeId -> storeLearnerCount.put(storeId, storeLearnerCount.getOrDefault(storeId, 0) + 1));
}
- landscape.keySet().forEach(storeId -> {
- if (!storeLearnerCount.containsKey(storeId)) {
- storeLearnerCount.put(storeId, 0);
- }
- });
- record StoreLearnerCount(String storeId, int voterCount) {
- }
+ // live stores hosting no learner still take part in the balance with a count of zero
+ liveStores.forEach(s -> storeLearnerCount.putIfAbsent(s, 0));
+
+ record StoreLearnerCount(String storeId, int learnerCount) {}
- SortedSet storeVoterCountSorted = new TreeSet<>(Comparator
- .comparingInt(StoreLearnerCount::voterCount).thenComparing(StoreLearnerCount::storeId));
- storeLearnerCount.forEach(
- (storeId, voterCount) -> storeVoterCountSorted.add(new StoreLearnerCount(storeId, voterCount)));
+ // ascending by learner count, ties broken by store id, so iteration order is deterministic
+ SortedSet storeLearnerCountSorted = new TreeSet<>(
+ Comparator.comparingInt(StoreLearnerCount::learnerCount).thenComparing(StoreLearnerCount::storeId));
+ storeLearnerCount.forEach((id, c) -> storeLearnerCountSorted.add(new StoreLearnerCount(id, c)));
double totalLearners = storeLearnerCount.values().stream().mapToInt(Integer::intValue).sum();
- double targetLearnersPerStore = totalLearners / landscape.size();
- int maxLearnersPerStore = (int) Math.ceil(targetLearnersPerStore);
+ // guard against division by zero when no store is live
+ double targetLearnersPerStore = liveStores.isEmpty() ? 0 : totalLearners / liveStores.size();
+ int minLearnersPerStore = (int) Math.floor(targetLearnersPerStore);
+
+ int globalMax = storeLearnerCount.values().stream().mapToInt(Integer::intValue).max().orElse(0);
+ int globalMin = storeLearnerCount.values().stream().mapToInt(Integer::intValue).min().orElse(0);
+ // a spread of at most one learner is treated as balanced
+ if (globalMax - globalMin <= 1) {
+ return;
+ }
+ boolean meetingGoal = false;
for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) {
Boundary boundary = entry.getKey();
- LeaderRange leaderRange = entry.getValue();
- KVRangeDescriptor rangeDescriptor = leaderRange.descriptor();
- ClusterConfig clusterConfig = rangeDescriptor.getConfig();
- if (meetingGoal) {
- expectedRangeLayout.put(boundary, clusterConfig);
- continue;
- }
- // examine in sorted order to ensure the result is deterministic
- Set voters = Sets.newHashSet(clusterConfig.getVotersList());
- SortedSet learnerSorted = Sets.newTreeSet(clusterConfig.getLearnersList());
- for (String learner : learnerSorted) {
- if (storeLearnerCount.get(learner) > maxLearnersPerStore) {
- // learner store has overloaded learners
- for (StoreLearnerCount underloadedStore : storeVoterCountSorted) {
- // move to one underloaded store which is current not in the voter or learner list
- if (storeLearnerCount.get(underloadedStore.storeId) < maxLearnersPerStore
- && !voters.contains(underloadedStore.storeId)
- && !learnerSorted.contains(underloadedStore.storeId)) {
- meetingGoal = true;
- ClusterConfig newConfig = ClusterConfig.newBuilder()
- .addAllVoters(voters)
- .addAllLearners(difference(
- union(learnerSorted, Set.of(underloadedStore.storeId)), Set.of(learner)))
- .build();
- expectedRangeLayout.put(boundary, newConfig);
- break;
+ LeaderRange lr = entry.getValue();
+ ClusterConfig cc = lr.descriptor().getConfig();
+
+ // drop replicas hosted on stores that are not live before planning
+ Set voters = Sets.newHashSet(cc.getVotersList());
+ SortedSet learnerSorted = Sets.newTreeSet(cc.getLearnersList());
+ sanitize(voters, liveStores);
+ learnerSorted.retainAll(liveStores);
+
+ // only one move is planned per invocation; once found, later ranges are recorded unchanged
+ if (!meetingGoal) {
+ meet:
+ for (String learner : new ArrayList<>(learnerSorted)) {
+ int learners = storeLearnerCount.getOrDefault(learner, 0);
+ if (learners == globalMax) {
+ for (StoreLearnerCount under : storeLearnerCountSorted) {
+ // NOTE(review): this test is strict (<) whereas balanceVoterCount uses <=; with counts such
+ // as 1,1,1,3 the floor of the average is 1 and no store qualifies - confirm this is intended
+ if (storeLearnerCount.getOrDefault(under.storeId, 0) < minLearnersPerStore
+ && !voters.contains(under.storeId)
+ && !learnerSorted.contains(under.storeId)) {
+ Set newLearners = new HashSet<>(learnerSorted);
+ newLearners.remove(learner);
+ newLearners.add(under.storeId);
+
+ expectedRangeLayout.put(boundary, buildConfig(voters, newLearners));
+ meetingGoal = true;
+ break meet;
+ }
}
}
}
+ if (!meetingGoal) {
+ expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted));
+ }
+ } else {
+ expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted));
}
}
- return meetingGoal;
+ // nothing could be moved: discard the entries recorded above
+ if (!meetingGoal) {
+ expectedRangeLayout.clear();
+ }
}
}
diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java
index b3ea01c5b..abf7b204f 100644
--- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java
+++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java
@@ -14,13 +14,21 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.balance.impl;
import static org.apache.bifromq.basekv.proto.State.StateType.Normal;
+import com.google.common.collect.Sets;
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Supplier;
import org.apache.bifromq.basehlc.HLC;
import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
@@ -34,14 +42,6 @@
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState;
import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
-import com.google.common.collect.Sets;
-import java.time.Duration;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import java.util.function.Supplier;
/**
* The UnreachableReplicaRemovalBalancer is a specialized balancer responsible for managing and removing unreachable
@@ -99,7 +99,10 @@ public UnreachableReplicaRemovalBalancer(String clusterId, String localStoreId,
public void update(Set landscape) {
Map> descriptorMap = build(landscape);
latestDescriptorMap = descriptorMap;
-
+ if (!descriptorMap.containsKey(localStoreId)) {
+ replicaSuspicionTimeMap.clear();
+ return; // No need to process if local store is not present in the landscape
+ }
// Track the current leaders
Set currentLeaders = new HashSet<>();
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java
index e3f537967..dc44b4be4 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java
@@ -79,6 +79,7 @@ public class KVStoreBalanceControllerTest {
private static final String LOCAL_STORE_ID = "localStoreId";
private final PublishSubject> proposalSubject = PublishSubject.create();
private final PublishSubject> storeDescSubject = PublishSubject.create();
+ private final PublishSubject refreshSignal = PublishSubject.create();
@Mock
private IBaseKVMetaService metaService;
@Mock
@@ -103,6 +104,7 @@ public void setup() throws IOException {
when(balancerFactory.newBalancer(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(storeBalancer);
when(metaService.balancerStatesProposal(eq(CLUSTER_ID))).thenReturn(statesProposal);
when(metaService.balancerStatesReporter(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(statesReporter);
+ when(statesReporter.refreshSignal()).thenReturn(refreshSignal);
when(statesProposal.expectedBalancerStates()).thenReturn(proposalSubject);
when(storeClient.describe()).thenReturn(storeDescSubject);
executor = Executors.newScheduledThreadPool(1);
@@ -400,6 +402,18 @@ public void testInvalidRules() {
verify(statesReporter, never()).reportBalancerState(anyString(), anyBoolean(), any(Struct.class));
}
+ // Emitting on the reporter's refresh signal must cause exactly one reportBalancerState call, carrying the
+ // balancer factory's class name, false and an empty Struct.
+ // NOTE(review): presumably the controller under test is already started by setup() or earlier code - confirm
+ @Test
+ public void testRefreshSignal() {
+ // forget any reports recorded before the signal is fired
+ reset(statesReporter);
+ refreshSignal.onNext(System.currentTimeMillis());
+ verify(statesReporter, times(1))
+ .reportBalancerState(anyString(), anyBoolean(), any(Struct.class));
+ verify(statesReporter, times(1))
+ .reportBalancerState(eq(balancerFactory.getClass().getName()),
+ eq(false),
+ eq(Struct.getDefaultInstance()));
+ }
+
private Set generateDescriptor(KVRangeId id, long ver) {
List voters = Lists.newArrayList(LOCAL_STORE_ID, "store1");
List learners = Lists.newArrayList();
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java
index 6c9aec88d..7e167a8f3 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java
@@ -21,18 +21,20 @@
import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertSame;
+import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basekv.balance.BalanceNow;
-import org.apache.bifromq.basekv.balance.BalanceResult;
-import org.apache.bifromq.basekv.balance.BalanceResultType;
-import org.apache.bifromq.basekv.balance.command.BootstrapCommand;
-import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
import java.time.Duration;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
+import org.apache.bifromq.basekv.balance.BalanceNow;
+import org.apache.bifromq.basekv.balance.BalanceResult;
+import org.apache.bifromq.basekv.balance.BalanceResultType;
+import org.apache.bifromq.basekv.balance.command.BootstrapCommand;
+import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -58,7 +60,7 @@ public void updateWithoutStoreDescriptors() {
BalanceResult result = balancer.balance();
assertSame(result.type(), BalanceResultType.BalanceNow);
- assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow>) result).command).getBoundary());
+ assertEquals(((BootstrapCommand) ((BalanceNow>) result).command).getBoundary(), FULL_BOUNDARY);
}
@@ -74,6 +76,51 @@ public void balanceWithTrigger() {
BalanceResult result = balancer.balance();
assertSame(result.type(), BalanceResultType.BalanceNow);
- assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow>) result).command).getBoundary());
+ assertEquals(((BootstrapCommand) ((BalanceNow>) result).command).getBoundary(), FULL_BOUNDARY);
+ }
+
+ // With an empty landscape and no time elapsed, balance() must ask the caller to wait instead of bootstrapping.
+ @Test
+ public void returnsAwaitImmediatelyBeforeDeadline() {
+ balancer.update(Collections.emptySet());
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.AwaitBalance);
+
+ // the wait must be non-zero and at most 2000 ms
+ // NOTE(review): 2000 presumably mirrors the delay the balancer is constructed with in setup - confirm
+ Duration remaining = ((org.apache.bifromq.basekv.balance.AwaitBalance) result).await;
+ assertFalse(remaining.isZero());
+ assertTrue(remaining.toMillis() <= 2000L);
+ }
+
+ // The reported wait must shrink as mock time advances, and balance() must switch to BalanceNow with a
+ // bootstrap command for FULL_BOUNDARY once the wait has fully elapsed.
+ @Test
+ public void awaitThenBalanceNowAfterDeadline() {
+ balancer.update(Collections.emptySet());
+
+ BalanceResult r1 = balancer.balance();
+ assertSame(r1.type(), BalanceResultType.AwaitBalance);
+ long r1ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r1).await.toMillis();
+ assertTrue(r1ms > 0);
+
+ // advance roughly half of the reported wait (at least 1 ms): still waiting, with less time remaining
+ long half = Math.max(1, r1ms / 2);
+ mockTime.addAndGet(half);
+ BalanceResult r2 = balancer.balance();
+ assertSame(r2.type(), BalanceResultType.AwaitBalance);
+ long r2ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r2).await.toMillis();
+ assertTrue(r2ms >= 0 && r2ms < r1ms);
+
+ // step just past the remaining wait: the bootstrap command is now expected
+ mockTime.addAndGet(r2ms + 1);
+ BalanceResult r3 = balancer.balance();
+ assertSame(r3.type(), BalanceResultType.BalanceNow);
+ assertEquals(((BootstrapCommand) ((BalanceNow>) r3).command).getBoundary(), FULL_BOUNDARY);
+ }
+
+ // Once balance() has returned BalanceNow, an immediate second call must report NoNeedBalance.
+ @Test
+ public void noSecondTriggerAfterBootstrapFires() {
+ balancer.update(Collections.emptySet());
+ // NOTE(review): 2000 ms is presumably enough to pass the bootstrap delay configured in setup - confirm
+ mockTime.addAndGet(2000L);
+ BalanceResult fired = balancer.balance();
+ assertSame(fired.type(), BalanceResultType.BalanceNow);
+
+ BalanceResult next = balancer.balance();
+ assertSame(next.type(), BalanceResultType.NoNeedBalance);
+ }
}
\ No newline at end of file
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java
index 1b1aebccd..ab1348c72 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.balance.impl;
@@ -25,6 +25,8 @@
import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;
+import com.google.protobuf.ByteString;
+import java.util.Set;
import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
import org.apache.bifromq.basekv.balance.BalanceResultType;
@@ -34,10 +36,9 @@
import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
import org.apache.bifromq.basekv.proto.KVRangeId;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.proto.State;
import org.apache.bifromq.basekv.raft.proto.ClusterConfig;
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
-import com.google.protobuf.ByteString;
-import java.util.Set;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -58,6 +59,7 @@ public void noEffectiveRouteNoBalanceNeeded() {
KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(
Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).setEndKey(ByteString.copyFromUtf8("z"))
.build())
@@ -139,6 +141,7 @@ public void balanceToOtherNoLeaderStore() {
KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(toBoundary(null, ByteString.copyFromUtf8("z")))
.setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build())
.build();
@@ -148,6 +151,7 @@ public void balanceToOtherNoLeaderStore() {
.setId(kvRangeId2)
.setVer(1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null))
.setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build())
.build();
@@ -161,6 +165,7 @@ public void balanceToOtherNoLeaderStore() {
KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId3)
.setRole(RaftNodeStatus.Follower)
+ .setState(State.StateType.Normal)
.setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null))
.setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build())
.build();
@@ -185,6 +190,7 @@ public void transferLeadership() {
KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build())
.build();
@@ -193,6 +199,7 @@ public void transferLeadership() {
.setId(kvRangeId2)
.setVer(1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build())
.build();
@@ -205,6 +212,7 @@ public void transferLeadership() {
KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId3)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build())
.build();
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java
index a43f8fd47..0c0d6b62f 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java
@@ -163,4 +163,176 @@ public void stopSplitWhenExceedMaxRanges() {
.build()));
assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
}
+
+ @Test
+ public void skipWhenConfigHasDeadVoter() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("store1")
+ .addVoters("deadStore")
+ .build())
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("a"))
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void skipWhenConfigHasDeadLearner() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("store1")
+ .addLearners("ghost")
+ .build())
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("a"))
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void skipWhenConfigHasDeadNextMembers() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("store1")
+ .addNextVoters("deadV")
+ .addNextLearners("deadL")
+ .build())
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("a"))
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void skipWhenOngoingConfigChange() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("store1")
+ .addNextVoters("store1")
+ .build())
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("a"))
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void skipWhenSplitKeyEqualsStartOrOutOfRange() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .setBoundary(org.apache.bifromq.basekv.proto.Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("a"))
+ .setEndKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("store1").build())
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("a"))
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+
+ KVRangeDescriptor rd2 = rd.toBuilder().clearHints()
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .setSplitKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .build();
+ KVRangeStoreDescriptor sd2 = sd.toBuilder().clearRanges().addRanges(rd2).build();
+ balancer.update(Set.of(sd2));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void stopSplitWhenSplitKeyNotProvided() {
+ RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000);
+
+ KVRangeDescriptor rd = rangeDescriptorBuilder
+ .addHints(SplitHint.newBuilder()
+ .setType(HintType)
+ .putLoad("ioDensity", 40)
+ .putLoad("ioLatencyNanos", 100)
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sd = storeDescriptorBuilder
+ .clearRanges()
+ .addRanges(rd)
+ .putStatistics("cpu.usage", 0.7)
+ .build();
+
+ balancer.update(Set.of(sd));
+ assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
}
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java
index ecfeca3b9..00871e263 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java
@@ -23,9 +23,11 @@
import static org.testng.Assert.assertSame;
import com.google.protobuf.ByteString;
+import java.time.Duration;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
import org.apache.bifromq.basekv.balance.BalanceResultType;
@@ -34,20 +36,22 @@
import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
import org.apache.bifromq.basekv.proto.KVRangeId;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.proto.State;
import org.apache.bifromq.basekv.raft.proto.ClusterConfig;
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
public class RedundantRangeRemovalBalancerTest {
-
private final String clusterId = "testCluster";
private final String localStoreId = "localStore";
private RedundantRangeRemovalBalancer balancer;
+ private AtomicLong mockTime;
@BeforeMethod
public void setUp() {
- balancer = new RedundantRangeRemovalBalancer(clusterId, localStoreId);
+ mockTime = new AtomicLong(0L); // Start time at 0
+ balancer = new RedundantRangeRemovalBalancer(clusterId, localStoreId, Duration.ofSeconds(1), mockTime::get);
}
@Test
@@ -56,6 +60,7 @@ public void noRedundantEpoch() {
KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("a"))
@@ -85,6 +90,7 @@ public void removeRangeInRedundantEpoch() {
KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("a"))
@@ -99,6 +105,7 @@ public void removeRangeInRedundantEpoch() {
KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId2)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("n"))
@@ -121,6 +128,11 @@ public void removeRangeInRedundantEpoch() {
balancer.update(storeDescriptors);
BalanceResult command = balancer.balance();
+ // first returns AwaitBalance due to suspicion delay
+ assertEquals(command.type(), BalanceResultType.AwaitBalance);
+ // advance mock time beyond the max suspicion window (2s)
+ mockTime.set(3000L);
+ command = balancer.balance();
assertEquals(command.type(), BalanceResultType.BalanceNow);
ChangeConfigCommand changeConfigCommand = (ChangeConfigCommand) ((BalanceNow>) command).command;
@@ -137,6 +149,7 @@ public void noLocalLeaderRangeInRedundantEpoch() {
.setId(kvRangeId1)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("a"))
.setEndKey(ByteString.copyFromUtf8("m"))
@@ -150,6 +163,7 @@ public void noLocalLeaderRangeInRedundantEpoch() {
KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId2)
.setRole(RaftNodeStatus.Follower)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("n"))
@@ -187,6 +201,7 @@ public void removeRedundantEffectiveRange() {
.setId(kvRangeId1)
.setVer(1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(boundary)
.setConfig(config)
.build();
@@ -194,6 +209,7 @@ public void removeRedundantEffectiveRange() {
.setId(kvRangeId2)
.setVer(1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(boundary)
.setConfig(config)
.build();
@@ -209,7 +225,10 @@ public void removeRedundantEffectiveRange() {
balancer.update(storeDescriptors);
BalanceResult result = balancer.balance();
-
+ // first returns AwaitBalance due to suspicion delay
+ assertEquals(result.type(), BalanceResultType.AwaitBalance);
+ mockTime.set(3000L);
+ result = balancer.balance();
assertEquals(result.type(), BalanceResultType.BalanceNow);
ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
@@ -226,6 +245,7 @@ public void ignoreNonLocalStore() {
KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(Boundary.newBuilder()
.setStartKey(ByteString.copyFromUtf8("a"))
@@ -259,6 +279,7 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() {
KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setVer(1)
.setBoundary(boundary)
.setConfig(ClusterConfig.newBuilder()
@@ -289,6 +310,10 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() {
balancer.update(Set.of(localStoreDesc, peerStoreDesc));
BalanceResult result = balancer.balance();
+ // first returns AwaitBalance due to suspicion delay
+ assertEquals(result.type(), BalanceResultType.AwaitBalance);
+ mockTime.set(3000L);
+ result = balancer.balance();
assertEquals(result.type(), BalanceResultType.BalanceNow);
ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
@@ -311,6 +336,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() {
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(boundary)
.setConfig(ClusterConfig.newBuilder()
.addVoters(localStoreId)
@@ -321,6 +347,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() {
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(boundary)
.setConfig(ClusterConfig.newBuilder()
.addVoters(peerStoreId)
@@ -342,4 +369,49 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() {
BalanceResult result = balancer.balance();
assertSame(result.type(), BalanceResultType.NoNeedBalance);
}
-}
\ No newline at end of file
+
+ @Test
+ public void idConflictButVotersOverlapShouldNotDelete() {
+ String peerStoreId = "peer";
+ KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ Boundary boundary = Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("a"))
+ .setEndKey(ByteString.copyFromUtf8("z")).build();
+
+ KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder()
+ .setId(kvRangeId)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(boundary)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters(localStoreId)
+ .addVoters("x").build())
+ .build();
+
+ KVRangeDescriptor peerRange = KVRangeDescriptor.newBuilder()
+ .setId(kvRangeId)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(boundary)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters(localStoreId)
+ .addVoters(peerStoreId).build())
+ .build();
+
+ KVRangeStoreDescriptor localStoreDesc = KVRangeStoreDescriptor.newBuilder()
+ .setId(localStoreId)
+ .addRanges(localRange)
+ .build();
+ KVRangeStoreDescriptor peerStoreDesc = KVRangeStoreDescriptor.newBuilder()
+ .setId(peerStoreId)
+ .addRanges(peerRange)
+ .build();
+
+ balancer.update(Set.of(localStoreDesc, peerStoreDesc));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.NoNeedBalance);
+ }
+}
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java
index 346eedd31..1b5b7fc1f 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java
@@ -40,6 +40,7 @@
import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
import org.apache.bifromq.basekv.proto.KVRangeId;
import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.proto.State;
import org.apache.bifromq.basekv.raft.proto.ClusterConfig;
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.apache.bifromq.basekv.utils.EffectiveRoute;
@@ -69,6 +70,7 @@ public void balanceToAddVoter() {
.setId(kvRangeId1)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build())
.setConfig(ClusterConfig.newBuilder()
.addVoters("s1")
@@ -82,6 +84,7 @@ public void balanceToAddVoter() {
.setId(kvRangeId2)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).build())
.setConfig(ClusterConfig.newBuilder()
.addVoters("s1")
@@ -127,6 +130,7 @@ public void balanceToAddLearner() {
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
@@ -163,16 +167,17 @@ public void balanceToAddLearner() {
}
@Test
- public void balanceToRemoveVoter() {
+ public void balanceToRemoveLearner() {
KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
- .addVoters("remoteStore")
+ .addLearners("learnerStore")
.build())
.build();
@@ -190,63 +195,43 @@ public void balanceToRemoveVoter() {
BalanceResult result = balancer.balance();
ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
assertTrue(command.getVoters().contains("localStore"));
- assertFalse(command.getVoters().contains("remoteStore"));
assertTrue(command.getLearners().isEmpty());
}
@Test
- public void balanceToRemoveLearner() {
+ public void promoteLearnersToVoters() {
KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
- KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
+ KVRangeDescriptor leader = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
- .addLearners("learnerStore")
+ .addLearners("remoteStore")
.build())
.build();
-
- KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
- .setId("localStore")
- .addRanges(kvRangeDescriptor)
- .putStatistics("cpu.usage", 0.5)
- .build();
-
- Set storeDescriptors = new HashSet<>();
- storeDescriptors.add(storeDescriptor);
-
- balancer.update(storeDescriptors);
-
- BalanceResult result = balancer.balance();
- ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
- assertTrue(command.getVoters().contains("localStore"));
- assertTrue(command.getLearners().isEmpty());
- }
-
- @Test
- public void nothingChanged() {
- KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
- KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder()
+ KVRangeDescriptor learner = KVRangeDescriptor.newBuilder()
.setId(kvRangeId)
- .setRole(RaftNodeStatus.Leader)
+ .setRole(RaftNodeStatus.Follower)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
- .addLearners("learnerStore")
+ .addLearners("remoteStore")
.build())
.build();
KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
.setId("localStore")
- .addRanges(kvRangeDescriptor)
+ .addRanges(leader)
.putStatistics("cpu.usage", 0.5)
.build();
KVRangeStoreDescriptor learnerStoreDescriptor = KVRangeStoreDescriptor.newBuilder()
- .setId("learnerStore")
- .addRanges(kvRangeDescriptor)
+ .setId("remoteStore")
+ .addRanges(learner)
.putStatistics("cpu.usage", 0.5)
.build();
@@ -255,8 +240,10 @@ public void nothingChanged() {
storeDescriptors.add(learnerStoreDescriptor);
balancer.update(storeDescriptors);
-
- assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) balancer.balance()).command;
+ assertTrue(command.getVoters().contains("localStore"));
+ assertTrue(command.getVoters().contains("remoteStore"));
+ assertTrue(command.getLearners().isEmpty());
}
@Test
@@ -267,6 +254,7 @@ public void balanceToAddAllRestLearners() {
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
@@ -318,6 +306,7 @@ public void balanceVoterCount() {
KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder()
.setId(kvRangeId1)
.setVer(1)
+ .setState(State.StateType.Normal)
.setRole(RaftNodeStatus.Leader)
.setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder()
@@ -329,6 +318,7 @@ public void balanceVoterCount() {
.setId(kvRangeId2)
.setVer(2)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
@@ -364,6 +354,7 @@ public void balanceLearnerCount() {
.setId(kvRangeId1)
.setVer(1)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build())
.setConfig(ClusterConfig.newBuilder()
.addVoters("s1")
@@ -380,6 +371,7 @@ public void balanceLearnerCount() {
.setId(kvRangeId2)
.setVer(2)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a"))
.setEndKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder()
@@ -398,6 +390,7 @@ public void balanceLearnerCount() {
.setId(kvRangeId3)
.setVer(2)
.setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
.setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build())
.setConfig(ClusterConfig.newBuilder()
.addVoters("s3")
@@ -432,6 +425,7 @@ public void generateCorrectClusterConfig() {
.setId(kvRangeId)
.setRole(RaftNodeStatus.Leader)
.setVer(1)
+ .setState(State.StateType.Normal)
.setBoundary(FULL_BOUNDARY)
.setConfig(ClusterConfig.newBuilder()
.addVoters("localStore")
@@ -464,4 +458,502 @@ public void generateCorrectClusterConfig() {
assertTrue(balancer.verify(layout, allStoreDescriptors));
}
+
+ @Test
+ public void removeDeadVoterAndBackfillEvenIfCountEqualsExpected() {
+ // live: s1, s2, s3; expected voters=3
+ // range current voters = [s1, ghost, s2]
+ KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(kvRangeId)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addVoters("ghost")
+ .addVoters("s2")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+
+ Set stores = new HashSet<>();
+ stores.add(s1);
+ stores.add(s2);
+ stores.add(s3);
+
+ // votersPerRange=3, learnersPerRange=0
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0);
+ balancer.update(stores);
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ // expected: ghost removed, s3 added
+ assertEquals(cmd.getKvRangeId(), kvRangeId);
+ assertTrue(cmd.getVoters().contains("s1"));
+ assertTrue(cmd.getVoters().contains("s2"));
+ assertTrue(cmd.getVoters().contains("s3"));
+ assertFalse(cmd.getVoters().contains("ghost"));
+ assertTrue(cmd.getLearners().isEmpty());
+ }
+
+ @Test
+ public void abortWhenConfigChangeInProgress_nextFieldsPresent() {
+ KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ ClusterConfig cfgWithNext = ClusterConfig.newBuilder()
+ .addVoters("localStore")
+ .addNextVoters("someone")
+ .build();
+
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(kvRangeId)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(cfgWithNext)
+ .build();
+
+ KVRangeStoreDescriptor local = KVRangeStoreDescriptor.newBuilder()
+ .setId("localStore")
+ .addRanges(range)
+ .build();
+
+ Set stores = new HashSet<>();
+ stores.add(local);
+
+ balancer = new ReplicaCntBalancer("testCluster", "localStore", 1, 0);
+ balancer.update(stores);
+
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void learnersMinusOneUsesLiveMinusVotersAndSanitizes() {
+ // expectedLearners = -1 => learners = live - voters
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 1, -1);
+
+ KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(kvRangeId)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addLearners("ghostLearner")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+
+ Set stores = new HashSet<>();
+ stores.add(s1);
+ stores.add(s2);
+ stores.add(s3);
+
+ balancer.update(stores);
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ // expected: learners = live - voters = {s2, s3}; ghostLearner removed
+ assertTrue(cmd.getVoters().contains("s1"));
+ assertFalse(cmd.getLearners().contains("ghostLearner"));
+ assertTrue(cmd.getLearners().contains("s2"));
+ assertTrue(cmd.getLearners().contains("s3"));
+ assertEquals(cmd.getLearners().size(), 2);
+ }
+
+ @Test
+ public void skipWhenCapacityInsufficientAndHasDeadVoter() {
+ // expected voters=3; configured voters = s1, s2, deadS3 (dead); live stores = s1, s2
+ ReplicaCntBalancer balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setBoundary(org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addVoters("s2")
+ .addVoters("deadS3") // dead
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ Set landscape = new HashSet<>();
+ landscape.add(s1);
+ landscape.add(s2);
+
+ balancer.update(landscape);
+
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void learnersMinusOnePreferPromoteLearnersToFillVoters() {
+ // expected: voters=3, learners=-1
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 3, -1);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addLearners("s2")
+ .addLearners("s3")
+ .addLearners("s4")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+ KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build();
+
+ balancer.update(Set.of(s1, s2, s3, s4));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ // voters should be s1 + two from {s2,s3,s4}
+ assertTrue(cmd.getVoters().contains("s1"));
+ assertEquals(cmd.getVoters().size(), 3);
+ // after promotion, learners should be live - voters = the remaining one
+ assertEquals(cmd.getLearners().size(), 1);
+ Set all = Set.of("s1", "s2", "s3", "s4");
+ Set union = new HashSet<>(cmd.getVoters());
+ union.addAll(cmd.getLearners());
+ assertEquals(union, all);
+ }
+
+ @Test
+ public void noChangeWhenLiveLessThanExpectedAndNoDeadVoter() {
+ // expected voters=3, live={s1,s2}, voters={s1,s2}
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addVoters("s2")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+
+ balancer.update(Set.of(s1, s2));
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void neverRemoveLeaderWhenShrinkingVoters() {
+ // expected voters=3, voters currently 4 (leader must stay)
+ balancer = new ReplicaCntBalancer("testCluster", "leader", 3, 0);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("leader")
+ .addVoters("s2")
+ .addVoters("s3")
+ .addVoters("s4")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor leader = KVRangeStoreDescriptor.newBuilder().setId("leader").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+ KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build();
+
+ balancer.update(Set.of(leader, s2, s3, s4));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ assertTrue(cmd.getVoters().contains("leader"));
+ assertEquals(cmd.getVoters().size(), 3);
+ }
+
+ @Test
+ public void balanceVoterCountNoopWhenSpreadWithinOne() {
+ // two stores, two ranges: counts differ by at most 1 -> no rebalance
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0);
+
+ KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder()
+ .setId(r1).setVer(1).setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("s1").build())
+ .build();
+
+ KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+ KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder()
+ .setId(r2).setVer(1).setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("s2").build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(d1).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(d2).build();
+
+ balancer.update(Set.of(s1, s2));
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void fixedLearnerCountRemovesDeadAndBackfills() {
+ // expected learners=2; current learners={deadL, s2}; live={s1,s2,s3,s4}
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 2);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addLearners("deadL")
+ .addLearners("s2")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+ KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build();
+
+ balancer.update(Set.of(s1, s2, s3, s4));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ assertTrue(cmd.getVoters().contains("s1"));
+ assertEquals(cmd.getLearners().size(), 2);
+ assertTrue(cmd.getLearners().contains("s2"));
+ assertFalse(cmd.getLearners().contains("deadL"));
+ }
+
+ @Test
+ public void zeroLearnersTargetClearsLearners() {
+ // expected learners=0
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addLearners("s2")
+ .addLearners("s3")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+
+ balancer.update(Set.of(s1, s2, s3));
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+ assertTrue(cmd.getLearners().isEmpty());
+ assertTrue(cmd.getVoters().contains("s1"));
+ }
+
+ @Test
+ public void learnersMinusOneWithAllLiveAsVotersMakesLearnersEmpty() {
+ balancer = new ReplicaCntBalancer("testCluster", "s1", 3, -1);
+
+ KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor range = KVRangeDescriptor.newBuilder()
+ .setId(rid)
+ .setRole(RaftNodeStatus.Leader)
+ .setVer(1)
+ .setState(State.StateType.Normal)
+ .setBoundary(FULL_BOUNDARY)
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("s1")
+ .addVoters("s2")
+ .addVoters("s3")
+ .addLearners("ghost") // should be sanitized away
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build();
+ KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build();
+
+ balancer.update(Set.of(s1, s2, s3));
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ assertEquals(cmd.getVoters(), Set.of("s1", "s2", "s3"));
+ assertTrue(cmd.getLearners().isEmpty());
+ }
+
+ @Test
+ public void balanceVoterCountPrefersZeroCountStoreFirst() {
+ KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+ KVRangeId r3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build();
+ KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder()
+ .setId(r1)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("sA").build())
+ .build();
+ KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder()
+ .setId(r2)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("m"))
+ .setEndKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("sA").build())
+ .build();
+
+ KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build();
+ KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB")
+ .addRanges(KVRangeDescriptor.newBuilder()
+ .setId(r3)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("sB").build())
+ .build())
+ .build();
+ KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build();
+
+ balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 0);
+ balancer.update(Set.of(sA, sB, sC));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ assertTrue(cmd.getVoters().contains("sC"));
+ assertFalse(cmd.getVoters().contains("sA"));
+ }
+
+ @Test
+ public void balanceVoterCountDoesOnlyOneChangePerRound() {
+ KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+ KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder()
+ .setId(r1)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("sA").build())
+ .build();
+ KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder()
+ .setId(r2)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build())
+ .setConfig(ClusterConfig.newBuilder().addVoters("sA").build())
+ .build();
+
+ KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build();
+ KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build();
+ KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build();
+
+ balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 0);
+ balancer.update(Set.of(sA, sB, sC));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+ assertTrue(cmd.getKvRangeId().equals(r1) || cmd.getKvRangeId().equals(r2));
+ }
+
+ @Test
+ public void balanceVoterCountSkipsTargetsAlreadyInVotersOrLearners() {
+ KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder()
+ .setId(r1)
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build())
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("sA")
+ .addLearners("sB")
+ .build())
+ .build();
+
+ KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder()
+ .setId(KVRangeId.newBuilder().setEpoch(1).setId(2).build())
+ .setVer(1)
+ .setRole(RaftNodeStatus.Leader)
+ .setState(State.StateType.Normal)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build())
+ .setConfig(ClusterConfig.newBuilder()
+ .addVoters("sA")
+ .addLearners("sB")
+ .build())
+ .build();
+
+ KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build();
+ KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build();
+ KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build();
+
+ balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 1);
+ balancer.update(Set.of(sA, sB, sC));
+
+ BalanceResult result = balancer.balance();
+ assertSame(result.type(), BalanceResultType.BalanceNow);
+ ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow>) result).command;
+
+ assertTrue(cmd.getVoters().contains("sC"));
+ assertFalse(cmd.getVoters().contains("sB"));
+ }
}
diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java
index 752ebbcff..dc2ccb358 100644
--- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java
+++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java
@@ -25,6 +25,12 @@
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertSame;
+import java.time.Duration;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Supplier;
import org.apache.bifromq.basekv.balance.BalanceNow;
import org.apache.bifromq.basekv.balance.BalanceResult;
import org.apache.bifromq.basekv.balance.BalanceResultType;
@@ -36,22 +42,16 @@
import org.apache.bifromq.basekv.raft.proto.ClusterConfig;
import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState;
-import java.time.Duration;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Map;
-import java.util.Set;
-import java.util.function.Supplier;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
public class UnreachableReplicaRemovalBalancerTest {
- private UnreachableReplicaRemovalBalancer balancer;
- private Supplier mockTimeSource;
private final String localStoreId = "localStore";
private final String peerStoreId = "peerStore";
private final KVRangeId rangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ private UnreachableReplicaRemovalBalancer balancer;
+ private Supplier mockTimeSource;
@BeforeMethod
public void setUp() {
@@ -61,6 +61,36 @@ public void setUp() {
new UnreachableReplicaRemovalBalancer("clusterId", localStoreId, Duration.ofSeconds(15), mockTimeSource);
}
+ @Test
+ public void noChangeWhenLocalStoreMissingInitially() {
+ KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId);
+ balancer.update(Set.of(peerStoreDescriptor));
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
+ @Test
+ public void noNPEWhenLocalStoreDisappearsAfterBeingLeader() {
+ KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor(
+ localStoreId,
+ createRangeDescriptor(
+ rangeId,
+ RaftNodeStatus.Leader,
+ Map.of(localStoreId, RaftNodeSyncState.Replicating, peerStoreId, RaftNodeSyncState.Probing),
+ Set.of(localStoreId, peerStoreId),
+ Set.of()
+ )
+ );
+ KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId);
+
+ when(mockTimeSource.get()).thenReturn(System.currentTimeMillis());
+ balancer.update(Set.of(localStoreDescriptor, peerStoreDescriptor));
+
+ when(mockTimeSource.get()).thenReturn(System.currentTimeMillis() + 16000);
+ balancer.update(Set.of(peerStoreDescriptor));
+
+ assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
+ }
+
@Test
public void noChangeWhenAllReplicasAreReachable() {
KVRangeStoreDescriptor storeDescriptor = createStoreDescriptor(
@@ -73,7 +103,6 @@ public void noChangeWhenAllReplicasAreReachable() {
balancer.update(Set.of(storeDescriptor));
-
assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance);
}
@@ -101,7 +130,7 @@ public void removesUnreachableVoterReplicaAfterTimeout() {
// Verify that the unhealthy replica is scheduled for removal
assertEquals(localStoreId, command.getToStore());
assertEquals(rangeId, command.getKvRangeId());
- assertEquals(5, command.getExpectedVer());
+ assertEquals(command.getExpectedVer(), 5);
assertFalse(command.getVoters().contains(peerStoreId));
}
@@ -129,11 +158,10 @@ public void removesUnreachableLearnerReplicaAfterTimeout() {
// Verify that the unhealthy replica is scheduled for removal
assertEquals(localStoreId, command.getToStore());
assertEquals(rangeId, command.getKvRangeId());
- assertEquals(5, command.getExpectedVer());
+ assertEquals(command.getExpectedVer(), 5);
assertFalse(command.getLearners().contains(peerStoreId));
}
-
@Test
public void noCommandIfReplicaReachableAgain() {
KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor(
@@ -177,7 +205,6 @@ public void removesReplicaIfLeaderChanged() {
Set.of(localStoreId), Set.of(peerStoreId))
);
-
balancer.update(Set.of(storeDescriptor, peerStoreDescriptor));
// Simulate a leader change
diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java
index c29a07839..13d6af6ad 100644
--- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java
+++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java
@@ -40,7 +40,8 @@ public abstract class StoreBalancer {
* @param localStoreId the id of the store which the balancer is responsible for
*/
public StoreBalancer(String clusterId, String localStoreId) {
- this.log = MDCLogger.getLogger("balancer.logger", "clusterId", clusterId, "storeId", localStoreId);
+ this.log = MDCLogger.getLogger("balancer.logger",
+ "clusterId", clusterId, "storeId", localStoreId, "balancer", this.getClass().getSimpleName());
this.clusterId = clusterId;
this.localStoreId = localStoreId;
}
diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java
index 2c36c3f38..99fc0701a 100644
--- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java
+++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java
@@ -22,6 +22,7 @@
import lombok.Getter;
import lombok.Setter;
import lombok.experimental.SuperBuilder;
+import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
@Getter
@Setter
@@ -35,6 +36,7 @@ public CommandType type() {
@Override
public String toString() {
- return String.format("RecoveryCommand{toStore=%s}", getToStore());
+ return String.format("RecoveryCommand{toStore=%s, kvRangeId=%s}",
+ getToStore(), KVRangeIdUtil.toString(getKvRangeId()));
}
}
diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java
index 125ff4208..dadf83ba9 100644
--- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java
+++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.balance.util;
@@ -23,8 +23,14 @@
import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey;
import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary;
-import org.apache.bifromq.basekv.balance.BalanceNow;
-import org.apache.bifromq.basekv.balance.BalanceResult;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NavigableMap;
+import java.util.Set;
import org.apache.bifromq.basekv.balance.command.BalanceCommand;
import org.apache.bifromq.basekv.balance.command.BootstrapCommand;
import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand;
@@ -37,14 +43,6 @@
import org.apache.bifromq.basekv.utils.EffectiveRoute;
import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
import org.apache.bifromq.basekv.utils.LeaderRange;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NavigableMap;
-import java.util.Set;
/**
* Utility class for generating balance commands.
@@ -57,24 +55,24 @@ public class CommandUtil {
* @param rangeDescriptor the range descriptor of the range which the balancer is responsible for
* @return the generated ChangeConfigCommand
*/
- public static BalanceResult quit(String localStoreId, KVRangeDescriptor rangeDescriptor) {
+ public static BalanceCommand quit(String localStoreId, KVRangeDescriptor rangeDescriptor) {
ClusterConfig config = rangeDescriptor.getConfig();
if (config.getVotersCount() > 1 || config.getLearnersCount() > 0) {
- return BalanceNow.of(ChangeConfigCommand.builder()
+ return ChangeConfigCommand.builder()
.toStore(localStoreId)
.kvRangeId(rangeDescriptor.getId())
.expectedVer(rangeDescriptor.getVer())
.voters(Set.of(localStoreId))
.learners(Collections.emptySet())
- .build());
+ .build();
} else {
- return BalanceNow.of(ChangeConfigCommand.builder()
+ return ChangeConfigCommand.builder()
.toStore(localStoreId)
.kvRangeId(rangeDescriptor.getId())
.expectedVer(rangeDescriptor.getVer())
.voters(Collections.emptySet())
.learners(Collections.emptySet())
- .build());
+ .build();
}
}
diff --git a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java
index 2b06a80a7..ae41eafa1 100644
--- a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java
+++ b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java
@@ -19,17 +19,19 @@
package org.apache.bifromq.basekv.balance.util;
+import static com.google.protobuf.ByteString.copyFromUtf8;
import static org.apache.bifromq.basekv.balance.util.CommandUtil.diffBy;
import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary;
-import static com.google.protobuf.ByteString.copyFromUtf8;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basekv.balance.BalanceNow;
-import org.apache.bifromq.basekv.balance.BalanceResult;
-import org.apache.bifromq.basekv.balance.BalanceResultType;
+import com.google.protobuf.ByteString;
+import java.util.Collections;
+import java.util.NavigableMap;
+import java.util.Set;
+import java.util.TreeMap;
import org.apache.bifromq.basekv.balance.command.BalanceCommand;
import org.apache.bifromq.basekv.balance.command.BootstrapCommand;
import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand;
@@ -44,11 +46,6 @@
import org.apache.bifromq.basekv.utils.EffectiveRoute;
import org.apache.bifromq.basekv.utils.KVRangeIdUtil;
import org.apache.bifromq.basekv.utils.LeaderRange;
-import com.google.protobuf.ByteString;
-import java.util.Collections;
-import java.util.NavigableMap;
-import java.util.Set;
-import java.util.TreeMap;
import org.testng.annotations.Test;
public class CommandUtilTest {
@@ -65,10 +62,9 @@ public void quitWithMultipleVoters() {
.build())
.build();
- BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
+ BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
- assertEquals(result.type(), BalanceResultType.BalanceNow);
- ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
+ ChangeConfigCommand command = (ChangeConfigCommand) result;
assertEquals(command.getToStore(), localStoreId);
assertEquals(command.getKvRangeId(), kvRangeId);
@@ -89,10 +85,9 @@ public void quitWithLearners() {
.build())
.build();
- BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
+ BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
- assertEquals(result.type(), BalanceResultType.BalanceNow);
- ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
+ ChangeConfigCommand command = (ChangeConfigCommand) result;
assertEquals(command.getToStore(), localStoreId);
assertEquals(command.getKvRangeId(), kvRangeId);
@@ -112,10 +107,9 @@ public void quitWithSingleVoterNoLearners() {
.build())
.build();
- BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
+ BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor);
- assertEquals(result.type(), BalanceResultType.BalanceNow);
- ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow>) result).command;
+ ChangeConfigCommand command = (ChangeConfigCommand) result;
assertEquals(command.getToStore(), localStoreId);
assertEquals(command.getKvRangeId(), kvRangeId);
diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java
index b3fb57119..15d961508 100644
--- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java
+++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java
@@ -20,7 +20,6 @@
package org.apache.bifromq.basekv.server;
import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap;
-import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY;
import static org.apache.bifromq.baserpc.server.UnaryResponse.response;
import com.google.common.collect.Sets;
@@ -94,10 +93,11 @@ public String storeId() {
public void start() {
log.debug("Starting BaseKVStore service");
kvRangeStore.start(new AgentHostStoreMessenger(agentHost, clusterId, kvRangeStore.id()));
- kvRangeStore.bootstrap(KVRangeIdUtil.generate(), FULL_BOUNDARY);
landscapeReporter = metaService.landscapeReporter(clusterId, kvRangeStore.id());
// sync store descriptor via crdt
disposables.add(kvRangeStore.describe().subscribe(landscapeReporter::report));
+ disposables.add(landscapeReporter.refreshSignal()
+ .subscribe(ts -> landscapeReporter.report(kvRangeStore.describe().blockingFirst())));
log.debug("BaseKVStore service started");
}
diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java
index af807d3f1..0f60e10ba 100644
--- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java
+++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java
@@ -336,7 +336,7 @@ public void tick() {
statsCollector.tick();
dumpSessions.values().forEach(KVRangeDumpSession::tick);
shrinkWAL();
- checkZombieState();
+ checkAndRepairFromZombieState();
estimateSplitHint();
}
@@ -372,12 +372,10 @@ private CompletableFuture doClose() {
.thenCompose(v -> statsCollector.stop())
.thenCompose(v -> mgmtTaskRunner.awaitDone())
.thenCompose(v -> wal.close())
- .thenCompose(v -> {
+ .thenCompose(v -> awaitShutdown(fsmExecutor))
+ .whenComplete((v, e) -> {
kvRange.close();
metricManager.close();
- return awaitShutdown(fsmExecutor);
- })
- .whenComplete((v, e) -> {
cmdFutures.values()
.forEach(f -> f.completeExceptionally(new KVRangeException.TryLater("Range closed")));
queryRunner.close();
@@ -646,29 +644,27 @@ private CompletableFuture apply(LogEntry entry) {
switch (entry.getTypeCase()) {
case CONFIG -> {
IKVRangeWriter> rangeWriter = kvRange.toWriter();
- applyConfigChange(entry.getTerm(), entry.getIndex(), entry.getConfig(), rangeWriter)
- .whenComplete((callback, e) -> {
- if (onDone.isCancelled()) {
- rangeWriter.abort();
- } else {
- try {
- if (e != null) {
- rangeWriter.abort();
- onDone.completeExceptionally(e);
- } else {
- rangeWriter.lastAppliedIndex(entry.getIndex());
- rangeWriter.done();
- callback.run();
- linearizer.afterLogApplied(entry.getIndex());
- metricManager.reportLastAppliedIndex(entry.getIndex());
- onDone.complete(null);
- }
- } catch (Throwable t) {
- log.error("Failed to apply log", t);
- onDone.completeExceptionally(t);
+ try {
+ Supplier> afterLogApplied = applyConfigChange(entry.getTerm(),
+ entry.getIndex(), entry.getConfig(), rangeWriter);
+ rangeWriter.lastAppliedIndex(entry.getIndex());
+ rangeWriter.done();
+ afterLogApplied.get()
+ .whenComplete((v, e) -> {
+ if (e != null) {
+ log.error("Failed to apply config change", e);
+ onDone.completeExceptionally(e);
+ } else {
+ linearizer.afterLogApplied(entry.getIndex());
+ metricManager.reportLastAppliedIndex(entry.getIndex());
+ onDone.complete(null);
}
- }
- });
+ });
+ } catch (Throwable t) {
+ rangeWriter.abort();
+ log.error("Failed to apply command", t);
+ onDone.completeExceptionally(t);
+ }
}
case DATA -> {
try {
@@ -723,18 +719,17 @@ private CompletableFuture apply(LogEntry entry) {
return onDone;
}
- private CompletableFuture applyConfigChange(long term, long index,
- ClusterConfig config,
- IKVRangeWritable> rangeWriter) {
- CompletableFuture onDone = new CompletableFuture<>();
+ private Supplier> applyConfigChange(long term,
+ long index,
+ ClusterConfig config,
+ IKVRangeWritable> rangeWriter) {
State state = rangeWriter.state();
log.info("Apply new config[term={}, index={}]: state={}, leader={}\n{}",
term, index, state, wal.isLeader(), config);
rangeWriter.clusterConfig(config);
if (config.getNextVotersCount() != 0 || config.getNextLearnersCount() != 0) {
// skip joint-config
- onDone.complete(NOOP);
- return onDone;
+ return () -> CompletableFuture.completedFuture(null);
}
Set members = newHashSet();
members.addAll(config.getVotersList());
@@ -753,18 +748,17 @@ private CompletableFuture applyConfigChange(long term, long index,
.setType(Removed)
.setTaskId(taskId)
.build());
- onDone.complete(() -> {
+ return () -> {
quitSignal.complete(null);
finishCommand(taskId);
- });
+ return CompletableFuture.completedFuture(null);
+ };
} else {
rangeWriter.state(State.newBuilder()
.setType(Normal)
.setTaskId(taskId)
.build());
- onDone.complete(() -> {
- finishCommand(taskId);
- });
+ return () -> compactWAL().thenRun(() -> finishCommand(taskId));
}
} else {
// request config change failed, the config entry is appended due to leader reelection
@@ -774,16 +768,17 @@ private CompletableFuture applyConfigChange(long term, long index,
.setType(Removed)
.setTaskId(taskId)
.build());
- onDone.complete(() -> {
+ return () -> {
quitSignal.complete(null);
finishCommand(taskId);
- });
+ return CompletableFuture.completedFuture(null);
+ };
} else {
rangeWriter.state(State.newBuilder()
.setType(Normal)
.setTaskId(taskId)
.build());
- onDone.complete(() -> finishCommand(taskId));
+ return () -> compactWAL().thenRun(() -> finishCommand(taskId));
}
}
}
@@ -802,12 +797,13 @@ private CompletableFuture applyConfigChange(long term, long index,
.build());
}
rangeWriter.bumpVer(false);
- onDone.complete(() -> {
+ return () -> {
finishCommand(taskId);
if (remove) {
quitSignal.complete(null);
}
- });
+ return CompletableFuture.completedFuture(null);
+ };
}
case ToBePurged -> {
String taskId = state.getTaskId();
@@ -817,25 +813,28 @@ private CompletableFuture applyConfigChange(long term, long index,
.setType(Removed)
.setTaskId(taskId)
.build());
- onDone.complete(() -> {
+ return () -> {
finishCommand(taskId);
quitSignal.complete(null);
- });
+ return CompletableFuture.completedFuture(null);
+ };
} else {
rangeWriter.state(State.newBuilder()
.setType(Normal)
.setTaskId(taskId)
.build());
- onDone.complete(() -> {
+ return () -> compactWAL().thenRun(() -> {
+ // purge failed due to leader change, reset back to normal
+ log.debug("Purge failed due to leader change[newConfig={}]", config);
finishCommand(taskId);
});
}
}
- default ->
- // skip internal config change triggered by leadership change
- onDone.complete(NOOP);
+ default -> {
+ // skip internal config change triggered by leadership change, no need to compact WAL
+ return () -> CompletableFuture.completedFuture(null);
+ }
}
- return onDone;
}
private CompletableFuture applyCommand(long ver,
@@ -876,11 +875,6 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(),
// make a checkpoint if needed
CompletableFuture compactWALFuture = CompletableFuture.completedFuture(null);
if (wal.latestSnapshot().getLastAppliedIndex() < logIndex - 1) {
- // cancel all on-going dump sessions
- dumpSessions.forEach((sessionId, session) -> {
- session.cancel();
- dumpSessions.remove(sessionId, session);
- });
compactWALFuture = compactWAL();
}
compactWALFuture.whenCompleteAsync((v, e) -> {
@@ -905,67 +899,84 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(),
? newHashSet(clusterConfig.getVotersList()) : newHashSet(newConfig.getVotersList());
Set nextLearners = toBePurged
? emptySet() : newHashSet(newConfig.getLearnersList());
- List> onceFutures = newHostingStoreIds.stream()
- .map(storeId -> messenger
- .once(m -> {
- if (m.hasEnsureRangeReply()) {
- EnsureRangeReply reply = m.getEnsureRangeReply();
- return reply.getResult() == EnsureRangeReply.Result.OK;
- }
- return false;
- })
- .orTimeout(5, TimeUnit.SECONDS)
- )
- .collect(Collectors.toList());
- CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new))
- .whenCompleteAsync((v1, t) -> {
- if (t != null) {
- String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s",
- taskId, t.getMessage());
- log.warn(errorMessage);
- finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage));
- wal.stepDown();
- return;
- }
- wal.changeClusterConfig(taskId, nextVoters, nextLearners)
- .whenCompleteAsync((v2, e2) -> {
- if (e2 != null) {
- String errorMessage =
- String.format("ConfigChange aborted[taskId=%s] due to %s",
- taskId, e2.getMessage());
- log.debug(errorMessage);
- finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage));
- wal.stepDown();
+ if (wal.isLeader()) {
+ List> onceFutures = newHostingStoreIds.stream()
+ .map(storeId -> messenger
+ .once(m -> {
+ if (m.hasEnsureRangeReply()) {
+ EnsureRangeReply reply = m.getEnsureRangeReply();
+ return reply.getResult() == EnsureRangeReply.Result.OK;
}
- // postpone finishing command when config entry is applied
- }, fsmExecutor);
- }, fsmExecutor);
- newHostingStoreIds.forEach(storeId -> {
- log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId);
- ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance();
- messenger.send(KVRangeMessage.newBuilder()
- .setRangeId(id)
- .setHostStoreId(storeId)
- .setEnsureRange(EnsureRange.newBuilder()
- .setVer(ver) // ensure the new kvrange is compatible in target store
- .setBoundary(boundary)
- .setInitSnapshot(Snapshot.newBuilder()
- .setTerm(0)
- .setIndex(0)
- .setClusterConfig(ensuredClusterConfig) // empty voter set
- .setData(KVRangeSnapshot.newBuilder()
- .setVer(ver)
- .setId(id)
- // no checkpoint specified
- .setLastAppliedIndex(0)
- .setBoundary(boundary)
- .setState(state)
- .setClusterConfig(ensuredClusterConfig)
- .build().toByteString())
+ return false;
+ })
+ .orTimeout(5, TimeUnit.SECONDS)
+ )
+ .collect(Collectors.toList());
+ CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new))
+ .whenCompleteAsync((v1, t) -> {
+ if (t != null) {
+ String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s",
+ taskId, t.getMessage());
+ log.warn(errorMessage);
+ finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage));
+ wal.stepDown();
+ return;
+ }
+ wal.changeClusterConfig(taskId, nextVoters, nextLearners)
+ .whenCompleteAsync((v2, e2) -> {
+ if (e2 != null) {
+ String errorMessage =
+ String.format("ConfigChange aborted[taskId=%s] due to %s",
+ taskId, e2.getMessage());
+ log.debug(errorMessage);
+ finishCommandWithError(taskId,
+ new KVRangeException.TryLater(errorMessage));
+ wal.stepDown();
+ }
+ // finishing the command is postponed until the config entry is applied
+ }, fsmExecutor);
+ }, fsmExecutor);
+ newHostingStoreIds.forEach(storeId -> {
+ log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId);
+ ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance();
+ messenger.send(KVRangeMessage.newBuilder()
+ .setRangeId(id)
+ .setHostStoreId(storeId)
+ .setEnsureRange(EnsureRange.newBuilder()
+ .setVer(ver) // ensure the new kvrange is compatible in target store
+ .setBoundary(boundary)
+ .setInitSnapshot(Snapshot.newBuilder()
+ .setTerm(0)
+ .setIndex(0)
+ .setClusterConfig(ensuredClusterConfig) // empty voter set
+ .setData(KVRangeSnapshot.newBuilder()
+ .setVer(ver)
+ .setId(id)
+ // no checkpoint specified
+ .setLastAppliedIndex(0)
+ .setBoundary(boundary)
+ .setState(state)
+ .setClusterConfig(ensuredClusterConfig)
+ .build().toByteString())
+ .build())
.build())
- .build())
- .build());
- });
+ .build());
+ });
+ } else {
+ wal.changeClusterConfig(taskId, nextVoters, nextLearners)
+ .whenCompleteAsync((v2, e2) -> {
+ if (e2 != null) {
+ String errorMessage =
+ String.format("ConfigChange aborted[taskId=%s] due to %s",
+ taskId, e2.getMessage());
+ log.debug(errorMessage);
+ finishCommandWithError(taskId,
+ new KVRangeException.TryLater(errorMessage));
+ wal.stepDown();
+ }
+ // finishing the command is postponed until the config entry is applied
+ }, fsmExecutor);
+ }
if (state.getType() == Normal) {
if (toBePurged) {
rangeWriter.state(State.newBuilder()
@@ -1495,7 +1506,6 @@ private CompletableFuture restore(KVRangeSnapshot snapshot,
return restorer.restoreFrom(leader, snapshot)
.handle((result, ex) -> {
if (ex != null) {
- log.warn("Restored from snapshot error: \n{}", snapshot, ex);
return onInstalled.call(null, ex);
} else {
return onInstalled.call(kvRange.checkpoint(), null);
@@ -1504,10 +1514,9 @@ private CompletableFuture restore(KVRangeSnapshot snapshot,
.thenCompose(f -> f)
.whenCompleteAsync(unwrap((v, e) -> {
if (e != null) {
- if (e instanceof SnapshotException) {
- log.error("Failed to apply snapshot to WAL \n{}", snapshot, e);
- // WAL and FSM are inconsistent, need to quit and recreate again
- quitSignal.complete(null);
+ if (e instanceof SnapshotException.ObsoleteSnapshotException) {
+ log.debug("Obsolete snapshot, reset kvRange to latest snapshot: \n{}", snapshot);
+ kvRange.toReseter(wal.latestSnapshot()).done();
}
} else {
linearizer.afterLogApplied(snapshot.getLastAppliedIndex());
@@ -1564,6 +1573,11 @@ private void shrinkWAL() {
}
private CompletableFuture compactWAL() {
+ // cancel and remove all ongoing dump sessions before scheduling WAL compaction
+ dumpSessions.forEach((sessionId, session) -> {
+ session.cancel();
+ dumpSessions.remove(sessionId, session);
+ });
return mgmtTaskRunner.add(this::doCompactWAL);
}
@@ -1592,7 +1606,7 @@ private void detectZombieState(KVRangeDescriptor descriptor) {
}
}
- private void checkZombieState() {
+ private void checkAndRepairFromZombieState() {
if (zombieAt > 0
&& Duration.ofMillis(HLC.INST.getPhysical() - zombieAt).toSeconds() > opts.getZombieTimeoutSec()) {
ClusterConfig clusterConfig = wal.latestClusterConfig();
@@ -1604,7 +1618,8 @@ private void checkZombieState() {
clusterConfig);
wal.recover().whenComplete((v, e) -> recovering.set(false));
}
- } else {
+ } else if (!clusterConfig.getVotersList().contains(hostStoreId)
+ && !clusterConfig.getLearnersList().contains(hostStoreId)) {
log.info("Zombie state detected, send quit signal.");
quitSignal.complete(null);
}
diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java
index 42f63ab79..c121a241d 100644
--- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java
+++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java
@@ -72,8 +72,7 @@ class KVRangeWALSubscription implements IKVRangeWALSubscription {
applyRunner.add(restore(task))
.handle((snap, e) -> fetchRunner.add(() -> {
if (e != null) {
- log.error(
- "Failed to install snapshot\n{}", snap);
+ log.error("Failed to restore from snapshot\n{}", task.snapshot, e);
return;
}
log.debug("Snapshot installed\n{}", snap);
diff --git a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java
index c39045fa6..44abd56e4 100644
--- a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java
+++ b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basekv.utils;
@@ -22,10 +22,6 @@
import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKey;
import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey;
-import org.apache.bifromq.basekv.proto.Boundary;
-import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
-import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
-import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import com.google.protobuf.ByteString;
import java.util.Comparator;
import java.util.HashMap;
@@ -37,6 +33,10 @@
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;
+import org.apache.bifromq.basekv.proto.Boundary;
+import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
+import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
/**
* Utilities for processing descriptor.
@@ -100,14 +100,21 @@ public static EffectiveRoute getEffectiveRoute(EffectiveEpoch effectiveEpoch) {
for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) {
for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) {
if (rangeDescriptor.getRole() == RaftNodeStatus.Leader) {
- ByteString startKey = startKey(rangeDescriptor.getBoundary());
- if (startKey == null) {
- firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor));
- continue;
+ switch (rangeDescriptor.getState()) {
+ case Normal, ConfigChanging, PreparedMerging, WaitingForMerge -> {
+ ByteString startKey = startKey(rangeDescriptor.getBoundary());
+ if (startKey == null) {
+ firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor));
+ continue;
+ }
+ sortedLeaderRanges.computeIfAbsent(startKey,
+ k -> new TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId())))
+ .add(new LeaderRange(rangeDescriptor, storeDescriptor));
+ }
+ default -> {
+ // skip other states
+ }
}
- sortedLeaderRanges.computeIfAbsent(startKey,
- k -> new TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId())))
- .add(new LeaderRange(rangeDescriptor, storeDescriptor));
}
}
}
diff --git a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java
index 3b724b5af..40b19a232 100644
--- a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java
+++ b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java
@@ -19,16 +19,15 @@
package org.apache.bifromq.basekv.utils;
+import static org.apache.bifromq.basekv.proto.State.StateType.Merged;
+import static org.apache.bifromq.basekv.proto.State.StateType.Normal;
+import static org.apache.bifromq.basekv.proto.State.StateType.PreparedMerging;
+import static org.apache.bifromq.basekv.proto.State.StateType.Removed;
import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch;
import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.basekv.proto.Boundary;
-import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
-import org.apache.bifromq.basekv.proto.KVRangeId;
-import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
-import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import com.google.protobuf.ByteString;
import java.util.Arrays;
import java.util.HashSet;
@@ -37,6 +36,11 @@
import java.util.NavigableMap;
import java.util.Set;
import java.util.stream.Collectors;
+import org.apache.bifromq.basekv.proto.Boundary;
+import org.apache.bifromq.basekv.proto.KVRangeDescriptor;
+import org.apache.bifromq.basekv.proto.KVRangeId;
+import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor;
+import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus;
import org.testng.annotations.Test;
public class DescriptorUtilTest {
@@ -460,6 +464,7 @@ public void getEffectiveRouteContiguousChain() {
KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder()
.setId(id1)
.setRole(RaftNodeStatus.Leader)
+ .setState(Normal)
.setBoundary(boundaryBuilder1.build())
.build();
@@ -470,6 +475,7 @@ public void getEffectiveRouteContiguousChain() {
KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder()
.setId(id2)
.setRole(RaftNodeStatus.Leader)
+ .setState(Normal)
.setBoundary(boundaryBuilder2.build())
.build();
@@ -479,6 +485,7 @@ public void getEffectiveRouteContiguousChain() {
KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder()
.setId(id3)
.setRole(RaftNodeStatus.Leader)
+ .setState(Normal)
.setBoundary(boundaryBuilder3.build())
.build();
@@ -517,11 +524,13 @@ public void getEffectiveRouteSelectsSmallestVer() {
KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder()
.setId(id1)
.setRole(RaftNodeStatus.Leader)
+ .setState(Normal)
.setBoundary(boundary)
.build();
KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder()
.setId(id2)
.setRole(RaftNodeStatus.Leader)
+ .setState(Normal)
.setBoundary(boundary)
.build();
@@ -568,4 +577,257 @@ public void getEffectiveRouteWithNoLeaders() {
assertTrue(routeMap.isEmpty());
}
-}
\ No newline at end of file
+
+ @Test
+ public void getEffectiveRouteFiltersByState() {
+ KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+ KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build();
+ KVRangeId id4 = KVRangeId.newBuilder().setEpoch(1).setId(4).build();
+
+ // Allowed states
+ KVRangeDescriptor rNormal = KVRangeDescriptor.newBuilder()
+ .setId(id1)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build())
+ .setState(Normal)
+ .build();
+ KVRangeDescriptor rPreparedMerging = KVRangeDescriptor.newBuilder()
+ .setId(id2)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("b"))
+ .setEndKey(ByteString.copyFromUtf8("m"))
+ .build())
+ .setState(PreparedMerging)
+ .build();
+
+ // Disallowed states
+ KVRangeDescriptor rMerged = KVRangeDescriptor.newBuilder()
+ .setId(id3)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("m"))
+ .setEndKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setState(Merged)
+ .build();
+ KVRangeDescriptor rRemoved = KVRangeDescriptor.newBuilder()
+ .setId(id4)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setState(Removed)
+ .build();
+
+ KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
+ .setId("store1")
+ .addRanges(rNormal)
+ .addRanges(rPreparedMerging)
+ .addRanges(rMerged)
+ .addRanges(rRemoved)
+ .build();
+
+ Set storeDescriptors = new HashSet<>();
+ storeDescriptors.add(storeDescriptor);
+
+ EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors);
+ EffectiveRoute effectiveRoute = DescriptorUtil.getEffectiveRoute(effectiveEpoch);
+ NavigableMap routeMap = effectiveRoute.leaderRanges();
+
+ // Only two allowed ranges should be present
+ assertEquals(routeMap.size(), 2);
+ List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList();
+ assertTrue(ids.contains(1L));
+ assertTrue(ids.contains(2L));
+ }
+
+ @Test
+ public void getEffectiveRoutePrefersNullStartKeyAsFirst() {
+ KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+
+ // First range without startKey (should be chosen as first)
+ KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder()
+ .setId(id1)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setEndKey(ByteString.copyFromUtf8("m"))
+ .build())
+ .setState(Normal)
+ .build();
+
+ // Second range with explicit startKey
+ KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder()
+ .setId(id2)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build())
+ .setState(Normal)
+ .build();
+
+ KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
+ .setId("store1")
+ .addRanges(r2)
+ .addRanges(r1)
+ .build();
+
+ Set storeDescriptors = new HashSet<>();
+ storeDescriptors.add(storeDescriptor);
+
+ EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors);
+ NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges();
+
+ assertEquals(routeMap.firstEntry().getValue().descriptor().getId(), id1);
+ }
+
+ @Test
+ public void getEffectiveRouteStopsAtNullEndKey() {
+ KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+ KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build();
+
+ KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder()
+ .setId(id1)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build())
+ .setState(Normal)
+ .build();
+ // Tail range with null endKey
+ KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder()
+ .setId(id2)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("b")).build())
+ .setState(Normal)
+ .build();
+ // An extra range that should never be reached after tail
+ KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder()
+ .setId(id3)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(
+ Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("c")).setEndKey(ByteString.copyFromUtf8("d"))
+ .build())
+ .setState(Normal)
+ .build();
+
+ KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
+ .setId("store1")
+ .addRanges(r1)
+ .addRanges(r2)
+ .addRanges(r3)
+ .build();
+
+ Set storeDescriptors = new HashSet<>();
+ storeDescriptors.add(storeDescriptor);
+
+ EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors);
+ NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges();
+
+ // Should stop at r2 (endKey null)
+ assertEquals(routeMap.size(), 2);
+ List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList();
+ assertTrue(ids.contains(1L));
+ assertTrue(ids.contains(2L));
+ }
+
+ @Test
+ public void getEffectiveRouteAllowsGapsByCeilingStartKey() {
+ KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build();
+
+ KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder()
+ .setId(id1)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setEndKey(ByteString.copyFromUtf8("b"))
+ .build())
+ .setState(Normal)
+ .build();
+ KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder()
+ .setId(id2)
+ .setRole(RaftNodeStatus.Leader)
+ .setBoundary(Boundary.newBuilder()
+ .setStartKey(ByteString.copyFromUtf8("c"))
+ .setEndKey(ByteString.copyFromUtf8("z"))
+ .build())
+ .setState(Normal)
+ .build();
+
+ KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder()
+ .setId("store1")
+ .addRanges(r1)
+ .addRanges(r2)
+ .build();
+
+ Set storeDescriptors = new HashSet<>();
+ storeDescriptors.add(storeDescriptor);
+
+ EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors);
+ NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges();
+ assertEquals(routeMap.size(), 2);
+ }
+
+ @Test
+ public void organizeByEpochRetainsStoresWithoutRangesInEpoch() {
+ // store1 has epoch 1 & 2 ranges, store2 has only epoch 2
+ KVRangeId id11 = KVRangeId.newBuilder().setEpoch(1).setId(1).build();
+ KVRangeId id21 = KVRangeId.newBuilder().setEpoch(2).setId(1).build();
+ KVRangeDescriptor r11 = KVRangeDescriptor.newBuilder()
+ .setId(id11)
+ .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build())
+ .build();
+ KVRangeDescriptor r21 = KVRangeDescriptor.newBuilder()
+ .setId(id21)
+ .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("n")).build())
+ .build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("store1").addRanges(r11).addRanges(r21)
+ .build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("store2").addRanges(r21).build();
+
+ Set set = new HashSet<>();
+ set.add(s1);
+ set.add(s2);
+
+ NavigableMap> byEpoch = DescriptorUtil.organizeByEpoch(set);
+ // Epoch 1 should contain both stores, with store2 having 0 ranges
+ assertTrue(byEpoch.containsKey(1L));
+ Set epoch1 = byEpoch.get(1L);
+ assertEquals(epoch1.size(), 2);
+ for (KVRangeStoreDescriptor d : epoch1) {
+ if (d.getId().equals("store1")) {
+ assertEquals(d.getRangesCount(), 1);
+ assertEquals(d.getRanges(0).getId().getEpoch(), 1L);
+ } else if (d.getId().equals("store2")) {
+ assertEquals(d.getRangesCount(), 0);
+ }
+ }
+ }
+
+ @Test
+ public void getEffectiveEpochOldestSelectionWithMixedStores() {
+ // store1 has epoch 2, store2 has epoch 3; no epoch 1 present => pick epoch 2
+ KVRangeId id2 = KVRangeId.newBuilder().setEpoch(2).setId(1).build();
+ KVRangeId id3 = KVRangeId.newBuilder().setEpoch(3).setId(1).build();
+ KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder().setId(id2).build();
+ KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder().setId(id3).build();
+
+ KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(r2).build();
+ KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(r3).build();
+ Set set = new HashSet<>();
+ set.add(s1);
+ set.add(s2);
+
+ Set result = DescriptorUtil.getEffectiveEpoch(set).get().storeDescriptors();
+ assertEquals(result.size(), 2);
+ // All descriptors in effective epoch must be epoch 2 versions of both stores, with s2 having 0 ranges
+ for (KVRangeStoreDescriptor d : result) {
+ if (d.getId().equals("s1")) {
+ assertEquals(d.getRangesCount(), 1);
+ assertEquals(d.getRanges(0).getId().getEpoch(), 2L);
+ } else if (d.getId().equals("s2")) {
+ assertEquals(d.getRangesCount(), 0);
+ }
+ }
+ }
+}
diff --git a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java
index 32c2dd5e3..6d418ce5d 100644
--- a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java
+++ b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java
@@ -74,12 +74,20 @@ private void logWithMDC(Supplier> isEnabled,
if (lvl.isEmpty()) {
return;
}
+ Object[] evaluated = args;
+ if (args != null && args.length > 0) {
+ evaluated = new Object[args.length];
+ for (int i = 0; i < args.length; i++) {
+ Object a = args[i];
+ evaluated[i] = (a instanceof Supplier) ? ((Supplier>) a).get() : a;
+ }
+ }
for (int i = 0; i < tags.length; i += 2) {
MDC.put(tags[i], tags[i + 1]);
}
Map extraCtx = extraContext();
extraCtx.forEach(MDC::put);
- delegate.log(marker, FQCN, lvl.get().toInt(), msg, args, t);
+ delegate.log(marker, FQCN, lvl.get().toInt(), msg, evaluated, t);
for (int i = 0; i < tags.length; i += 2) {
MDC.remove(tags[i]);
}
diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java
index dcf7a1353..46b0b2c5b 100644
--- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java
+++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java
@@ -21,8 +21,6 @@
import static io.grpc.stub.ClientCalls.asyncBidiStreamingCall;
-import org.apache.bifromq.baserpc.RPCContext;
-import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject;
import io.grpc.CallOptions;
import io.grpc.Channel;
import io.grpc.Context;
@@ -36,10 +34,11 @@
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.baserpc.RPCContext;
+import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject;
@Slf4j
class BiDiStream implements IBiDiStream {
- private final String tenantId;
private final String serverId;
private final ClientCallStreamObserver callStreamObserver;
private final Subject outSubject = PublishSubject.create();
@@ -54,7 +53,6 @@ class BiDiStream implements IBiDiStream {
MethodDescriptor methodDescriptor,
Map metadata,
CallOptions callOptions) {
- this.tenantId = tenantId;
this.serverId = serverId;
Context ctx = Context.ROOT.fork()
.withValue(RPCContext.TENANT_ID_CTX_KEY, tenantId)
diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java
index f753ba63b..e4dc3a4d4 100644
--- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java
+++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java
@@ -19,16 +19,6 @@
package org.apache.bifromq.baserpc.client;
-import org.apache.bifromq.baseenv.EnvProvider;
-import org.apache.bifromq.baseenv.NettyEnv;
-import org.apache.bifromq.baserpc.BluePrint;
-import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor;
-import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector;
-import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider;
-import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider;
-import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject;
-import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape;
-import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
import com.google.common.util.concurrent.MoreExecutors;
import io.grpc.Channel;
import io.grpc.ConnectivityState;
@@ -50,6 +40,16 @@
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import lombok.Builder;
+import org.apache.bifromq.baseenv.EnvProvider;
+import org.apache.bifromq.baseenv.NettyEnv;
+import org.apache.bifromq.baserpc.BluePrint;
+import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor;
+import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector;
+import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider;
+import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider;
+import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject;
+import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape;
+import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
class ClientChannel implements IClientChannel {
private final String serviceUniqueName;
@@ -136,7 +136,7 @@ public Observable>> serverList() {
@Override
public Observable serverSelectorObservable() {
- return serverSelectorSubject;
+ return serverSelectorSubject.distinctUntilChanged();
}
@Override
diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java
index 27f63fcfa..03159dcd3 100644
--- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java
+++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java
@@ -116,14 +116,14 @@ final boolean isReady() {
abstract void onServiceUnavailable();
private void reportNoServerAvailable() {
- log.debug("Stream@{} no server available to target: method={}",
- this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} no server available to target: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
onNoServerAvailable();
}
private void reportServiceUnavailable() {
- log.debug("Stream@{} service unavailable to target: method={}",
- this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} service unavailable to target: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
onServiceUnavailable();
}
@@ -246,12 +246,12 @@ void close() {
private void gracefulRetarget() {
if (state.compareAndSet(State.Normal, State.PendingRetarget)) {
- log.debug("Stream@{} start graceful retarget process: method={}",
- this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} start graceful retarget process: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
if (prepareRetarget()) {
// if it's ready to retarget, close it and start a new one
- log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}",
- this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
state.set(State.Retargeting);
bidiStream.get().close();
scheduleRetargetNow();
@@ -272,10 +272,12 @@ private void scheduleRetargetNow() {
private void scheduleRetarget(Duration delay) {
if (retargetScheduled.compareAndSet(false, true)) {
- log.debug("Stream@{} schedule retarget task in {}ms: method={}",
- this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName());
- CompletableFuture.runAsync(() -> retarget(this.serverSelector),
- CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS));
+ log.debug("Stream@{} schedule retarget task in {}ms: method={}, state={}",
+ this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName(), state.get());
+ CompletableFuture.runAsync(() -> {
+ retargetScheduled.set(false);
+ retarget(this.serverSelector);
+ }, CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS));
}
}
@@ -328,7 +330,6 @@ private void retarget(IServerSelector serverSelector) {
}
}
}
- retargetScheduled.set(false);
if (serverSelector != this.serverSelector) {
// server selector has been changed, schedule a retarget
scheduleRetargetNow();
@@ -338,10 +339,11 @@ private void retarget(IServerSelector serverSelector) {
private void target(String serverId) {
if (state.compareAndSet(State.Init, State.Normal)
|| state.compareAndSet(State.StreamDisconnect, State.Normal)
+ || state.compareAndSet(State.PendingRetarget, State.Normal)
|| state.compareAndSet(State.NoServerAvailable, State.Normal)
|| state.compareAndSet(State.Retargeting, State.Normal)) {
- log.debug("Stream@{} build bidi-stream to target server[{}]: method={}",
- this.hashCode(), serverId, methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} build stream to server[{}]: method={}, state={}",
+ this.hashCode(), serverId, methodDescriptor.getBareMethodName(), state.get());
BidiStreamContext bidiStreamContext = BidiStreamContext.from(new BiDiStream<>(
tenantId,
serverId,
@@ -350,13 +352,13 @@ private void target(String serverId) {
metadataSupplier.get(),
callOptions));
bidiStream.set(bidiStreamContext);
- onStreamCreated();
bidiStreamContext.subscribe(this::onNext, this::onError, this::onCompleted);
bidiStreamContext.onReady(ts -> onStreamReady());
+ onStreamCreated();
}
if (bidiStream.get().bidiStream().isReady()) {
- log.debug("Stream@{} ready after build to server[{}]: method={}",
- this.hashCode(), serverId, methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} ready: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
onStreamReady();
}
}
@@ -367,8 +369,8 @@ private void onNext(OutT out) {
if (state.get() == State.PendingRetarget && canStartRetarget()) {
// do not close the stream inline
CompletableFuture.runAsync(() -> {
- log.debug("Stream@{} close current bidi-stream before retargeting: method={}",
- this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} close current stream before retargeting: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
state.set(State.Retargeting);
bidiStream.get().close();
scheduleRetargetNow();
@@ -377,18 +379,33 @@ private void onNext(OutT out) {
}
private void onError(Throwable t) {
- log.debug("BidiStream@{} error: method={}", this.hashCode(), methodDescriptor.getBareMethodName(), t);
- state.compareAndSet(State.Normal, State.StreamDisconnect);
+ log.debug("Stream@{} error: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get(), t);
+ State s = state.get();
+ if (s == State.Normal || s == State.PendingRetarget) {
+ state.compareAndSet(s, State.StreamDisconnect);
+ }
onStreamError(t);
- scheduleRetargetWithRandomDelay();
+ if (s == State.PendingRetarget) {
+ scheduleRetargetNow();
+ } else {
+ scheduleRetargetWithRandomDelay();
+ }
}
private void onCompleted() {
- log.debug("BidiStream@{} complete: method={}", this.hashCode(), methodDescriptor.getBareMethodName());
+ log.debug("Stream@{} close by server: method={}, state={}",
+ this.hashCode(), methodDescriptor.getBareMethodName(), state.get());
// server gracefully close the stream
- state.compareAndSet(State.Normal, State.StreamDisconnect);
- onStreamError(new CancellationException("server close the bidi-stream"));
- scheduleRetargetWithRandomDelay();
+ State s = state.get();
+ if (s == State.Normal || s == State.PendingRetarget) {
+ state.compareAndSet(s, State.StreamDisconnect);
+ }
+ onStreamError(new CancellationException("Server shutdown"));
+ if (s == State.PendingRetarget) {
+ scheduleRetargetNow();
+ }
+ // otherwise schedule nothing here; wait for a selector change to trigger retargeting
}
enum State {
diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java
index 2539d9cdc..1f6c38572 100644
--- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java
+++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java
@@ -19,17 +19,16 @@
package org.apache.bifromq.baserpc.client.loadbalancer;
-import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY;
-import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY;
-import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY;
import static com.google.common.base.Preconditions.checkNotNull;
import static io.grpc.ConnectivityState.CONNECTING;
import static io.grpc.ConnectivityState.IDLE;
import static io.grpc.ConnectivityState.READY;
import static io.grpc.ConnectivityState.SHUTDOWN;
import static io.grpc.ConnectivityState.TRANSIENT_FAILURE;
+import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY;
+import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY;
+import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY;
-import org.apache.bifromq.baseenv.EnvProvider;
import com.google.common.collect.Maps;
import io.grpc.Attributes;
import io.grpc.ConnectivityState;
@@ -48,6 +47,7 @@
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.IntStream;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.baseenv.EnvProvider;
@Slf4j
public class TrafficDirectiveLoadBalancer extends LoadBalancer {
@@ -72,6 +72,12 @@ public class TrafficDirectiveLoadBalancer extends LoadBalancer {
this.currentPicker = new SubChannelPicker();
}
+ private static Set difference(Set a, Set b) {
+ Set aCopy = new HashSet<>(a);
+ aCopy.removeAll(b);
+ return aCopy;
+ }
+
@Override
public void handleResolvedAddresses(ResolvedAddresses resolvedAddresses) {
log.debug("Handle traffic change: resolvedAddresses={}", resolvedAddresses);
@@ -183,25 +189,11 @@ private void updateBalancingState() {
currentPicker.refresh(serverChannels);
helper.updateBalancingState(newState, currentPicker);
- Map allServers = currentServers;
- ITenantRouter tenantRouter =
- new TenantRouter(currentServers, currentTrafficDirective, currentServerGroupTags);
- updateListener.onUpdate(new IServerSelector() {
- @Override
- public boolean exists(String serverId) {
- return allServers.containsKey(serverId);
- }
-
- @Override
- public IServerGroupRouter get(String tenantId) {
- return tenantRouter.get(tenantId);
- }
-
- @Override
- public String toString() {
- return allServers.toString();
- }
- });
+ if (newState == READY || (newState == TRANSIENT_FAILURE && currentServers.isEmpty())) {
+ // notify when the channel is READY, or when it is in TRANSIENT_FAILURE with no servers available
+ updateListener.onUpdate(
+ new TenantAwareServerSelector(currentServers, currentServerGroupTags, currentTrafficDirective));
+ }
}
balancingStateUpdateScheduled.set(false);
}
@@ -275,10 +267,4 @@ private void updateSubChannelState(Subchannel subchannel, ConnectivityStateInfo
subchannel.requestConnection();
}
}
-
- private static Set difference(Set a, Set b) {
- Set aCopy = new HashSet<>(a);
- aCopy.removeAll(b);
- return aCopy;
- }
}
diff --git a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java
index 2f740f0c4..7439c173d 100644
--- a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java
+++ b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java
@@ -14,13 +14,11 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.baserpc.server;
-import org.apache.bifromq.base.util.FutureTracker;
-import org.apache.bifromq.baserpc.metrics.RPCMetric;
import io.grpc.Status;
import io.grpc.stub.StreamObserver;
import io.micrometer.core.instrument.Timer;
@@ -28,6 +26,8 @@
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.base.util.FutureTracker;
+import org.apache.bifromq.baserpc.metrics.RPCMetric;
@Slf4j
abstract class AbstractResponsePipeline extends AbstractStreamObserver {
@@ -40,6 +40,7 @@ abstract class AbstractResponsePipeline extends AbstractStr
protected AbstractResponsePipeline(StreamObserver responseObserver) {
super(responseObserver);
+ this.responseObserver.setOnCancelHandler(this::cleanup);
}
@Override
@@ -80,7 +81,7 @@ private void close(Throwable t) {
/**
* Handle the request and return the result via completable future, remember always throw exception asynchronously
- * Returned future complete exceptionally will cause pipeline close
+ * A returned future that completes exceptionally will cause the pipeline to close.
*
* @param tenantId the tenantId
* @param request the request
@@ -110,7 +111,6 @@ final CompletableFuture startHandlingRequest(RequestT request) {
return respFuture;
}
-
final void emitResponse(RequestT req, ResponseT resp) {
if (!isClosed()) {
log.trace("Response sent in pipeline@{}: request={}, response={}", hashCode(), req, resp);
@@ -125,7 +125,6 @@ final void emitResponse(RequestT req, ResponseT resp) {
protected void afterClose() {
}
-
private void fail(Throwable throwable) {
if (!isClosed()) {
if (throwable instanceof CancellationException) {
diff --git a/base-rpc/base-rpc-traffic-governor/pom.xml b/base-rpc/base-rpc-traffic-governor/pom.xml
index 7e3e47a08..78932bdb9 100644
--- a/base-rpc/base-rpc-traffic-governor/pom.xml
+++ b/base-rpc/base-rpc-traffic-governor/pom.xml
@@ -38,10 +38,26 @@
org.apache.bifromq
base-rpc-grpc-inproc
+
org.awaitility
awaitility
+
+ org.apache.logging.log4j
+ log4j-api
+ test
+
+
+ org.apache.logging.log4j
+ log4j-core
+ test
+
+
+ org.apache.logging.log4j
+ log4j-slf4j2-impl
+ test
+
diff --git a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java
index 4cd67863a..b0724d0c6 100644
--- a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java
+++ b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java
@@ -14,24 +14,20 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.baserpc.trafficgovernor;
-import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER;
import static java.util.Collections.emptySet;
+import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER;
-import org.apache.bifromq.basecrdt.service.ICRDTService;
-import org.apache.bifromq.basehlc.HLC;
-import org.apache.bifromq.baserpc.proto.RPCServer;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.protobuf.ByteString;
import io.grpc.inprocess.InProcessSocketAddress;
import io.reactivex.rxjava3.core.Observable;
import io.reactivex.rxjava3.disposables.CompositeDisposable;
-import io.reactivex.rxjava3.disposables.Disposable;
import io.reactivex.rxjava3.subjects.BehaviorSubject;
import java.net.InetSocketAddress;
import java.util.Map;
@@ -41,6 +37,10 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.base.util.RendezvousHash;
+import org.apache.bifromq.basecrdt.service.ICRDTService;
+import org.apache.bifromq.basehlc.HLC;
+import org.apache.bifromq.baserpc.proto.RPCServer;
@Slf4j
class RPCServiceTrafficManager extends RPCServiceAnnouncer
@@ -131,8 +131,8 @@ private Set refreshAliveServerList(Map announ
for (RPCServer server : announcedServers.values()) {
if (aliveAnnouncers.contains(server.getAnnouncerId())) {
aliveServers.add(build(server));
- } else {
- // this is a side effect: revoke the announcement made by dead announcer
+ } else if (shouldClean(aliveAnnouncers, server.getAnnouncerId())) {
+ // revoke the announcement made by a dead announcer
log.debug("Remove not alive server announcement: {}", server.getId());
revoke(server.getId());
}
@@ -140,6 +140,18 @@ private Set refreshAliveServerList(Map announ
return aliveServers;
}
+ private boolean shouldClean(Set aliveAnnouncers, ByteString failedAnnouncer) {
+ aliveAnnouncers.add(id());
+ RendezvousHash hash = RendezvousHash.builder()
+ .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer()))
+ .nodes(aliveAnnouncers)
+ .build();
+ ByteString cleaner = hash.get(failedAnnouncer);
+ return cleaner.equals(id());
+ }
+
+
private ServerEndpoint build(RPCServer server) {
return new ServerEndpoint(server.getAgentHostId(),
server.getId(),
@@ -157,41 +169,49 @@ private static class ServerRegistration implements IServerRegistration {
private final RPCServiceTrafficManager manager;
private final AtomicReference localServer;
- private final Disposable disposable;
- private final CompositeDisposable disposables;
+ private final CompositeDisposable myDisposibles = new CompositeDisposable();
+ private final CompositeDisposable allDisposibles;
- private ServerRegistration(RPCServer server, RPCServiceTrafficManager announcer,
- CompositeDisposable disposables) {
+ private ServerRegistration(RPCServer server,
+ RPCServiceTrafficManager announcer,
+ CompositeDisposable allDisposables) {
this.localServer = new AtomicReference<>(server);
this.manager = announcer;
- this.disposables = disposables;
+ this.allDisposibles = allDisposables;
// make an announcement via rpcServiceCRDT
log.debug("Announce local server[{}]:{}", announcer.serviceUniqueName, server);
announcer.announce(localServer.get()).join();
// enforce the announcement consistent eventually
- disposable = announcer.announcedServers()
+ myDisposibles.add(announcer.announcedServers()
.doOnDispose(() -> manager.revoke(localServer.get().getId()).join())
.subscribe(serverMap -> {
RPCServer localServer = this.localServer.get();
if (!serverMap.containsKey(localServer.getId())) {
- RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build();
- log.debug("Re-announce local server: {}", toUpdate);
- // refresh announcement time
- announcer.announce(toUpdate);
+ reannounce();
} else if (localServer.getAnnouncedTS() < serverMap.get(localServer.getId()).getAnnouncedTS()) {
localServer = serverMap.get(localServer.getId());
log.debug("Update local server from announcement: server={}", localServer);
}
- });
- disposables.add(disposable);
+ }));
+ myDisposibles.add(announcer.crdtService.refreshSignal()
+ .subscribe(ts -> reannounce()));
+ allDisposables.add(myDisposibles);
+ }
+
+ private void reannounce() {
+ RPCServer localServer = this.localServer.get();
+ RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build();
+ log.debug("Re-announce local server: {}", toUpdate);
+ // refresh announcement time
+ manager.announce(toUpdate);
}
@Override
public void stop() {
- disposables.remove(disposable);
- disposable.dispose();
+ allDisposibles.remove(myDisposibles);
+ myDisposibles.dispose();
}
}
}
diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java
index d443430a8..754f46284 100644
--- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java
+++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java
@@ -14,32 +14,33 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basescheduler;
-import org.apache.bifromq.basescheduler.exception.BackPressureException;
-import org.apache.bifromq.basescheduler.spi.ICapacityEstimator;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.DistributionSummary;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Metrics;
import io.micrometer.core.instrument.Tags;
import io.micrometer.core.instrument.Timer;
-import java.util.ArrayDeque;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.basescheduler.exception.BackPressureException;
+import org.apache.bifromq.basescheduler.spi.ICapacityEstimator;
@Slf4j
final class Batcher {
@@ -72,7 +73,7 @@ final class Batcher {
this.batchCallBuilder = batchCallBuilder;
this.capacityEstimator = capacityEstimator;
this.maxBurstLatency = maxBurstLatency;
- this.batchPool = new ArrayDeque<>();
+ this.batchPool = new ConcurrentLinkedDeque<>();
this.emaQueueingTime = new EMALong(System::nanoTime, 0.1, 0.9, maxBurstLatency);
Tags tags = Tags.of("name", name, "key", Integer.toUnsignedString(System.identityHashCode(this)));
maxPipelineDepthGauge = Gauge.builder("batcher.pipeline.max", capacityEstimator::maxPipelineDepth)
@@ -96,7 +97,7 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque
return CompletableFuture.failedFuture(
new RejectedExecutionException("Batcher has been shut down"));
}
- if (emaQueueingTime.get() < maxBurstLatency) {
+ if (Math.max(emaQueueingTime.get(), headCallWaitingNanos()) < maxBurstLatency) {
ICallTask callTask = new CallTask<>(batcherKey, request);
boolean offered = callTaskBuffers.offer(callTask);
assert offered;
@@ -104,10 +105,18 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque
return callTask.resultPromise();
} else {
dropCounter.increment();
- return CompletableFuture.failedFuture(new BackPressureException("Too high average latency"));
+ return CompletableFuture.failedFuture(new BackPressureException("Batch call busy"));
}
}
+ private long headCallWaitingNanos() {
+ ICallTask head = callTaskBuffers.peek();
+ if (head != null) {
+ return System.nanoTime() - head.ts();
+ }
+ return 0;
+ }
+
public CompletableFuture close() {
if (state.compareAndSet(State.RUNNING, State.SHUTTING_DOWN)) {
checkShutdownCompletion();
@@ -137,6 +146,7 @@ private void cleanupMetrics() {
while ((batchCall = batchPool.poll()) != null) {
batchCall.destroy();
}
+ batchCallBuilder.close();
}
private void trigger() {
@@ -178,30 +188,37 @@ private void batchAndEmit() {
int finalBatchSize = batchSize;
CompletableFuture future = batchCall.execute();
runningBatchCalls.add(future);
- future.whenComplete((v, e) -> {
- runningBatchCalls.remove(future);
- long execEnd = System.nanoTime();
- if (e != null) {
- batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e));
- } else {
- long batchCallLatency = execEnd - execBegin;
- capacityEstimator.record(finalBatchSize, batchCallLatency);
- batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS);
- batchedTasks.forEach(t -> {
- long callLatency = execEnd - t.ts();
- batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS);
- });
- }
- returnBatchCall(batchCall);
- pipelineDepth.getAndDecrement();
- // After each completion, check for shutdown
- if (state.get() == State.SHUTTING_DOWN) {
- checkShutdownCompletion();
- }
- if (!callTaskBuffers.isEmpty()) {
- trigger();
- }
- });
+ future
+ .orTimeout(maxBurstLatency, TimeUnit.NANOSECONDS) // Ensure we don't block indefinitely
+ .whenComplete((v, e) -> {
+ runningBatchCalls.remove(future);
+ long execEnd = System.nanoTime();
+ if (e != null) {
+ if (e instanceof TimeoutException) {
+ batchedTasks.forEach(t -> t.resultPromise()
+ .completeExceptionally(new BackPressureException("Batch Call timeout", e)));
+ } else {
+ batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e));
+ }
+ } else {
+ long batchCallLatency = execEnd - execBegin;
+ capacityEstimator.record(finalBatchSize, batchCallLatency);
+ batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS);
+ batchedTasks.forEach(t -> {
+ long callLatency = execEnd - t.ts();
+ batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS);
+ });
+ }
+ returnBatchCall(batchCall);
+ pipelineDepth.getAndDecrement();
+ // After each completion, check for shutdown
+ if (state.get() == State.SHUTTING_DOWN) {
+ checkShutdownCompletion();
+ }
+ if (!callTaskBuffers.isEmpty()) {
+ trigger();
+ }
+ });
} catch (Throwable e) {
log.error("Batch call failed unexpectedly", e);
batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e));
diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java
index 0f8d898c9..f412a559e 100644
--- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java
+++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java
@@ -19,43 +19,58 @@
package org.apache.bifromq.basescheduler;
-import java.util.concurrent.atomic.AtomicLong;
+import com.google.common.base.Preconditions;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
class EMALong {
private static final double NANOS_PER_SECOND = 1_000_000_000.0;
private final Supplier nowSupplier;
- private final double alpha;
- private final double decay;
+ private final double alpha; // (0,1]
+ private final double decay; // (0,1]
private final long decayDelayNanos;
- private final AtomicLong value = new AtomicLong(0);
- private final AtomicLong lastUpdateTime = new AtomicLong(0);
+ private final AtomicReference state;
public EMALong(Supplier nowSupplier, double alpha, double decay, long decayDelayNanos) {
+ Preconditions.checkArgument(alpha > 0.0 && alpha <= 1.0, "alpha must be in (0,1]");
+ Preconditions.checkArgument(decay > 0.0 && decay <= 1.0, "decay must be in (0,1]");
+ Preconditions.checkArgument(decayDelayNanos >= 0, "decayDelayNanos must be non-negative");
this.nowSupplier = nowSupplier;
this.alpha = alpha;
this.decay = decay;
this.decayDelayNanos = decayDelayNanos;
+ this.state = new AtomicReference<>(new State(0L, 0L));
}
public void update(long newValue) {
- value.updateAndGet(v -> {
- lastUpdateTime.set(nowSupplier.get());
- if (v == 0) {
- return newValue;
- } else {
- return (long) Math.ceil(v * (1 - alpha) + newValue * alpha);
+ long now = nowSupplier.get();
+ while (true) {
+ State prev = state.get();
+ long newEma = (prev.ema == 0L) ? newValue : (long) Math.ceil(prev.ema * (1 - alpha) + newValue * alpha);
+ State next = new State(newEma, now);
+ if (state.compareAndSet(prev, next)) {
+ return;
}
- });
+ }
}
public long get() {
long now = nowSupplier.get();
- long lastUpdate = lastUpdateTime.get();
- if (decayDelayNanos < Long.MAX_VALUE && lastUpdate + decayDelayNanos < now) {
- return (long) (value.get()
- * Math.pow(decay, Math.ceil((now - lastUpdate - decayDelayNanos) / NANOS_PER_SECOND)));
+ State s = state.get();
+ if (s.ema == 0L || s.lastTs == 0L) {
+ return s.ema;
+ }
+ if (decayDelayNanos < Long.MAX_VALUE) {
+ long dt = now - s.lastTs;
+ if (dt > decayDelayNanos) {
+ double seconds = Math.ceil((dt - decayDelayNanos) / NANOS_PER_SECOND);
+ double decayed = s.ema * Math.pow(decay, seconds);
+ return decayed < 1.0 ? 0L : Math.round(decayed);
+ }
}
- return value.get();
+ return s.ema;
+ }
+
+ private record State(long ema, long lastTs) {
}
}
diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java
index 21d3b63e9..1661d5405 100644
--- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java
+++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java
@@ -14,14 +14,14 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basescheduler;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.fail;
-import org.apache.bifromq.basescheduler.exception.BackPressureException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
@@ -33,6 +33,7 @@
import java.util.concurrent.atomic.AtomicInteger;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.basescheduler.exception.BackPressureException;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -61,9 +62,7 @@ public void batchCall() {
executor.submit(() -> {
int i;
while ((i = count.decrementAndGet()) >= 0) {
- scheduler.schedule(i).whenComplete((v, e) -> {
- latch.countDown();
- });
+ scheduler.schedule(i).whenComplete((v, e) -> latch.countDown());
}
});
latch.await();
@@ -93,4 +92,17 @@ public void backPressure() {
assertEquals(e.getCause().getClass(), BackPressureException.class);
}
}
+
+ @Test
+ public void batchCallTimeout() {
+ TestBatchCallScheduler scheduler =
+ new TestBatchCallScheduler(1, Duration.ofNanos(Long.MAX_VALUE), Duration.ofSeconds(1));
+ try {
+ scheduler.schedule(1).join();
+ fail();
+ } catch (Throwable e) {
+ assertEquals(e.getCause().getClass(), BackPressureException.class);
+ }
+ scheduler.close();
+ }
}
diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java
index 49fd1726a..3ca03f620 100644
--- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java
+++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.basescheduler;
@@ -81,10 +81,10 @@ void testDecayBeforeDelay() {
void testDecayAfterDelay() {
// set decay=0.5, decayDelay=1s
EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L);
- fakeTime.set(0L);
+ fakeTime.set(1L);
ema.update(100L);
// advance time to after delay + 2s total => one decay period
- fakeTime.set(1_000_000_000L + 1_000_000_000L);
+ fakeTime.set(1_000_000_001L + 1_000_000_000L);
// (now - lastUpdate - delay) / 1e9 = (2s - 1s)/1e9 = 1 => ceil(1) =1
// value * decay^1 = 100 * 0.5 = 50
assertEquals(ema.get(), 50);
@@ -94,7 +94,7 @@ void testDecayAfterDelay() {
void testMultipleDecayPeriods() {
// decay=0.5, delay=1s
EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L);
- fakeTime.set(0L);
+ fakeTime.set(1L);
ema.update(80L);
// advance time to after delay + 3.2s => ceil(3.2)=4 periods
fakeTime.set(1_000_000_000L + 3_200_000_000L);
diff --git a/base-util/pom.xml b/base-util/pom.xml
index ca88f6c85..701acd89b 100644
--- a/base-util/pom.xml
+++ b/base-util/pom.xml
@@ -31,6 +31,10 @@
base-util
+
+ com.google.guava
+ guava
+
io.micrometer
micrometer-core
diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java
similarity index 94%
rename from bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java
rename to base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java
index b5ec12199..583e33483 100644
--- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java
+++ b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java
@@ -14,10 +14,10 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
-package org.apache.bifromq.dist.worker;
+package org.apache.bifromq.base.util;
import static com.google.common.hash.Hashing.murmur3_128;
@@ -31,7 +31,7 @@
* @param The type of the node.
*/
@Builder
-class RendezvousHash {
+public class RendezvousHash {
private final Funnel keyFunnel;
private final Funnel nodeFunnel;
private final Iterable nodes;
diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java
similarity index 98%
rename from bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java
rename to base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java
index a08733379..55b954c35 100644
--- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java
+++ b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java
@@ -17,7 +17,7 @@
* under the License.
*/
-package org.apache.bifromq.dist.worker;
+package org.apache.bifromq.base.util;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
diff --git a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java
index edb96f62b..b65f248fb 100644
--- a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java
+++ b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java
@@ -135,6 +135,7 @@ public void setup() {
.storeOptions(kvRangeStoreOptions)
.subBrokerManager(subBrokerMgr)
.settingProvider(settingProvider)
+ .bootstrapDelay(Duration.ofSeconds(1))
.build();
distServer = IDistServer.builder()
.rpcServerBuilder(rpcServerBuilder)
@@ -145,7 +146,7 @@ public void setup() {
rpcServer = rpcServerBuilder.build();
rpcServer.start();
- await().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet()));
distClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst();
log.info("Setup finished, and start testing");
}
diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java
index e0f87f398..70fcdb47a 100644
--- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java
+++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java
@@ -39,6 +39,7 @@
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
+import org.apache.bifromq.base.util.RendezvousHash;
import org.apache.bifromq.deliverer.IMessageDeliverer;
import org.apache.bifromq.deliverer.TopicMessagePackHolder;
import org.apache.bifromq.dist.worker.schema.GroupMatching;
diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java
index 106d29007..975f0e1d1 100644
--- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java
+++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java
@@ -235,10 +235,11 @@ public void setup() {
.subBrokerManager(receiverManager)
.settingProvider(settingProvider)
.inlineFanoutThreshold(1)
+ .bootstrapDelay(Duration.ofSeconds(1))
.build();
rpcServer = rpcServerBuilder.build();
rpcServer.start();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
log.info("Setup finished, and start testing");
}
diff --git a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto
index 66fa081d3..83ce4f89d 100644
--- a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto
+++ b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto
@@ -196,9 +196,13 @@ message BatchFetchReply{
repeated Fetched result = 1;
}
+message MatchedRoute{
+ string topicFilter = 1; // the matched topic filter
+ uint64 incarnation = 2; // route incarnation
+}
// insert won't change version & lastActive timestamp
message SubMessagePack{
- map matchedTopicFilters = 1; // key: topicFilter, value: route incarnation
+ repeated MatchedRoute matchedRoute = 1;
commontype.TopicMessagePack messages = 2;
}
@@ -219,9 +223,8 @@ message InsertResult{
NO_INBOX = 1;
}
message SubStatus{
- string topicFilter = 1;
- uint64 incarnation = 2;
- bool rejected = 3;
+ MatchedRoute matchedRoute = 1;
+ bool rejected = 2; // true if the route is outdated
}
Code code = 1;
repeated SubStatus result = 2;
diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java
index 941a3865c..77c0d4280 100644
--- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java
+++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.server;
@@ -22,6 +22,15 @@
import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap;
import static org.apache.bifromq.plugin.subbroker.TypeUtil.toResult;
+import java.time.Duration;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.base.util.AsyncRetry;
import org.apache.bifromq.base.util.exception.RetryTimeoutException;
import org.apache.bifromq.basekv.client.exception.BadVersionException;
@@ -34,6 +43,7 @@
import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler;
import org.apache.bifromq.inbox.storage.proto.InsertRequest;
import org.apache.bifromq.inbox.storage.proto.InsertResult;
+import org.apache.bifromq.inbox.storage.proto.MatchedRoute;
import org.apache.bifromq.inbox.storage.proto.SubMessagePack;
import org.apache.bifromq.plugin.subbroker.DeliveryPack;
import org.apache.bifromq.plugin.subbroker.DeliveryReply;
@@ -41,14 +51,6 @@
import org.apache.bifromq.sysprops.props.DataPlaneMaxBurstLatencyMillis;
import org.apache.bifromq.type.MatchInfo;
import org.apache.bifromq.type.TopicMessagePack;
-import org.apache.bifromq.util.TopicUtil;
-import java.time.Duration;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.CompletableFuture;
-import lombok.extern.slf4j.Slf4j;
@Slf4j
class InboxWriter implements InboxWriterPipeline.ISendRequestHandler {
@@ -62,7 +64,7 @@ class InboxWriter implements InboxWriterPipeline.ISendRequestHandler {
@Override
public CompletableFuture handle(SendRequest request) {
- Map> matchInfosByInbox = new HashMap<>();
+ Map> matchInfosByInbox = new HashMap<>();
Map> subMsgPacksByInbox = new HashMap<>();
// break DeliveryPack into SubMessagePack by each TenantInboxInstance
for (String tenantId : request.getRequest().getPackageMap().keySet()) {
@@ -71,11 +73,15 @@ public CompletableFuture handle(SendRequest request) {
Map subMsgPackByInbox = new HashMap<>();
for (MatchInfo matchInfo : pack.getMatchInfoList()) {
TenantInboxInstance tenantInboxInstance = TenantInboxInstance.from(tenantId, matchInfo);
- matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>()).add(matchInfo);
+ MatchedRoute matchedRoute = MatchedRoute.newBuilder()
+ .setTopicFilter(matchInfo.getMatcher().getMqttTopicFilter())
+ .setIncarnation(matchInfo.getIncarnation())
+ .build();
+ matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new HashMap<>())
+ .put(matchedRoute, matchInfo);
subMsgPackByInbox.computeIfAbsent(tenantInboxInstance,
k -> SubMessagePack.newBuilder().setMessages(topicMessagePack))
- .putMatchedTopicFilters(matchInfo.getMatcher().getMqttTopicFilter(),
- matchInfo.getIncarnation());
+ .addMatchedRoute(matchedRoute);
}
for (TenantInboxInstance tenantInboxInstance : subMsgPackByInbox.keySet()) {
subMsgPacksByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>())
@@ -127,20 +133,21 @@ public CompletableFuture handle(SendRequest request) {
Map> tenantMatchResultMap = new HashMap<>();
int i = 0;
for (TenantInboxInstance tenantInboxInstance : subMsgPacksByInbox.keySet()) {
- String receiverId = tenantInboxInstance.receiverId();
+ Map matchedRoutesMap = matchInfosByInbox.get(tenantInboxInstance);
InsertResult result = replyFutures.get(i++).join();
Map matchResultMap =
tenantMatchResultMap.computeIfAbsent(tenantInboxInstance.tenantId(), k -> new HashMap<>());
switch (result.getCode()) {
- case OK -> result.getResultList().forEach(insertionResult -> {
- DeliveryResult.Code code =
- insertionResult.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK;
- matchResultMap.putIfAbsent(MatchInfo.newBuilder().setReceiverId(receiverId)
- .setMatcher(TopicUtil.from(insertionResult.getTopicFilter()))
- .setIncarnation(insertionResult.getIncarnation()).build(), code);
- });
+ case OK -> {
+ Function resultFinder =
+ getFinalResultFinder(result.getResultList());
+ for (MatchedRoute matchedRoute : matchedRoutesMap.keySet()) {
+ matchResultMap.putIfAbsent(matchedRoutesMap.get(matchedRoute),
+ resultFinder.apply(matchedRoute));
+ }
+ }
case NO_INBOX -> {
- for (MatchInfo matchInfo : matchInfosByInbox.get(tenantInboxInstance)) {
+ for (MatchInfo matchInfo : matchedRoutesMap.values()) {
matchResultMap.putIfAbsent(matchInfo, DeliveryResult.Code.NO_RECEIVER);
}
}
@@ -155,4 +162,44 @@ public CompletableFuture handle(SendRequest request) {
.build()).build();
}));
}
+
+    /**
+     * Builds a lookup that always yields a delivery code for a matched route.
+     *
+     * <p>Wraps the finder returned by {@link #getResultFinder(List)}: when that finder has no entry
+     * for a route, a warning is logged and {@code NO_SUB} is returned instead of {@code null}.
+     *
+     * @param subStatuses per-route statuses taken from an insert result
+     * @return a function mapping a matched route to a non-null delivery code
+     */
+    private Function<MatchedRoute, DeliveryResult.Code> getFinalResultFinder(
+        List<InsertResult.SubStatus> subStatuses) {
+        Function<MatchedRoute, DeliveryResult.Code> resultFinder = getResultFinder(subStatuses);
+        return matchedRoute -> {
+            DeliveryResult.Code code = resultFinder.apply(matchedRoute);
+            if (code == null) {
+                // incomplete result from coproc: an unreported route is answered as NO_SUB
+                log.warn("MatchedRoute {} is missing in result", matchedRoute);
+                return DeliveryResult.Code.NO_SUB;
+            }
+            return code;
+        };
+    }
+
+    /**
+     * Builds a lookup from matched route to delivery code over the given sub statuses.
+     *
+     * <p>A rejected status maps to {@code NO_SUB}, any other status to {@code OK}. The lookup
+     * strategy depends on the list size: a direct comparison for a single status, a linear scan
+     * for fewer than ten, and a hash map otherwise. If a route is reported more than once, the
+     * first status wins in every strategy.
+     *
+     * @param subStatuses per-route statuses taken from an insert result
+     * @return a function yielding the code for a route, or {@code null} when the route is not
+     *     present in {@code subStatuses}
+     */
+    private Function<MatchedRoute, DeliveryResult.Code> getResultFinder(
+        List<InsertResult.SubStatus> subStatuses) {
+        if (subStatuses.size() == 1) {
+            InsertResult.SubStatus onlyStatus = subStatuses.get(0);
+            return matchedRoute -> {
+                if (matchedRoute.equals(onlyStatus.getMatchedRoute())) {
+                    return onlyStatus.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK;
+                }
+                return null;
+            };
+        } else if (subStatuses.size() < 10) {
+            // few entries (including none): scanning the list is enough, no map is built
+            return matchedRoute -> {
+                for (InsertResult.SubStatus status : subStatuses) {
+                    if (status.getMatchedRoute().equals(matchedRoute)) {
+                        return status.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK;
+                    }
+                }
+                return null;
+            };
+        } else {
+            // keep the first status for a repeated route; the two-argument toMap would throw
+            // IllegalStateException on a duplicate key, unlike the scan above which returns the first match
+            Map<MatchedRoute, DeliveryResult.Code> resultMap = subStatuses.stream()
+                .collect(Collectors.toMap(InsertResult.SubStatus::getMatchedRoute,
+                    e -> e.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK,
+                    (first, second) -> first));
+            return resultMap::get;
+        }
+    }
}
diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java
index 7306ea6e3..bf560d9d8 100644
--- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java
+++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.server;
@@ -26,15 +26,6 @@
import static org.mockito.ArgumentMatchers.anyString;
import static org.mockito.Mockito.when;
-import org.apache.bifromq.baserpc.client.IRPCClient;
-import org.apache.bifromq.baserpc.server.IRPCServer;
-import org.apache.bifromq.baserpc.server.RPCServerBuilder;
-import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
-import org.apache.bifromq.plugin.eventcollector.IEventCollector;
-import org.apache.bifromq.plugin.settingprovider.ISettingProvider;
-import org.apache.bifromq.plugin.settingprovider.Setting;
-import org.apache.bifromq.retain.client.IRetainClient;
-import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler;
import java.lang.reflect.Method;
import java.time.Duration;
import java.util.concurrent.CompletableFuture;
@@ -51,11 +42,20 @@
import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService;
import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions;
import org.apache.bifromq.basekv.utils.BoundaryUtil;
+import org.apache.bifromq.baserpc.client.IRPCClient;
+import org.apache.bifromq.baserpc.server.IRPCServer;
+import org.apache.bifromq.baserpc.server.RPCServerBuilder;
+import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
import org.apache.bifromq.dist.client.IDistClient;
import org.apache.bifromq.dist.client.MatchResult;
import org.apache.bifromq.dist.client.UnmatchResult;
import org.apache.bifromq.inbox.client.IInboxClient;
import org.apache.bifromq.inbox.store.IInboxStore;
+import org.apache.bifromq.plugin.eventcollector.IEventCollector;
+import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler;
+import org.apache.bifromq.plugin.settingprovider.ISettingProvider;
+import org.apache.bifromq.plugin.settingprovider.Setting;
+import org.apache.bifromq.retain.client.IRetainClient;
import org.apache.bifromq.sessiondict.client.ISessionDictClient;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
@@ -144,6 +144,7 @@ public void setup() {
.tickerThreads(tickerThreads)
.bgTaskExecutor(bgTaskExecutor)
.detachTimeout(Duration.ofSeconds(2))
+ .bootstrapDelay(Duration.ofSeconds(1))
.build();
inboxServer = IInboxServer.builder()
.rpcServerBuilder(rpcServerBuilder)
@@ -153,7 +154,7 @@ public void setup() {
.build();
rpcServer = rpcServerBuilder.build();
rpcServer.start();
- await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet()));
inboxClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst();
log.info("Setup finished, and start testing");
}
diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java
index 1d811d3e0..67a51d029 100644
--- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java
+++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.server;
@@ -25,16 +25,17 @@
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertEquals;
+import java.util.concurrent.CompletableFuture;
+import lombok.SneakyThrows;
import org.apache.bifromq.inbox.rpc.proto.SendReply;
import org.apache.bifromq.inbox.rpc.proto.SendRequest;
import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler;
import org.apache.bifromq.inbox.storage.proto.InsertRequest;
import org.apache.bifromq.inbox.storage.proto.InsertResult;
+import org.apache.bifromq.inbox.storage.proto.MatchedRoute;
import org.apache.bifromq.plugin.subbroker.DeliveryReply;
import org.apache.bifromq.plugin.subbroker.DeliveryResult;
import org.apache.bifromq.plugin.subbroker.DeliveryResults;
-import java.util.concurrent.CompletableFuture;
-import lombok.SneakyThrows;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.testng.annotations.AfterMethod;
@@ -86,8 +87,10 @@ public void insertScheduleRejected() {
InsertResult.newBuilder()
.addResult(InsertResult.SubStatus.newBuilder()
.setRejected(true)
- .setIncarnation(1L)
- .setTopicFilter("/foo/+")
+ .setMatchedRoute(MatchedRoute.newBuilder()
+ .setIncarnation(1L)
+ .setTopicFilter("/foo/+")
+ .build())
.build())
.setCode(InsertResult.Code.OK)
.build()));
@@ -103,8 +106,10 @@ public void insertScheduleOk() {
.setCode(InsertResult.Code.OK)
.addResult(InsertResult.SubStatus.newBuilder()
.setRejected(false)
- .setTopicFilter("/foo/+")
- .setIncarnation(1L)
+ .setMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter("/foo/+")
+ .setIncarnation(1L)
+ .build())
.build())
.build()));
SendRequest request = sendRequest();
diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java
index 3a019d80b..383e06acb 100644
--- a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java
+++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.store;
@@ -116,6 +116,7 @@
import org.apache.bifromq.inbox.storage.proto.InsertRequest;
import org.apache.bifromq.inbox.storage.proto.InsertResult;
import org.apache.bifromq.inbox.storage.proto.LWT;
+import org.apache.bifromq.inbox.storage.proto.MatchedRoute;
import org.apache.bifromq.inbox.storage.proto.SubMessagePack;
import org.apache.bifromq.inbox.store.delay.DelayTaskRunner;
import org.apache.bifromq.inbox.store.delay.ExpireInboxTask;
@@ -966,39 +967,48 @@ private Runnable batchInsert(BatchInsertRequest request,
List bufferMsgList = new ArrayList<>();
Set insertResults = new HashSet<>();
for (SubMessagePack messagePack : params.getMessagePackList()) {
- Map matchedTopicFilters = messagePack.getMatchedTopicFiltersMap();
Map qos0TopicFilters = new HashMap<>();
Map qos1TopicFilters = new HashMap<>();
Map qos2TopicFilters = new HashMap<>();
TopicMessagePack topicMsgPack = messagePack.getMessages();
- for (String matchedTopicFilter : matchedTopicFilters.keySet()) {
- long matchedIncarnation = matchedTopicFilters.get(matchedTopicFilter);
- TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedTopicFilter);
+ for (MatchedRoute matchedRoute : messagePack.getMatchedRouteList()) {
+ long matchedIncarnation = matchedRoute.getIncarnation();
+ TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedRoute.getTopicFilter());
if (tfOption == null) {
- insertResults.add(
- InsertResult.SubStatus.newBuilder().setTopicFilter(matchedTopicFilter)
- .setIncarnation(matchedIncarnation).setRejected(true).build());
+ insertResults.add(InsertResult.SubStatus.newBuilder()
+ .setMatchedRoute(matchedRoute)
+ .setRejected(true)
+ .build());
} else {
if (tfOption.getIncarnation() > matchedIncarnation) {
// messages from old sub incarnation
log.debug("Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}",
- matchedTopicFilter, tfOption.getIncarnation(), matchedIncarnation);
+ matchedRoute, tfOption.getIncarnation(), matchedIncarnation);
+ insertResults.add(InsertResult.SubStatus.newBuilder()
+ .setMatchedRoute(matchedRoute)
+ .setRejected(true)
+ .build());
+ } else {
+ // messages from current incarnation
+ insertResults.add(InsertResult.SubStatus.newBuilder()
+ .setMatchedRoute(matchedRoute)
+ .setRejected(false)
+ .build());
}
switch (tfOption.getQos()) {
- case AT_MOST_ONCE -> qos0TopicFilters.put(matchedTopicFilter, tfOption);
- case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedTopicFilter, tfOption);
- case EXACTLY_ONCE -> qos2TopicFilters.put(matchedTopicFilter, tfOption);
+ case AT_MOST_ONCE -> qos0TopicFilters.put(matchedRoute.getTopicFilter(), tfOption);
+ case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedRoute.getTopicFilter(), tfOption);
+ case EXACTLY_ONCE -> qos2TopicFilters.put(matchedRoute.getTopicFilter(), tfOption);
default -> {
// never happens
}
}
- insertResults.add(InsertResult.SubStatus.newBuilder()
- .setTopicFilter(matchedTopicFilter)
- .setIncarnation(matchedIncarnation)
- .setRejected(false)
- .build());
}
}
+ if (qos0TopicFilters.isEmpty() && qos1TopicFilters.isEmpty() && qos2TopicFilters.isEmpty()) {
+ // no matched topic filter, skip this message pack
+ continue;
+ }
String topic = topicMsgPack.getTopic();
for (TopicMessagePack.PublisherPack publisherPack : topicMsgPack.getMessageList()) {
for (Message message : publisherPack.getMessageList()) {
diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java
index 2d08a10a0..e70fe507c 100644
--- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java
+++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.store;
@@ -39,6 +39,7 @@
import org.apache.bifromq.inbox.storage.proto.InboxVersion;
import org.apache.bifromq.inbox.storage.proto.InsertRequest;
import org.apache.bifromq.inbox.storage.proto.InsertResult;
+import org.apache.bifromq.inbox.storage.proto.MatchedRoute;
import org.apache.bifromq.inbox.storage.proto.SubMessagePack;
import org.apache.bifromq.plugin.eventcollector.inboxservice.Overflowed;
import org.apache.bifromq.type.ClientInfo;
@@ -61,7 +62,10 @@ public void insertNoInbox() {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 1L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(1L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(message(AT_MOST_ONCE, "hello"))
@@ -86,6 +90,147 @@ public void commitNoInbox() {
assertEquals(commitCode, BatchCommitReply.Code.NO_INBOX);
}
+
+ /**
+ * Inserts a message pack whose only matched route has no corresponding subscription.
+ *
+ * The inbox is attached but never subscribed. The test expects the insert to return OK with a
+ * single rejected sub status echoing the route, and a following fetch to return no messages.
+ */
+ @Test(groups = "integration")
+ public void insertWithUnmatchedTopicFilterRejected() {
+ long now = 0;
+ String tenantId = "tenantId-" + System.nanoTime();
+ String inboxId = "inboxId-" + System.nanoTime();
+ long incarnation = System.nanoTime();
+ // only attach the inbox; no subscription is created, so no incoming route can match one
+ ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build();
+ BatchAttachRequest.Params attachParams = BatchAttachRequest.Params.newBuilder()
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .setExpirySeconds(2)
+ .setLimit(10)
+ .setClient(client)
+ .setNow(now)
+ .build();
+ requestAttach(attachParams).get(0);
+
+ String unmatchedTF = "/not/subscribed";
+ TopicMessagePack.PublisherPack msg = message(QoS.AT_MOST_ONCE, "hello-unmatched");
+
+ // one pack, one matched route (incarnation 1) pointing at the unsubscribed topic filter
+ InsertResult insertResult = requestInsert(InsertRequest.newBuilder()
+ .setTenantId(tenantId)
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .addMessagePack(SubMessagePack.newBuilder()
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(unmatchedTF)
+ .setIncarnation(1L)
+ .build())
+ .setMessages(TopicMessagePack.newBuilder()
+ .setTopic(unmatchedTF)
+ .addMessage(msg)
+ .build())
+ .build())
+ .build()).get(0);
+
+ // the request itself succeeds, but the single route is reported back unchanged and rejected
+ assertEquals(insertResult.getCode(), InsertResult.Code.OK);
+ assertEquals(insertResult.getResultCount(), 1);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), unmatchedTF);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L);
+ assertTrue(insertResult.getResult(0).getRejected());
+
+ // no messages should be fetched: neither the qos0 list nor the send buffer holds anything
+ Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder()
+ .setTenantId(tenantId)
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .setMaxFetch(10)
+ .build()).get(0);
+ assertEquals(fetched.getQos0MsgCount(), 0);
+ assertEquals(fetched.getSendBufferMsgCount(), 0);
+ }
+
+ /**
+ * Inserts one message pack carrying two matched routes for the same topic filter: one with an
+ * older incarnation (0) and one with the subscribed incarnation (1).
+ *
+ * The test expects the incarnation-0 route to be reported as rejected, the incarnation-1 route
+ * as accepted, and both messages of the pack to be fetchable as QoS0 messages in insertion order.
+ */
+ @Test(groups = "integration")
+ public void insertWithOldAndCurrentIncarnationMixed() {
+ long now = 0;
+ String tenantId = "tenantId-" + System.nanoTime();
+ String inboxId = "inboxId-" + System.nanoTime();
+ long incarnation = System.nanoTime();
+ String topicFilter = "/a/b/c";
+
+ ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build();
+ InboxVersion inboxVersion = requestAttach(BatchAttachRequest.Params.newBuilder()
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .setExpirySeconds(2)
+ .setLimit(10)
+ .setClient(client)
+ .setNow(now)
+ .build()).get(0);
+
+ // subscribe the topic filter with incarnation 1 at QoS0; this is the "current" incarnation below
+ requestSub(BatchSubRequest.Params.newBuilder()
+ .setTenantId(tenantId)
+ .setInboxId(inboxId)
+ .setVersion(inboxVersion)
+ .setTopicFilter(topicFilter)
+ .setOption(TopicFilterOption.newBuilder()
+ .setIncarnation(1L)
+ .setQos(QoS.AT_MOST_ONCE)
+ .build())
+ .setMaxTopicFilters(100)
+ .setNow(now)
+ .build());
+
+ TopicMessagePack.PublisherPack msg1 = message(QoS.AT_MOST_ONCE, "keep-me-1");
+ TopicMessagePack.PublisherPack msg2 = message(QoS.AT_MOST_ONCE, "keep-me-2");
+
+ // same topicFilter, same package with 2 matched: one old (0), one current (1)
+ InsertResult insertResult = requestInsert(InsertRequest.newBuilder()
+ .setTenantId(tenantId)
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .addMessagePack(SubMessagePack.newBuilder()
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L) // old -> rejected=true
+ .build())
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(1L) // matched -> rejected=false
+ .build())
+ .setMessages(TopicMessagePack.newBuilder()
+ .setTopic(topicFilter)
+ .addMessage(msg1)
+ .addMessage(msg2)
+ .build())
+ .build())
+ .build()).get(0);
+
+ assertEquals(insertResult.getCode(), InsertResult.Code.OK);
+ // scan the returned statuses without relying on their order; each route must appear with its own verdict
+ boolean oldRejected = false;
+ boolean currAccepted = false;
+ for (InsertResult.SubStatus s : insertResult.getResultList()) {
+ if (s.getMatchedRoute().getTopicFilter().equals(topicFilter)
+ && s.getMatchedRoute().getIncarnation() == 0L) {
+ assertTrue(s.getRejected());
+ oldRejected = true;
+ }
+ if (s.getMatchedRoute().getTopicFilter().equals(topicFilter)
+ && s.getMatchedRoute().getIncarnation() == 1L) {
+ assertFalse(s.getRejected());
+ currAccepted = true;
+ }
+ }
+ assertTrue(oldRejected && currAccepted);
+
+ Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder()
+ .setTenantId(tenantId)
+ .setInboxId(inboxId)
+ .setIncarnation(incarnation)
+ .setMaxFetch(10)
+ .build()).get(0);
+
+ // the rejected old-incarnation route must not drop the messages: both are stored, in pack order
+ assertEquals(fetched.getQos0MsgCount(), 2);
+ assertEquals(fetched.getQos0Msg(0).getMsg().getMessage(), msg1.getMessage(0));
+ assertEquals(fetched.getQos0Msg(1).getMsg().getMessage(), msg2.getMessage(0));
+ }
+
protected void fetchWithoutStartAfter(QoS qos) {
long now = 0;
String tenantId = "tenantId-" + System.nanoTime();
@@ -120,7 +265,10 @@ protected void fetchWithoutStartAfter(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 1L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(1L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -129,8 +277,8 @@ protected void fetchWithoutStartAfter(QoS qos) {
.build())
.build()).get(0);
assertEquals(insertResult.getCode(), InsertResult.Code.OK);
- assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter);
- assertEquals(insertResult.getResult(0).getIncarnation(), 1L);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L);
Fetched fetched = requestFetch(
BatchFetchRequest.Params.newBuilder()
@@ -190,7 +338,10 @@ protected void fetchWithMaxLimit(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 1L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(1L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -199,8 +350,8 @@ protected void fetchWithMaxLimit(QoS qos) {
.build())
.build()).get(0);
assertEquals(insertResult.getCode(), InsertResult.Code.OK);
- assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter);
- assertEquals(insertResult.getResult(0).getIncarnation(), 1L);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter);
+ assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L);
Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder()
.setTenantId(tenantId)
@@ -270,7 +421,10 @@ protected void fetchWithStartAfter(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -284,7 +438,10 @@ protected void fetchWithStartAfter(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg4)
@@ -386,7 +543,10 @@ protected void commit(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -504,7 +664,10 @@ protected void commitAll(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -518,7 +681,10 @@ protected void commitAll(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg4)
@@ -587,7 +753,10 @@ protected void insertDropOldest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg0)
@@ -599,7 +768,10 @@ protected void insertDropOldest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -611,7 +783,10 @@ protected void insertDropOldest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg2)
@@ -646,7 +821,10 @@ protected void insertDropOldest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg0)
@@ -715,7 +893,10 @@ protected void insertDropYoungest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg0)
@@ -727,7 +908,10 @@ protected void insertDropYoungest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg1)
@@ -755,7 +939,10 @@ protected void insertDropYoungest(QoS qos) {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg0)
@@ -824,7 +1011,10 @@ public void insertQoS012() {
.setInboxId(inboxId)
.setIncarnation(incarnation)
.addMessagePack(SubMessagePack.newBuilder()
- .putMatchedTopicFilters(topicFilter, 0L)
+ .addMatchedRoute(MatchedRoute.newBuilder()
+ .setTopicFilter(topicFilter)
+ .setIncarnation(0L)
+ .build())
.setMessages(TopicMessagePack.newBuilder()
.setTopic(topicFilter)
.addMessage(msg0)
diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java
index 73f23877b..fdf2d27e4 100644
--- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java
+++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java
@@ -33,15 +33,6 @@
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
-import org.apache.bifromq.baserpc.client.IConnectable;
-import org.apache.bifromq.baserpc.server.IRPCServer;
-import org.apache.bifromq.baserpc.server.RPCServerBuilder;
-import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
-import org.apache.bifromq.plugin.eventcollector.IEventCollector;
-import org.apache.bifromq.plugin.settingprovider.ISettingProvider;
-import org.apache.bifromq.plugin.settingprovider.Setting;
-import org.apache.bifromq.retain.client.IRetainClient;
-import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler;
import com.google.protobuf.ByteString;
import io.micrometer.core.instrument.Gauge;
import io.micrometer.core.instrument.Meter;
@@ -87,6 +78,10 @@
import org.apache.bifromq.basekv.store.proto.RWCoProcInput;
import org.apache.bifromq.basekv.store.proto.ReplyCode;
import org.apache.bifromq.basekv.utils.BoundaryUtil;
+import org.apache.bifromq.baserpc.client.IConnectable;
+import org.apache.bifromq.baserpc.server.IRPCServer;
+import org.apache.bifromq.baserpc.server.RPCServerBuilder;
+import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService;
import org.apache.bifromq.dist.client.IDistClient;
import org.apache.bifromq.inbox.client.IInboxClient;
import org.apache.bifromq.inbox.storage.proto.BatchAttachRequest;
@@ -117,6 +112,11 @@
import org.apache.bifromq.inbox.storage.proto.InsertResult;
import org.apache.bifromq.inbox.storage.proto.Replica;
import org.apache.bifromq.metrics.TenantMetric;
+import org.apache.bifromq.plugin.eventcollector.IEventCollector;
+import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler;
+import org.apache.bifromq.plugin.settingprovider.ISettingProvider;
+import org.apache.bifromq.plugin.settingprovider.Setting;
+import org.apache.bifromq.retain.client.IRetainClient;
import org.apache.bifromq.sessiondict.client.ISessionDictClient;
import org.apache.bifromq.type.ClientInfo;
import org.apache.bifromq.type.Message;
@@ -208,7 +208,7 @@ public void setup() throws IOException {
rpcServer.start();
storeClient.connState().filter(connState -> connState == IConnectable.ConnState.READY).blockingFirst();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
log.info("Setup finished, and start testing");
}
@@ -232,6 +232,7 @@ private void buildStoreServer() {
.bgTaskExecutor(bgTaskExecutor)
.detachTimeout(Duration.ofSeconds(1))
.gcInterval(Duration.ofSeconds(1))
+ .bootstrapDelay(Duration.ofSeconds(1))
.build();
rpcServer = rpcServerBuilder.build();
}
diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java
index 12b10a968..b4911cb85 100644
--- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java
+++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java
@@ -59,7 +59,7 @@ public void gcJobAfterRestart() {
InboxVersion inboxVersion = requestAttach(attachParams).get(0);
restartStoreServer();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
when(sessionDictClient.exist(any())).thenReturn(CompletableFuture.completedFuture(OnlineCheckResult.NOT_EXISTS));
ArgumentCaptor deleteCaptor = ArgumentCaptor.forClass(DeleteRequest.class);
verify(inboxClient, timeout(10000)).delete(deleteCaptor.capture());
diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java
index 262bb628c..1609614d6 100644
--- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java
+++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.inbox.store;
@@ -73,7 +73,7 @@ public void collectAfterRestart() {
restartStoreServer();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
Gauge newSubCountGauge = getSubCountGauge(tenantId);
Gauge newPSessionGauge = getPSessionGauge(tenantId);
Gauge newPSessionSpaceGauge = getPSessionSpaceGauge(tenantId);
diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java
index 855fe427b..a1e593dc4 100644
--- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java
+++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java
@@ -184,6 +184,7 @@ public final void setupClass() {
.resourceThrottler(resourceThrottler)
.tickerThreads(tickerThreads)
.bgTaskExecutor(bgTaskExecutor)
+ .bootstrapDelay(Duration.ofSeconds(1))
.storeOptions(new KVRangeStoreOptions()
.setDataEngineConfigurator(new InMemKVEngineConfigurator())
.setWalEngineConfigurator(new InMemKVEngineConfigurator()))
@@ -214,6 +215,7 @@ public final void setupClass() {
.retainStoreClient(retainStoreKVStoreClient)
.tickerThreads(tickerThreads)
.bgTaskExecutor(bgTaskExecutor)
+ .bootstrapDelay(Duration.ofSeconds(1))
.storeOptions(new KVRangeStoreOptions()
.setDataEngineConfigurator(new InMemKVEngineConfigurator())
.setWalEngineConfigurator(new InMemKVEngineConfigurator()))
@@ -243,6 +245,7 @@ public final void setupClass() {
.distWorkerClient(distWorkerStoreClient)
.tickerThreads(tickerThreads)
.bgTaskExecutor(bgTaskExecutor)
+ .bootstrapDelay(Duration.ofSeconds(1))
.storeOptions(new KVRangeStoreOptions()
.setDataEngineConfigurator(new InMemKVEngineConfigurator())
.setWalEngineConfigurator(new InMemKVEngineConfigurator()))
@@ -299,9 +302,9 @@ public final void setupClass() {
.filter(state -> state == IRPCClient.ConnState.READY)
.firstElement()
.blockingSubscribe();
- await().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet()));
- await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet()));
- await().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet()));
lenient().when(settingProvider.provide(any(), anyString()))
.thenAnswer(invocation -> {
Setting setting = invocation.getArgument(0);
diff --git a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java
index a59619185..a6af28b2b 100644
--- a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java
+++ b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java
@@ -28,10 +28,14 @@
*/
@Slf4j
public enum Setting {
- MQTT3Enabled(Boolean.class, val -> true, true), MQTT4Enabled(Boolean.class, val -> true, true),
- MQTT5Enabled(Boolean.class, val -> true, true), DebugModeEnabled(Boolean.class, val -> true, false),
- ForceTransient(Boolean.class, val -> true, false), ByPassPermCheckError(Boolean.class, val -> true, true),
- PayloadFormatValidationEnabled(Boolean.class, val -> true, true), RetainEnabled(Boolean.class, val -> true, true),
+ MQTT3Enabled(Boolean.class, val -> true, true),
+ MQTT4Enabled(Boolean.class, val -> true, true),
+ MQTT5Enabled(Boolean.class, val -> true, true),
+ DebugModeEnabled(Boolean.class, val -> true, false),
+ ForceTransient(Boolean.class, val -> true, false),
+ ByPassPermCheckError(Boolean.class, val -> true, true),
+ PayloadFormatValidationEnabled(Boolean.class, val -> true, true),
+ RetainEnabled(Boolean.class, val -> true, true),
WildcardSubscriptionEnabled(Boolean.class, val -> true, true),
SubscriptionIdentifierEnabled(Boolean.class, val -> true, true),
SharedSubscriptionEnabled(Boolean.class, val -> true, true),
diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java
index f751fbaf5..abdbe24ca 100644
--- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java
+++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.retain.store;
@@ -22,9 +22,9 @@
import static org.awaitility.Awaitility.await;
import static org.testng.Assert.assertNotSame;
-import org.apache.bifromq.basekv.utils.BoundaryUtil;
import io.micrometer.core.instrument.Gauge;
import java.time.Duration;
+import org.apache.bifromq.basekv.utils.BoundaryUtil;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
@@ -47,7 +47,7 @@ public void testLoadMetadata() {
Gauge retainCountGauge = getRetainCountGauge(tenantId);
restartStoreServer();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
Gauge newSpaceUsageGauge = getSpaceUsageGauge(tenantId);
Gauge newRetainCountGauge = getRetainCountGauge(tenantId);
diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java
index 926e78a08..4f792c2dd 100644
--- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java
+++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java
@@ -14,7 +14,7 @@
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
- * under the License.
+ * under the License.
*/
package org.apache.bifromq.retain.store;
@@ -28,6 +28,25 @@
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
+import com.google.protobuf.ByteString;
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.Meter;
+import io.micrometer.core.instrument.Metrics;
+import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.Duration;
+import java.util.Comparator;
+import java.util.Objects;
+import java.util.UUID;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.ThreadLocalRandom;
+import java.util.concurrent.atomic.AtomicReference;
+import lombok.extern.slf4j.Slf4j;
import org.apache.bifromq.basecluster.AgentHostOptions;
import org.apache.bifromq.basecluster.IAgentHost;
import org.apache.bifromq.basecrdt.service.CRDTServiceOptions;
@@ -69,25 +88,6 @@
import org.apache.bifromq.type.ClientInfo;
import org.apache.bifromq.type.Message;
import org.apache.bifromq.type.TopicMessage;
-import com.google.protobuf.ByteString;
-import io.micrometer.core.instrument.Gauge;
-import io.micrometer.core.instrument.Meter;
-import io.micrometer.core.instrument.Metrics;
-import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.time.Duration;
-import java.util.Comparator;
-import java.util.Objects;
-import java.util.UUID;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledThreadPoolExecutor;
-import java.util.concurrent.ThreadLocalRandom;
-import java.util.concurrent.atomic.AtomicReference;
-import lombok.extern.slf4j.Slf4j;
import org.mockito.MockitoAnnotations;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
@@ -143,7 +143,7 @@ public void setup() throws IOException {
.metaService(metaService).build();
buildStoreServer();
rpcServer.start();
- await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
+ await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet()));
log.info("Setup finished, and start testing");
}
@@ -158,6 +158,7 @@ private void buildStoreServer() {
.tickerThreads(tickerThreads)
.bgTaskExecutor(bgTaskExecutor)
.gcInterval(Duration.ofSeconds(60))
+ .bootstrapDelay(Duration.ofSeconds(1))
.build();
rpcServer = rpcServerBuilder.build();
}
diff --git a/build/build-bifromq-starter/conf/log4j2.xml b/build/build-bifromq-starter/conf/log4j2.xml
index a646e40ab..c3fd5569e 100644
--- a/build/build-bifromq-starter/conf/log4j2.xml
+++ b/build/build-bifromq-starter/conf/log4j2.xml
@@ -41,7 +41,7 @@
%d{yyyy-MM-dd HH:mm:ss.SSS} %5p [%t] --- [%F:%L] %m - [id=%X{id},term=%X{term},state=%X{state},leader=%X{leader},f=%X{first},l=%X{last}],c=%X{commit},cfg=%X{config}]%n
]]>