diff --git a/.github/workflows/build-cov.yaml b/.github/workflows/build-cov.yaml index 5fa51550e..bd0c4b8e6 100644 --- a/.github/workflows/build-cov.yaml +++ b/.github/workflows/build-cov.yaml @@ -1,6 +1,7 @@ name: Cov-Build on: + workflow_dispatch: pull_request: branches: - 'main' diff --git a/base-cluster/pom.xml b/base-cluster/pom.xml index 34c978411..1bcc08082 100644 --- a/base-cluster/pom.xml +++ b/base-cluster/pom.xml @@ -33,6 +33,10 @@ org.apache.bifromq base-env-provider + + org.apache.bifromq + base-util + org.apache.bifromq base-hlc diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java index 1633b3cf5..d74230271 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/AgentHost.java @@ -14,14 +14,31 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster; -import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; import static com.google.common.base.Preconditions.checkArgument; +import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import com.google.protobuf.ByteString; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.disposables.CompositeDisposable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import java.net.InetSocketAddress; +import java.time.Duration; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; import org.apache.bifromq.basecluster.fd.FailureDetector; import org.apache.bifromq.basecluster.fd.IFailureDetector; @@ -43,23 +60,6 @@ import org.apache.bifromq.basecrdt.store.ICRDTStore; import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage; import org.apache.bifromq.baseenv.EnvProvider; -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.protobuf.ByteString; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.binder.jvm.ExecutorServiceMetrics; -import io.reactivex.rxjava3.core.Observable; -import io.reactivex.rxjava3.core.Scheduler; -import io.reactivex.rxjava3.disposables.CompositeDisposable; -import io.reactivex.rxjava3.schedulers.Schedulers; -import java.net.InetSocketAddress; -import java.time.Duration; -import java.util.Map; -import java.util.Set; -import 
java.util.concurrent.CompletableFuture; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.atomic.AtomicReference; -import lombok.extern.slf4j.Slf4j; @Slf4j final class AgentHost implements IAgentHost { @@ -173,6 +173,11 @@ public Observable>> landscape() { return memberList.landscape(); } + @Override + public Observable refuteSignal() { + return memberList.refuteSignal(); + } + @Override public void close() { if (state.compareAndSet(State.STARTED, State.STOPPING)) { diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java index e804f4e55..d5b26aa47 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/IAgentHost.java @@ -14,11 +14,16 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; +import io.reactivex.rxjava3.core.Observable; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; import org.apache.bifromq.basecluster.memberlist.IHostAddressResolver; import org.apache.bifromq.basecluster.memberlist.agent.IAgent; @@ -26,11 +31,6 @@ import org.apache.bifromq.basecluster.transport.ITransport; import org.apache.bifromq.basecluster.transport.TCPTransport; import org.apache.bifromq.basecluster.transport.Transport; -import io.reactivex.rxjava3.core.Observable; -import java.net.InetSocketAddress; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.CompletableFuture; /** * Agent host defines the interface for hosting agents and joining the cluster. 
@@ -101,6 +101,14 @@ static IAgentHost newInstance(AgentHostOptions options) { */ Observable>> landscape(); + /** + * Emits a signal whenever the local host actively refutes a suspicion of being dead. + * Each emission carries the timestamp (in millis) when the refutation occurred. + * + * @return an observable stream of refutation timestamps + */ + Observable refuteSignal(); + /** * Shutdown the agent host. */ diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java index df208694a..25868a209 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/HostMemberList.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist; @@ -25,28 +25,8 @@ import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static org.apache.bifromq.basecrdt.store.ReplicaIdGenerator.generate; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.memberlist.agent.Agent; -import org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider; -import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.Doubt; -import org.apache.bifromq.basecluster.membership.proto.Fail; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.membership.proto.HostMember; -import org.apache.bifromq.basecluster.membership.proto.Join; -import org.apache.bifromq.basecluster.membership.proto.Quit; -import org.apache.bifromq.basecluster.messenger.IMessenger; -import org.apache.bifromq.basecluster.proto.ClusterMessage; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.store.ICRDTStore; -import org.apache.bifromq.basehlc.HLC; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import com.google.protobuf.AbstractMessageLite; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; @@ -57,6 +37,7 @@ import io.reactivex.rxjava3.core.Scheduler; import io.reactivex.rxjava3.disposables.CompositeDisposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; +import io.reactivex.rxjava3.subjects.PublishSubject; import java.net.InetSocketAddress; import java.util.HashSet; import java.util.Iterator; @@ -69,7 +50,30 @@ import 
java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.memberlist.agent.Agent; +import org.apache.bifromq.basecluster.memberlist.agent.AgentAddressProvider; +import org.apache.bifromq.basecluster.memberlist.agent.AgentMessenger; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.Doubt; +import org.apache.bifromq.basecluster.membership.proto.Fail; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.membership.proto.HostMember; +import org.apache.bifromq.basecluster.membership.proto.Join; +import org.apache.bifromq.basecluster.membership.proto.Quit; +import org.apache.bifromq.basecluster.messenger.IMessenger; +import org.apache.bifromq.basecluster.proto.ClusterMessage; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.ICRDTStore; +import org.apache.bifromq.basehlc.HLC; +/** + * HostMemberList implementation using CRDT for achieving a consistent view of the host members in the cluster. 
+ */ @Slf4j public class HostMemberList implements IHostMemberList { private final AtomicReference state = new AtomicReference<>(State.JOINED); @@ -79,12 +83,25 @@ public class HostMemberList implements IHostMemberList { private final IHostAddressResolver addressResolver; private final BehaviorSubject> membershipSubject = BehaviorSubject.createDefault( new ConcurrentHashMap<>()); + private final PublishSubject refuteSubject = PublishSubject.create(); private final Map agentMap = new ConcurrentHashMap<>(); private final IORMap hostListCRDT; private final CompositeDisposable disposables = new CompositeDisposable(); private final MetricManager metricManager; private final String[] tags; private volatile HostMember local; + + /** + * Constructor of HostMemberList. + * + * @param bindAddr the address to bind the host member + * @param port the port to bind the host member + * @param messenger the messenger to use for communication + * @param scheduler the scheduler to use for scheduling tasks + * @param store the CRDT store to use for storing internal OR-Map + * @param addressResolver the address resolver to resolve host endpoints to addresses + * @param tags the tags to be used for metrics + */ public HostMemberList(String bindAddr, int port, IMessenger messenger, @@ -134,10 +151,13 @@ private boolean join(HostMember member) { if (joined) { // add it into crdt log.debug("Member[{}] joins the cluster: local={}", member, local); - Optional memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint()); - if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) { - hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString()) - .with(MVRegOperation.write(member.toByteString()))); + if (member == local) { + // only update crdt if it's local member + Optional memberInCRDT = getHostMember(hostListCRDT, member.getEndpoint()); + if (memberInCRDT.isEmpty() || memberInCRDT.get().getIncarnation() < member.getIncarnation()) { 
+ hostListCRDT.execute(ORMapOperation.update(member.getEndpoint().toByteString()) + .with(MVRegOperation.write(member.toByteString()))); + } } // update crdt landscape store.join(hostListCRDT.id(), currentMembers().keySet().stream() @@ -148,12 +168,11 @@ private boolean join(HostMember member) { } } - private void drop(HostEndpoint memberEndpoint, int incarnation) { + private void drop(HostEndpoint memberEndpoint, int incarnation, boolean fromQuit) { synchronized (this) { boolean removed = removeMember(memberEndpoint, incarnation); Optional memberInCRDT = getHostMember(hostListCRDT, memberEndpoint); - if (memberInCRDT.isPresent()) { - // remove it from crdt if any + if (!fromQuit && memberInCRDT.isPresent() && shouldReportFailure(memberInCRDT.get().getEndpoint())) { hostListCRDT.execute(ORMapOperation.remove(memberEndpoint.toByteString()).of(mvreg)); } if (removed) { @@ -165,6 +184,17 @@ private void drop(HostEndpoint memberEndpoint, int incarnation) { } } + private boolean shouldReportFailure(HostEndpoint failedMemberEndpoint) { + // if local member is responsible for removing the failed member from CRDT + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodes(currentMembers().keySet()) + .build(); + HostEndpoint reporter = hash.get(failedMemberEndpoint); + return reporter.getId().equals(local.getEndpoint().getId()); + } + @Override public boolean isZombie(HostEndpoint endpoint) { return !endpoint.getId().equals(local.getEndpoint().getId()) @@ -207,6 +237,7 @@ public CompletableFuture stop() { .thenCompose(v -> store.stopHosting(hostListCRDT.id())) .whenComplete((v, e) -> { membershipSubject.onComplete(); + refuteSubject.onComplete(); metricManager.close(); state.set(State.QUITED); }); @@ -226,6 +257,8 @@ private void renew(int atLeastIncarnation) { synchronized (this) { local = 
local.toBuilder().setIncarnation(Math.max(local.getIncarnation(), atLeastIncarnation) + 1).build(); join(local); + agentMap.values().forEach(Agent::refreshRegistration); + refuteSubject.onNext(HLC.INST.get()); } } @@ -247,7 +280,6 @@ public IAgent host(String agentId) { tags)); local = local.toBuilder() .setIncarnation(local.getIncarnation() + 1) - .addAgentId(agentId) // deprecate since 3.3.3 .putAgent(agentId, agentEndpoint.getIncarnation()) .build(); join(local); @@ -265,8 +297,6 @@ public CompletableFuture stopHosting(String agentId) { synchronized (this) { local = local.toBuilder() .setIncarnation(local.getIncarnation() + 1) - .clearAgentId() - .addAllAgentId(agentMap.keySet()) // deprecate since 3.3.3 .clearAgent() .putAllAgent(Maps.transformValues(agentMap, a -> a.local().getIncarnation())) .build(); @@ -279,7 +309,12 @@ public CompletableFuture stopHosting(String agentId) { @Override public Observable>> landscape() { - return membershipSubject.map(m -> Maps.transformValues(m, v -> Sets.newHashSet(v.getAgentIdList()))); + return membershipSubject.map(m -> Maps.transformValues(m, v -> v.getAgentMap().keySet())); + } + + @Override + public Observable refuteSignal() { + return refuteSubject; } private Map currentMembers() { @@ -327,6 +362,9 @@ private void handleMessage(ClusterMessage message) { case QUIT -> handleQuit(message.getQuit()); case FAIL -> handleFail(message.getFail()); case DOUBT -> handleDoubt(message.getDoubt()); + default -> { + // never happen + } } } @@ -363,7 +401,7 @@ private void handleFail(Fail fail) { } else if (isZombie(failedEndpoint)) { clearZombie(failedEndpoint); } else { - drop(failedEndpoint, fail.getIncarnation()); + drop(failedEndpoint, fail.getIncarnation(), false); } } @@ -371,7 +409,7 @@ private void handleQuit(Quit quit) { HostEndpoint quitEndpoint = quit.getEndpoint(); log.debug("Member[{}] quits the cluster", quitEndpoint); if (!quitEndpoint.equals(local.getEndpoint()) && !isZombie(quitEndpoint)) { - drop(quitEndpoint, 
quit.getIncarnation()); + drop(quitEndpoint, quit.getIncarnation(), true); } } @@ -388,7 +426,7 @@ private void handleDoubt(Doubt doubt) { private void clearZombie(HostEndpoint zombieEndpoint) { // drop zombie if any, and broadcast a quit on behalf of it - drop(zombieEndpoint, Integer.MAX_VALUE); + drop(zombieEndpoint, Integer.MAX_VALUE, false); messenger.spread(ClusterMessage.newBuilder() .setQuit(Quit.newBuilder().setEndpoint(zombieEndpoint).setIncarnation(Integer.MAX_VALUE).build()) .build()); diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java index c772a406d..52b152fb8 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/IHostMemberList.java @@ -14,19 +14,22 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.memberlist; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.membership.proto.HostMember; import io.reactivex.rxjava3.core.Observable; import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.membership.proto.HostMember; +/** + * The interface of host member list service. + */ public interface IHostMemberList { /** * The member from local. 
@@ -35,6 +38,12 @@ public interface IHostMemberList { */ HostMember local(); + /** + * If the given endpoint is considered a zombie(The dead endpoint used to live in the local host). + * + * @param endpoint the endpoint + * @return true if the given endpoint is considered a zombie. + */ boolean isZombie(HostEndpoint endpoint); /** @@ -71,4 +80,12 @@ public interface IHostMemberList { * @return the observable */ Observable>> landscape(); + + /** + * Emits a signal whenever the local member actively refutes a suspicion of being dead. + * Each emission carries the timestamp (in millis) when the refutation occurred. + * + * @return an observable stream of refutation timestamps + */ + Observable refuteSignal(); } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java index 8a9e656f1..cb46467c3 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/Agent.java @@ -14,21 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist.agent; -import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static java.util.Collections.emptyMap; +import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.store.ICRDTStore; import com.google.common.collect.Sets; import com.google.protobuf.AbstractMessageLite; import io.micrometer.core.instrument.Gauge; @@ -38,7 +31,6 @@ import io.reactivex.rxjava3.disposables.CompositeDisposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import java.util.HashMap; -import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.CompletableFuture; @@ -50,13 +42,18 @@ import java.util.function.Supplier; import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.ICRDTStore; @Slf4j public final class Agent implements IAgent { - private enum State { - JOINED, QUITTING, QUITED - } - private final ReadWriteLock quitLock = new ReentrantReadWriteLock(); private final String agentId; private final AgentEndpoint 
localEndpoint; @@ -70,7 +67,6 @@ private enum State { BehaviorSubject.createDefault(emptyMap()); private final CompositeDisposable disposables = new CompositeDisposable(); private final Gauge memberNumGauge; - private volatile Set currentAgentEndpoints = new HashSet<>(); public Agent(String agentId, AgentEndpoint endpoint, @@ -140,6 +136,11 @@ public CompletableFuture deregister(IAgentMember member) { }); } + @Override + public void refreshRegistration() { + localMemberRegistry.values().forEach(AgentMember::refresh); + } + public CompletableFuture quit() { Lock writeLock = quitLock.writeLock(); try { @@ -184,27 +185,37 @@ private void sync(long ts) { private void handleAgentEndpointsUpdate(Set agentEndpoints) { skipRunIfNotJoined(() -> { - Set newAgentEndpoints = Sets.newHashSet(agentEndpoints); - newAgentEndpoints.add(localEndpoint); - Set leftHosts = Sets.difference(currentAgentEndpoints, newAgentEndpoints); - // drop members on left hosts + Set aliveAgentEndpoints = Sets.newHashSet(agentEndpoints); + aliveAgentEndpoints.add(localEndpoint); + // compute alive endpoints from host member list (clean source of truth) + Set aliveAgentHostEndpoints = aliveAgentEndpoints.stream() + .map(AgentEndpoint::getEndpoint) + .collect(Collectors.toSet()); + // drop members in CRDT that are not present in alive host endpoints Map agentMemberMap = CRDTUtil.toAgentMemberMap(agentCRDT); for (AgentMemberAddr memberAddr : agentMemberMap.keySet()) { - AgentEndpoint agentEndpoint = AgentEndpoint.newBuilder() - .setEndpoint(memberAddr.getEndpoint()) - .setIncarnation(memberAddr.getIncarnation()) - .build(); - if (leftHosts.contains(agentEndpoint)) { + if (!aliveAgentHostEndpoints.contains(memberAddr.getEndpoint()) + && shouldClean(aliveAgentEndpoints, memberAddr.getEndpoint())) { agentCRDT.execute(ORMapOperation.remove(memberAddr.toByteString()).of(mvreg)); } } // update landscape - currentAgentEndpoints = newAgentEndpoints; store.join(agentCRDT.id(), - 
currentAgentEndpoints.stream().map(AbstractMessageLite::toByteString).collect(Collectors.toSet())); + aliveAgentEndpoints.stream().map(AbstractMessageLite::toByteString).collect(Collectors.toSet())); }); } + private boolean shouldClean(Set allEndpoints, HostEndpoint failedMemberEndpoint) { + // if local member is responsible for removing the failed member from CRDT + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.getId().asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.getEndpoint().getId().asReadOnlyByteBuffer())) + .nodes(allEndpoints) + .build(); + AgentEndpoint cleaner = hash.get(failedMemberEndpoint); + return cleaner.getEndpoint().getId().equals(localEndpoint.getEndpoint().getId()); + } + private void skipRunIfNotJoined(Runnable runnable) { Lock readLock = quitLock.readLock(); try { @@ -231,4 +242,8 @@ private T runIfJoined(Supplier supplier) { readLock.unlock(); } } + + private enum State { + JOINED, QUITTING, QUITED + } } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java index e36980ab5..6e5d6e776 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/AgentMember.java @@ -14,21 +14,13 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist.agent; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.agent.proto.AgentMessage; -import org.apache.bifromq.basecluster.agent.proto.AgentMessageEnvelope; -import org.apache.bifromq.basecrdt.core.api.IORMap; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.core.api.ORMapOperation; -import org.apache.bifromq.basehlc.HLC; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.core.Scheduler; @@ -38,12 +30,23 @@ import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Supplier; import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.agent.proto.AgentMessage; +import org.apache.bifromq.basecluster.agent.proto.AgentMessageEnvelope; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basehlc.HLC; +@Slf4j class AgentMember implements IAgentMember { private final AgentMemberAddr localAddr; private final IORMap agentCRDT; @@ -52,8 +55,9 @@ class AgentMember implements IAgentMember { private final PublishSubject agentMessageSubject = PublishSubject.create(); private final CompositeDisposable disposables = new 
CompositeDisposable(); private final ReadWriteLock destroyLock = new ReentrantReadWriteLock(); + private final AtomicReference metadata = new AtomicReference<>( + AgentMemberMetadata.newBuilder().setHlc(HLC.INST.get()).build()); private volatile boolean destroy = false; - private volatile AgentMemberMetadata metadata; AgentMember(AgentMemberAddr memberAddr, IORMap agentCRDT, @@ -64,7 +68,6 @@ class AgentMember implements IAgentMember { this.agentCRDT = agentCRDT; this.messenger = messenger; this.memberAddresses = memberAddresses; - metadata = AgentMemberMetadata.newBuilder().setHlc(HLC.INST.get()).build(); updateCRDT(); disposables.add(agentCRDT.inflation() .observeOn(scheduler) @@ -78,14 +81,14 @@ class AgentMember implements IAgentMember { @Override public AgentMemberMetadata metadata() { - return metadata; + return metadata.get(); } @Override public void metadata(ByteString value) { skipRunWhenDestroyed(() -> { - if (!metadata.getValue().equals(value)) { - metadata = AgentMemberMetadata.newBuilder().setValue(value).setHlc(HLC.INST.get()).build(); + if (!metadata.get().getValue().equals(value)) { + metadata.set(AgentMemberMetadata.newBuilder().setValue(value).setHlc(HLC.INST.get()).build()); updateCRDT(); } }); @@ -136,7 +139,7 @@ public CompletableFuture multicast(String targetMemberName, ByteString mes private void updateCRDT(long ts) { skipRunWhenDestroyed(() -> { Optional metaOnCRDT = CRDTUtil.getAgentMemberMetadata(agentCRDT, localAddr); - if (metaOnCRDT.isEmpty() || !metaOnCRDT.get().equals(metadata)) { + if (metaOnCRDT.isEmpty() || !metaOnCRDT.get().equals(metadata.get())) { updateCRDT(); } }); @@ -144,7 +147,7 @@ private void updateCRDT(long ts) { private void updateCRDT() { skipRunWhenDestroyed(() -> agentCRDT.execute(ORMapOperation.update(localAddr.toByteString()) - .with(MVRegOperation.write(metadata.toByteString())))); + .with(MVRegOperation.write(metadata.get().toByteString())))); } @Override @@ -152,6 +155,14 @@ public Observable receive() { return 
agentMessageSubject; } + @Override + public void refresh() { + skipRunWhenDestroyed(() -> { + metadata.set(metadata.get().toBuilder().setHlc(HLC.INST.get()).build()); + updateCRDT(); + }); + } + private void skipRunWhenDestroyed(Runnable runnable) { Lock readLock = destroyLock.readLock(); try { diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java index dee044d59..87202e1a2 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgent.java @@ -14,19 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.memberlist.agent; -import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; import io.reactivex.rxjava3.core.Observable; import java.util.Map; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.agent.proto.AgentEndpoint; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +/** + * The interface for an overlay agent cluster. + */ public interface IAgent { + /** + * The agent cluster id. + * + * @return the agent cluster id + */ String id(); /** @@ -39,7 +47,7 @@ public interface IAgent { /** * A hot observable of agent membership. * - * @return + * @return an observable that emits the current membership map */ Observable> membership(); @@ -47,14 +55,19 @@ public interface IAgent { * Register a local agent member. 
It's allowed to register same member name in same logical agent from different * agent hosts * - * @param memberName + * @param memberName the member name, should be unique in local host member */ IAgentMember register(String memberName); /** - * Deregister a member instance, the caller should never hold the reference to the instance after deregistered + * Deregister a member instance, the caller should never hold the reference to the instance after deregistered. * - * @param member + * @param member the member instance to deregister */ CompletableFuture deregister(IAgentMember member); + + /** + * Refresh the registration of the local agent member. + */ + void refreshRegistration(); } diff --git a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java index 3d921d779..69e3a1879 100644 --- a/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java +++ b/base-cluster/src/main/java/org/apache/bifromq/basecluster/memberlist/agent/IAgentMember.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster.memberlist.agent; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.agent.proto.AgentMessage; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.agent.proto.AgentMessage; public interface IAgentMember { AgentMemberAddr address(); @@ -32,50 +32,55 @@ public interface IAgentMember { /** * Broadcast a message among the agent members. * - * @param message - * @param reliable - * @return + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture broadcast(ByteString message, boolean reliable); /** - * Send a message to another member located in given endpoint + * Send a message to another member located in given endpoint. * - * @param targetMemberAddr - * @param message - * @param reliable - * @return + * @param targetMemberAddr the address of the target member + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture send(AgentMemberAddr targetMemberAddr, ByteString message, boolean reliable); /** - * Send a message to all endpoints where target member name is registered + * Send a message to all endpoints where target member name is registered. 
* - * @param targetMemberName - * @param message - * @param reliable - * @return + * @param targetMemberName the name of the target member + * @param message the message to be sent + * @param reliable if true, the message will be sent reliably, otherwise it may be dropped + * @return a CompletableFuture that completes when the message is sent */ CompletableFuture multicast(String targetMemberName, ByteString message, boolean reliable); /** - * Get current associated metadata + * Get current associated metadata. * - * @return + * @return the current metadata */ AgentMemberMetadata metadata(); /** - * Update associated metadata + * Update associated metadata. * - * @param value + * @param value the new metadata value */ void metadata(ByteString value); /** - * An observable of incoming messages + * An observable of incoming messages. * - * @return + * @return an observable that emits AgentMessage */ Observable receive(); + + /** + * Refresh the registration of the local agent member. + */ + void refresh(); } diff --git a/base-cluster/src/main/proto/basecluster/membership/HostMember.proto b/base-cluster/src/main/proto/basecluster/membership/HostMember.proto index 9d706d443..63b2f4d5a 100644 --- a/base-cluster/src/main/proto/basecluster/membership/HostMember.proto +++ b/base-cluster/src/main/proto/basecluster/membership/HostMember.proto @@ -34,6 +34,6 @@ message HostEndpoint{ message HostMember { HostEndpoint endpoint = 1; uint32 incarnation = 2; // incarnation of the node, managed by the node itself - repeated string agentId = 3; // deprecate since 3.3.3, the list of agents reside on the host + //repeated string agentId = 3; deprecate since 3.3.3, the list of agents reside on the host map agent = 4; // the map of agent id to incarnation } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java index cce647ab3..973609d57 100644 --- 
a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentHostsTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; @@ -22,6 +22,13 @@ import static com.google.protobuf.ByteString.copyFromUtf8; import static org.awaitility.Awaitility.await; +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.observers.TestObserver; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; import org.apache.bifromq.basecluster.agent.proto.AgentMessage; @@ -30,13 +37,6 @@ import org.apache.bifromq.basecluster.memberlist.agent.IAgent; import org.apache.bifromq.basecluster.memberlist.agent.IAgentMember; import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import com.google.common.collect.Sets; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.observers.TestObserver; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import lombok.extern.slf4j.Slf4j; import org.testng.Assert; import org.testng.annotations.Test; @@ -86,16 +86,12 @@ public void testUnregister() { @StoreCfg(id = "s1", isSeed = true), @StoreCfg(id = "s2"), @StoreCfg(id = "s3"), - @StoreCfg(id = "s4"), - @StoreCfg(id = "s5"), }) @Test public void testMultipleAgentHosts() { - await().until(() -> storeMgr.membership("s1").size() == 5); - await().until(() -> storeMgr.membership("s2").size() == 5); - await().until(() -> storeMgr.membership("s3").size() == 5); - await().until(() -> 
storeMgr.membership("s4").size() == 5); - await().until(() -> storeMgr.membership("s5").size() == 5); + await().forever().until(() -> storeMgr.membership("s1").size() == 3); + await().forever().until(() -> storeMgr.membership("s2").size() == 3); + await().forever().until(() -> storeMgr.membership("s3").size() == 3); } @Test @@ -385,7 +381,7 @@ public void testAgentClusterPartitionAndHealing() { await().until(() -> agentOnS2.membership().blockingFirst().size() == 4); await().until(() -> agentOnS3.membership().blockingFirst().size() == 4); - // isolate s1 from others + // isolate s1 from others log.info("isolate s1"); storeMgr.isolate("s1"); await().forever().until(() -> agentOnS1.membership().blockingFirst().size() == 2); @@ -395,8 +391,46 @@ public void testAgentClusterPartitionAndHealing() { log.info("integrate s1"); // integrate s1 into the cluster storeMgr.integrate("s1"); - await().until(() -> agentOnS1.membership().blockingFirst().size() == 4); - await().until(() -> agentOnS2.membership().blockingFirst().size() == 4); - await().until(() -> agentOnS3.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS1.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS2.membership().blockingFirst().size() == 4); + await().forever().until(() -> agentOnS3.membership().blockingFirst().size() == 4); + } + + @StoreCfgs(stores = { + @StoreCfg(id = "s1", isSeed = true), + @StoreCfg(id = "s2"), + }) + @Test + public void testCleanStaleAgentMembersAfterHostRestartWithNewEndpoint() { + // ensure cluster up + await().until(() -> storeMgr.membership("s1").size() == 2); + await().until(() -> storeMgr.membership("s2").size() == 2); + + // host same agent on both hosts so CRDT survives while s1 restarts + IAgent agentOnS1 = storeMgr.hostAgent("s1", "agentX"); + IAgent agentOnS2 = storeMgr.hostAgent("s2", "agentX"); + + // register a member only on s1 to create a CRDT entry bound to s1's endpoint + IAgentMember s1Member = 
agentOnS1.register("nodeOnS1"); + s1Member.metadata(copyFromUtf8("payload")); + + // both sides should observe exactly 1 member + await().until(() -> agentOnS1.membership().blockingFirst().size() == 1); + await().until(() -> agentOnS2.membership().blockingFirst().size() == 1); + + storeMgr.crash("s1"); + // s2 should eventually only see itself + await().forever().until(() -> storeMgr.membership("s2").size() == 1); + + // start a new s1 instance with a new endpoint (old isolated one still exists but unreachable) + storeMgr.startHost("s1"); + // rejoin cluster + storeMgr.join("s1", "s2"); + // re-host the agent on s1 (no members registered now) + IAgent newAgentOnS1 = storeMgr.hostAgent("s1", "agentX"); + + // eventually, the stale member from old s1 endpoint should be cleaned from CRDT + await().until(() -> newAgentOnS1.membership().blockingFirst().isEmpty()); + await().until(() -> agentOnS2.membership().blockingFirst().isEmpty()); } } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java index c827dbcf4..399dbd924 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestCluster.java @@ -14,17 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecluster; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; -import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; -import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; -import org.apache.bifromq.basecluster.memberlist.agent.IAgent; -import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; -import org.apache.bifromq.basecluster.transport.ITransport; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -40,30 +34,31 @@ import java.util.Set; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberAddr; +import org.apache.bifromq.basecluster.agent.proto.AgentMemberMetadata; +import org.apache.bifromq.basecluster.memberlist.HostAddressResolver; +import org.apache.bifromq.basecluster.memberlist.agent.IAgent; +import org.apache.bifromq.basecluster.membership.proto.HostEndpoint; +import org.apache.bifromq.basecluster.transport.ITransport; @Slf4j public class AgentTestCluster { - @AllArgsConstructor - private static class AgentHostMeta { - final AgentHostOptions options; - } - private final MockNetwork network = new MockNetwork(); private final Map hostMetaMap = Maps.newConcurrentMap(); private final Map hostEndpointMap = Maps.newConcurrentMap(); private final Map hostTransportMap = Maps.newConcurrentMap(); private final Map hostMap = Maps.newConcurrentMap(); private final Map> inflationLogs = Maps.newConcurrentMap(); + private final Map crashedHostEndpointMap = Maps.newConcurrentMap(); + private final Map crashedHostTransportMap = Maps.newConcurrentMap(); + private final Map crashedHostMap = Maps.newConcurrentMap(); private final CompositeDisposable disposables = new CompositeDisposable(); public AgentTestCluster() { } - public String newHost(String hostId, AgentHostOptions options) { - hostMetaMap.computeIfAbsent(hostId, k -> { - 
loadStore(hostId, options); - return new AgentHostMeta(options); - }); + public String registerHost(String hostId, AgentHostOptions options) { + hostMetaMap.computeIfAbsent(hostId, k -> new AgentHostMeta(options)); return hostId; } @@ -94,11 +89,24 @@ public void isolate(String hostId) { network.isolate(hostTransportMap.get(hostId)); } + public void crash(String hostId) { + checkHost(hostId); + network.isolate(hostTransportMap.get(hostId)); + inflationLogs.remove(hostId); + + HostEndpoint crashedEndpoint = hostEndpointMap.remove(hostId); + crashedHostEndpointMap.put(hostId, crashedEndpoint); + + IAgentHost crashedAgentHost = hostMap.remove(crashedEndpoint); + crashedHostMap.put(crashedEndpoint, crashedAgentHost); + ITransport transport = hostTransportMap.remove(hostId); + crashedHostTransportMap.put(hostId, transport); + } + public void integrate(String hostId) { network.integrate(hostTransportMap.get(hostId)); } - public HostEndpoint endpoint(String hostId) { checkHost(hostId); return getHost(hostId).local(); @@ -145,6 +153,8 @@ private HostEndpoint loadStore(String storeId, AgentHostOptions options) { public void shutdown() { disposables.dispose(); hostEndpointMap.keySet().forEach(this::stopHost); + crashedHostTransportMap.keySet().forEach(hostId -> + crashedHostMap.remove(crashedHostEndpointMap.get(hostId)).close()); } public IAgentHost getHost(String hostId) { @@ -155,4 +165,9 @@ public IAgentHost getHost(String hostId) { private void checkHost(String hostId) { Preconditions.checkArgument(hostEndpointMap.containsKey(hostId)); } + + @AllArgsConstructor + private static class AgentHostMeta { + final AgentHostOptions options; + } } diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java index 9bc5aa25f..369cd9415 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java +++ 
b/base-cluster/src/test/java/org/apache/bifromq/basecluster/AgentTestTemplate.java @@ -14,17 +14,17 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster; -import org.apache.bifromq.basecluster.annotation.StoreCfg; -import org.apache.bifromq.basecluster.annotation.StoreCfgs; -import org.apache.bifromq.basecrdt.store.CRDTStoreOptions; import java.lang.reflect.Method; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecluster.annotation.StoreCfg; +import org.apache.bifromq.basecluster.annotation.StoreCfgs; +import org.apache.bifromq.basecrdt.store.CRDTStoreOptions; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -42,14 +42,16 @@ public void createClusterByAnnotation(Method testMethod) { if (storeMgr != null) { if (storeCfgs != null) { for (StoreCfg cfg : storeCfgs.stores()) { - storeMgr.newHost(cfg.id(), build(cfg)); + storeMgr.registerHost(cfg.id(), build(cfg)); + storeMgr.startHost(cfg.id()); if (cfg.isSeed()) { seedStoreId = cfg.id(); } } } if (storeCfg != null) { - storeMgr.newHost(storeCfg.id(), build(storeCfg)); + storeMgr.registerHost(storeCfg.id(), build(storeCfg)); + storeMgr.startHost(storeCfg.id()); } if (seedStoreId != null && storeCfgs != null) { for (StoreCfg cfg : storeCfgs.stores()) { diff --git a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java index d51b20dae..0a93cf4c2 100644 --- a/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java +++ b/base-cluster/src/test/java/org/apache/bifromq/basecluster/memberlist/HostMemberListTest.java @@ -14,11 +14,12 @@ * "AS IS" BASIS, WITHOUT 
WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecluster.memberlist; +import static java.util.Collections.emptyIterator; import static org.apache.bifromq.basecluster.memberlist.CRDTUtil.AGENT_HOST_MAP_URI; import static org.apache.bifromq.basecluster.memberlist.Fixtures.LOCAL_ADDR; import static org.apache.bifromq.basecluster.memberlist.Fixtures.LOCAL_ENDPOINT; @@ -27,7 +28,6 @@ import static org.apache.bifromq.basecluster.memberlist.Fixtures.REMOTE_ADDR_1; import static org.apache.bifromq.basecluster.memberlist.Fixtures.REMOTE_HOST_1_ENDPOINT; import static org.apache.bifromq.basecluster.memberlist.Fixtures.ZOMBIE_ENDPOINT; -import static java.util.Collections.emptyIterator; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.argThat; import static org.mockito.Mockito.mockConstruction; @@ -39,6 +39,18 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; +import com.google.common.collect.Iterators; +import com.google.common.util.concurrent.MoreExecutors; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Scheduler; +import io.reactivex.rxjava3.schedulers.Schedulers; +import io.reactivex.rxjava3.schedulers.Timed; +import io.reactivex.rxjava3.subjects.PublishSubject; +import java.net.InetSocketAddress; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.memberlist.agent.Agent; import org.apache.bifromq.basecluster.memberlist.agent.IAgent; import org.apache.bifromq.basecluster.membership.proto.Doubt; @@ -55,18 +67,6 @@ import org.apache.bifromq.basecrdt.core.api.IORMap; import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import 
org.apache.bifromq.basecrdt.store.ICRDTStore; -import com.google.common.collect.Iterators; -import com.google.common.util.concurrent.MoreExecutors; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.core.Scheduler; -import io.reactivex.rxjava3.schedulers.Schedulers; -import io.reactivex.rxjava3.schedulers.Timed; -import io.reactivex.rxjava3.subjects.PublishSubject; -import java.net.InetSocketAddress; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.TimeUnit; -import lombok.extern.slf4j.Slf4j; import org.mockito.ArgumentCaptor; import org.mockito.Mock; import org.mockito.MockedConstruction; @@ -120,7 +120,7 @@ public void init() { assertEquals(local.getEndpoint().getAddress(), LOCAL_ADDR.getHostName()); assertEquals(local.getEndpoint().getPort(), LOCAL_ADDR.getPort()); assertTrue(local.getIncarnation() >= 0); - assertTrue(local.getAgentIdList().isEmpty()); + assertTrue(local.getAgentMap().isEmpty()); assertEquals(memberList.landscape().blockingFirst().size(), 1); Map hostMap = memberList.members().blockingFirst(); assertEquals(hostMap.size(), 1); @@ -157,15 +157,15 @@ public void stopHosting() { when(hostMemberOnCRDT.read()).thenReturn(emptyIterator()); IHostMemberList memberList = new HostMemberList(LOCAL_ADDR.getHostName(), LOCAL_ADDR.getPort(), messenger, scheduler, store, addressResolver); - HostMember local = memberList.local(); memberList.host(agentId); when(mockAgent.constructed().get(0).quit()).thenReturn(CompletableFuture.completedFuture(null)); memberList.stopHosting(agentId); - assertEquals(memberList.local().getAgentIdCount(), 0); + assertEquals(memberList.local().getAgentMap().size(), 0); assertEquals(memberList.landscape().blockingFirst().size(), 1); - assertTrue(local.getIncarnation() + 2 == memberList.local().getIncarnation()); + HostMember local = memberList.local(); + assertEquals(memberList.local().getIncarnation(), local.getIncarnation()); Map hostMap = 
memberList.members().blockingFirst(); - assertTrue(local.getIncarnation() + 2 == hostMap.get(local.getEndpoint())); + assertEquals((int) hostMap.get(local.getEndpoint()), local.getIncarnation()); verify(hostListCRDT, times(3)).execute(any(ORMapOperation.ORMapUpdate.class)); } @@ -197,7 +197,7 @@ public void handleJoin() { .build())); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(2)).execute(opCap.capture()); + verify(hostListCRDT, times(1)).execute(opCap.capture()); verify(store, times(2)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } @@ -243,7 +243,7 @@ public void handleJoinFromHealing() { .build(), LOCAL_ENDPOINT)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(2)).execute(opCap.capture()); + verify(hostListCRDT, times(1)).execute(opCap.capture()); ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); @@ -274,7 +274,7 @@ public void handleJoinFromDuplicatedHealing() { .build(), LOCAL_ENDPOINT)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); + verify(hostListCRDT, times(2)).execute(opCap.capture()); ArgumentCaptor msgCap = ArgumentCaptor.forClass(ClusterMessage.class); ArgumentCaptor addrCap = ArgumentCaptor.forClass(InetSocketAddress.class); @@ -333,9 +333,9 @@ public void handleFailAndDrop() { messageSubject.onNext(failMsg(REMOTE_HOST_1_ENDPOINT, 1)); ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); - assertEquals(((ORMapOperation.ORMapRemove) opCap.getAllValues().get(2)).valueType, CausalCRDTType.mvreg); - assertEquals(opCap.getAllValues().get(2).keyPath[0], REMOTE_HOST_1_ENDPOINT.toByteString()); + verify(hostListCRDT, 
times(2)).execute(opCap.capture()); + assertEquals(((ORMapOperation.ORMapRemove) opCap.getAllValues().get(1)).valueType, CausalCRDTType.mvreg); + assertEquals(opCap.getAllValues().get(1).keyPath[0], REMOTE_HOST_1_ENDPOINT.toByteString()); } @Test @@ -346,7 +346,6 @@ public void handleFailAndRenew() { messenger, scheduler, store, addressResolver); assertEquals(memberList.members().blockingFirst().get(LOCAL_ENDPOINT).intValue(), 0); - messageSubject.onNext(failMsg(LOCAL_ENDPOINT, 0)); messageSubject.onNext(failMsg(LOCAL_ENDPOINT, 0)); // this time will be ignored @@ -388,7 +387,7 @@ public void handleQuitNotExistMember() { messageSubject.onNext(quitMsg(REMOTE_HOST_1_ENDPOINT, 1)); // nothing will happen - verify(hostListCRDT, times(1)).execute(any(ORMapOperation.ORMapRemove.class)); + verify(hostListCRDT, never()).execute(any(ORMapOperation.ORMapRemove.class)); verify(store, times(1)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } @@ -409,7 +408,6 @@ public void handleQuitNotExistMemberOnCRDT() { argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); } - @Test public void handleQuitSelf() { when(hostListCRDT.getMVReg(any())).thenReturn(hostMemberOnCRDT); @@ -444,8 +442,8 @@ public void handleQuitAndDrop() { messageSubject.onNext(quitMsg(REMOTE_HOST_1_ENDPOINT, 0)); // nothing will happen ArgumentCaptor opCap = ArgumentCaptor.forClass(ORMapOperation.class); - verify(hostListCRDT, times(3)).execute(opCap.capture()); - assertTrue(opCap.getAllValues().get(2) instanceof ORMapOperation.ORMapRemove); + verify(hostListCRDT, times(1)).execute(opCap.capture()); + assertFalse(opCap.getAllValues().get(0) instanceof ORMapOperation.ORMapRemove); verify(store, times(3)).join( argThat(r -> r.getUri().equals(AGENT_HOST_MAP_URI) && r.getId().equals(LOCAL_STORE_ID)), any()); @@ -480,7 +478,6 @@ public void handleDoubtAndIgnore() { 
assertEquals(memberList.members().blockingFirst().get(LOCAL_ENDPOINT).intValue(), 0); } - private Timed joinMsg(HostMember member) { return to(ClusterMessage.newBuilder() .setJoin(Join.newBuilder() diff --git a/base-crdt/base-crdt-service/pom.xml b/base-crdt/base-crdt-service/pom.xml index f0b2351ab..2dd7cf2df 100644 --- a/base-crdt/base-crdt-service/pom.xml +++ b/base-crdt/base-crdt-service/pom.xml @@ -47,6 +47,10 @@ slf4j-api + + org.awaitility + awaitility + org.apache.logging.log4j log4j-api diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java index 72b13ae93..25eb0c825 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTCluster.java @@ -73,7 +73,7 @@ class CRDTCluster> { this.store = store; this.agentHost = agentHost; replicaId = generate(uri); - log = MDCLogger.getLogger(CRDTCluster.class, "replica", print(replicaId)); + log = MDCLogger.getLogger(CRDTCluster.class, "store", store.id(), "replica", print(replicaId)); membershipAgent = agentHost.host(replicaId.getUri()); endpoint = AgentMemberAddr.newBuilder() .setName(AgentUtil.toAgentMemberName(replicaId)) diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java index 94213d86e..e74ee85d8 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/CRDTService.java @@ -115,6 +115,11 @@ public Observable> aliveCRDTs() { }); } + @Override + public Observable refreshSignal() { + return agentHost.refuteSignal(); + } + private CompletableFuture 
stopHostingInternal(String uri) { return hostedCRDT.remove(uri).close(); } diff --git a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java index 14b1d72db..97375ff37 100644 --- a/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java +++ b/base-crdt/base-crdt-service/src/main/java/org/apache/bifromq/basecrdt/service/ICRDTService.java @@ -14,20 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.service; -import org.apache.bifromq.basecluster.IAgentHost; -import org.apache.bifromq.basecrdt.core.api.ICRDTOperation; -import org.apache.bifromq.basecrdt.core.api.ICausalCRDT; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import java.util.Set; import java.util.concurrent.CompletableFuture; import lombok.NonNull; +import org.apache.bifromq.basecluster.IAgentHost; +import org.apache.bifromq.basecrdt.core.api.ICRDTOperation; +import org.apache.bifromq.basecrdt.core.api.ICausalCRDT; +import org.apache.bifromq.basecrdt.proto.Replica; /** * The CRDT service with decentralized membership management based on base-cluster. @@ -89,6 +89,13 @@ static ICRDTService newInstance(IAgentHost agentHost, @NonNull CRDTServiceOption */ Observable> aliveCRDTs(); + /** + * A signal to refresh the CRDT replica hosted in the service. + * + * @return an observable that emits refresh signal + */ + Observable refreshSignal(); + /** * Stop the store. 
*/ diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java index d3662f976..0d91e8410 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/AWORSetInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IAWORSet; import org.apache.bifromq.basecrdt.core.api.IAWORSetInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class AWORSetInflater extends CausalCRDTInflater implements IAWORSetInflater { - AWORSetInflater(Replica replica, + AWORSetInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java index 4bef5345e..06c7e5810 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CCounterInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CCounterOperation; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.ICCounter; import org.apache.bifromq.basecrdt.core.api.ICCounterInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class CCounterInflater extends CausalCRDTInflater implements ICCounterInflater { - CCounterInflater(Replica replica, + CCounterInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java index f38d8bd02..ba9e912c5 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/CausalCRDTInflater.java @@ -73,11 +73,13 @@ abstract class CausalCRDTInflater new AWORSetInflater(replicaId, lattice, executor, inflationInterval, tags); - case rworset -> new RWORSetInflater(replicaId, lattice, executor, inflationInterval, tags); - case ormap -> new ORMapInflater(replicaId, lattice, executor, inflationInterval, tags); - case cctr -> new CCounterInflater(replicaId, lattice, executor, inflationInterval, tags); - case dwflag -> new DWFlagInflater(replicaId, lattice, executor, inflationInterval, tags); - case ewflag -> new EWFlagInflater(replicaId, lattice, executor, inflationInterval, tags); - case mvreg -> new MVRegInflater(replicaId, lattice, executor, inflationInterval, tags); + case aworset -> new AWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case rworset -> new RWORSetInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case ormap -> new ORMapInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case cctr -> new CCounterInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case dwflag -> new DWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case ewflag -> new EWFlagInflater(storeId, replicaId, lattice, executor, inflationInterval, tags); + case mvreg -> new MVRegInflater(storeId, replicaId, lattice, 
executor, inflationInterval, tags); }; } } diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java index bd8a654ea..af8276cca 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/DWFlagInflater.java @@ -19,21 +19,22 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; import org.apache.bifromq.basecrdt.core.api.IDWFlag; import org.apache.bifromq.basecrdt.core.api.IDWFlagInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class DWFlagInflater extends CausalCRDTInflater implements IDWFlagInflater { - DWFlagInflater(Replica replica, + DWFlagInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java index e48b94642..dbcd80958 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/EWFlagInflater.java @@ -19,18 +19,18 @@ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; import org.apache.bifromq.basecrdt.core.api.IEWFlag; import org.apache.bifromq.basecrdt.core.api.IEWFlagInflater; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class EWFlagInflater extends CausalCRDTInflater implements IEWFlagInflater { - EWFlagInflater(Replica replica, IReplicaStateLattice stateLattice, + EWFlagInflater(String storeId, Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... 
tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java index d12fa3209..0b42d7c54 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLattice.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; @@ -69,9 +69,10 @@ class InMemReplicaStateLattice implements IReplicaStateLattice { private final Duration historyExpire; private final long maxCompactionDuration; - InMemReplicaStateLattice(Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) { + InMemReplicaStateLattice(String storeId, Replica ownerReplica, Duration historyExpire, Duration maxCompactionTime) { this.ownerReplica = ownerReplica; - this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class, "replica", print(ownerReplica)); + this.log = MDCLogger.getLogger(InMemReplicaStateLattice.class, + "store", storeId, "replica", print(ownerReplica)); this.historyExpire = historyExpire; this.maxCompactionDuration = maxCompactionTime.toNanos(); } diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java index 3044da2f4..497844b6d 100644 --- 
a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/MVRegInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IMVRegInflater; import org.apache.bifromq.basecrdt.core.api.MVRegOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class MVRegInflater extends CausalCRDTInflater implements IMVRegInflater { - MVRegInflater(Replica replica, + MVRegInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java index 137b62343..aa8bec0b6 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/ORMapInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. 
See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IORMap; import org.apache.bifromq.basecrdt.core.api.IORMapInflater; import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class ORMapInflater extends CausalCRDTInflater implements IORMapInflater { - ORMapInflater(Replica replica, + ORMapInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java index 8698aa25c..c842a58a4 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/core/internal/RWORSetInflater.java @@ -14,26 +14,27 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; +import java.time.Duration; +import java.util.concurrent.ScheduledExecutorService; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IRWORSet; import org.apache.bifromq.basecrdt.core.api.IRWORSetInflater; import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import java.time.Duration; -import java.util.concurrent.ScheduledExecutorService; class RWORSetInflater extends CausalCRDTInflater implements IRWORSetInflater { - RWORSetInflater(Replica replica, + RWORSetInflater(String storeId, + Replica replica, IReplicaStateLattice stateLattice, ScheduledExecutorService executor, Duration inflationInterval, String... tags) { - super(replica, stateLattice, executor, inflationInterval, tags); + super(storeId, replica, stateLattice, executor, inflationInterval, tags); } @Override diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java index dff26bf68..6e2a39f1d 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropy.java @@ -65,6 +65,9 @@ final class AntiEntropy { private long currentNeighborVer; private long currentInflationTs; private DeltaMessage currentDelta = null; + // track if the last sent delta contains replacements (i.e., real diff), + // so that after ACK we can proactively continue to drain remaining deltas + private boolean lastSentHasReplacement = false; AntiEntropy(String storeId, ByteString localAddr, @@ -110,31 +113,51 @@ void updateObservedNeighborHistory(long ver, } void handleAck(AckMessage ack) { - if (canceled.get() || !running.get()) { + if (canceled.get()) { return; } synchronized (this) { - if (!running.get() || 
currentDelta == null) { - return; - } - if (ack.getSeqNo() != currentDelta.getSeqNo()) { + // Case 1: Matched ACK for in-flight delta + if (running.get() && currentDelta != null && ack.getSeqNo() == currentDelta.getSeqNo()) { + // currentDelta has been ack'ed + currentDelta = null; + if (resendTask != null) { + resendTask.cancel(false); + } + // reset resend counter after a successful ack to avoid inflated backoff + resendCount = 0; + if (ack.getVer() > neighborVer) { + // got newer neighbor's history + neighborVer = ack.getVer(); + neighborLatticeIndex = to(ack.getLatticeEventsList()); + neighborHistoryIndex = to(ack.getHistoryEventsList()); + } + running.set(false); + // Proactively continue if: + // - probe success (currentNeighborVer==0), or + // - local inflation happened, or + // - neighbor's version advanced since we computed delta, or + // - we just sent a batch of replacements and may have more to drain + if (currentNeighborVer == 0 + || lastInflationTs != currentInflationTs + || ack.getVer() > currentNeighborVer + || lastSentHasReplacement) { + scheduleRun(); + } + // clear the flag after scheduling decision + lastSentHasReplacement = false; return; } - // currentDelta has been ack'ed - currentDelta = null; - if (resendTask != null) { - resendTask.cancel(false); - } + + // Case 2: Late or unmatched ACK. Use it to advance neighbor index if it's newer. 
if (ack.getVer() > neighborVer) { - // got newer neighbor's history neighborVer = ack.getVer(); neighborLatticeIndex = to(ack.getLatticeEventsList()); neighborHistoryIndex = to(ack.getHistoryEventsList()); - } - running.set(false); - // if there are new inflation happened or probe success, restart the task - if (currentNeighborVer == 0 || lastInflationTs != currentInflationTs) { - scheduleRun(); + // try schedule a run if we are not currently running + if (!running.get()) { + scheduleRun(); + } } } } @@ -180,6 +203,7 @@ private void run() { .addAllHistoryEvents(to(crdtInflater.historyEvents())) .setVer(HLC.INST.get()) .build(); + lastSentHasReplacement = false; send(currentDelta); } else { // Calculate delta @@ -200,6 +224,7 @@ private void run() { .addAllHistoryEvents(to(crdtInflater.historyEvents())) .setVer(HLC.INST.get()) .build(); + lastSentHasReplacement = true; send(currentDelta); } else { currentDelta = null; @@ -219,7 +244,7 @@ private void run() { private void send(DeltaMessage deltaMessage) { log.trace("Local[{}] send delta to neighbor[{}]:\n{}", toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(deltaMessage)); - neighborMessageSubject.onNext(new NeighborMessage(deltaMessage, neighborAddr)); + emit(deltaMessage); // Schedule timer task for resend scheduleResend(deltaMessage); } @@ -239,9 +264,7 @@ private void resend(DeltaMessage toResend) { if (currentDelta == toResend) { log.trace("Local[{}] resend delta to neighbor[{}]:\n{}", toPrintable(localAddr), toPrintable(neighborAddr), toPrintable(toResend)); - deltaMsgCounter.increment(1D); - deltaMsgBytesCounter.increment(currentDelta.getSerializedSize()); - neighborMessageSubject.onNext(new NeighborMessage(currentDelta, neighborAddr)); + emit(currentDelta); if (resendCount++ < 10) { scheduleResend(toResend); } else { @@ -262,4 +285,10 @@ private void resend(DeltaMessage toResend) { private long resendDelay() { return ThreadLocalRandom.current().nextLong(500, 2000) * (resendCount + 1); } + + 
private void emit(DeltaMessage delta) { + deltaMsgCounter.increment(); + deltaMsgBytesCounter.increment(delta.getSerializedSize()); + neighborMessageSubject.onNext(new NeighborMessage(delta, neighborAddr)); + } } diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java index e942ddaf6..2ed3cd52c 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/AntiEntropyManager.java @@ -99,7 +99,17 @@ Observable neighborMessages() { CompletableFuture receive(DeltaMessage delta, ByteString sender) { log.trace("Local[{}] receive delta[{}] from addr[{}]:\n{}", toPrintable(localAddr), delta.getSeqNo(), toPrintable(sender), toPrintable(delta)); - metricManager.receiveDeltaNum.increment(1D); + return handleDelta(delta, sender).thenApply(ack -> { + metricManager.sendAckNum.increment(); + metricManager.sendAckBytes.increment(ack.getSerializedSize()); + log.trace("Local[{}] send ack[{}] to addr[{}]:\n{}", + toPrintable(localAddr), ack.getSeqNo(), toPrintable(sender), toPrintable(ack)); + return ack; + }); + } + + private CompletableFuture handleDelta(DeltaMessage delta, ByteString sender) { + metricManager.receiveDeltaNum.increment(); metricManager.receiveDeltaBytes.increment(delta.getSerializedSize()); AntiEntropy neighborAntiEntropy = neighborMap.get(sender); if (neighborAntiEntropy != null) { @@ -124,7 +134,7 @@ CompletableFuture receive(DeltaMessage delta, ByteString sender) { } void receive(AckMessage ack, ByteString neighborAddr) { - metricManager.receiveAckNum.increment(1D); + metricManager.receiveAckNum.increment(); metricManager.receiveAckBytes.increment(ack.getSerializedSize()); AntiEntropy neighborAntiEntropy = neighborMap.get(neighborAddr); if (neighborAntiEntropy != null) { diff --git 
a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java index 9201d72c1..7a639809c 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/store/CRDTStore.java @@ -72,6 +72,7 @@ public CRDTStore(CRDTStoreOptions options) { storeExecutor = options.storeExecutor(); String[] tags = new String[] {"store.id", storeId}; inflaterFactory = new CausalCRDTInflaterFactory( + options.id(), options.inflationInterval(), options.orHistoryExpireTime(), options.maxCompactionTime(), diff --git a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java index 4fc6aa7b4..a7672009e 100644 --- a/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java +++ b/base-crdt/base-crdt-store/src/main/java/org/apache/bifromq/basecrdt/util/Formatter.java @@ -38,33 +38,39 @@ public static String print(Replica replica) { } public static Supplier toPrintable(Replica replica) { - return () -> replica.getUri() + "-" + BaseEncoding.base32().encode(replica.getId().toByteArray()); + return () -> replica.getUri() + "-" + replica.hashCode(); } - public static String toPrintable(DeltaMessage delta) { - try { - return JsonFormat.printer().print(delta); - } catch (Exception e) { - // ignore - return delta.toString(); - } + public static Supplier toPrintable(DeltaMessage delta) { + return () -> { + try { + return JsonFormat.printer().print(delta); + } catch (Exception e) { + // ignore + return delta.toString(); + } + }; } - public static String toPrintable(AckMessage ack) { - try { - return JsonFormat.printer().print(ack); - } catch (Exception e) { - // ignore - return ack.toString(); - } + public static Supplier 
toPrintable(AckMessage ack) { + return () -> { + try { + return JsonFormat.printer().print(ack); + } catch (Exception e) { + // ignore + return ack.toString(); + } + }; } - public static String toPrintable(CRDTStoreMessage ack) { - try { - return JsonFormat.printer().print(ack); - } catch (Exception e) { - // ignore - return ack.toString(); - } + public static Supplier toPrintable(CRDTStoreMessage ack) { + return () -> { + try { + return JsonFormat.printer().print(ack); + } catch (Exception e) { + // ignore + return ack.toString(); + } + }; } } diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java index 398c4a70a..6adbba88f 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/benchmark/CRDTBenchmarkTemplate.java @@ -14,20 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.benchmark; import static com.google.protobuf.UnsafeByteOperations.unsafeWrap; -import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory; import com.google.protobuf.ByteString; import java.io.IOException; import java.nio.ByteBuffer; import java.time.Duration; import java.util.concurrent.Executors; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.internal.CausalCRDTInflaterFactory; import org.openjdk.jmh.annotations.Setup; import org.openjdk.jmh.annotations.TearDown; import org.openjdk.jmh.runner.Runner; @@ -42,7 +42,7 @@ public abstract class CRDTBenchmarkTemplate { @Setup public void setup() throws IOException { - inflaterFactory = new CausalCRDTInflaterFactory( + inflaterFactory = new CausalCRDTInflaterFactory("testStoreId", Duration.ofMillis(200), Duration.ofSeconds(20), Duration.ofMillis(200), Executors.newSingleThreadScheduledExecutor()); doSetup(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java index c8e7d12e5..98293ad94 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/AWORSetTest.java @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; -import org.apache.bifromq.basecrdt.core.api.IAWORSet; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; +import org.apache.bifromq.basecrdt.core.api.IAWORSet; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; 
public class AWORSetTest extends CRDTTest { @@ -48,9 +48,9 @@ public class AWORSetTest extends CRDTTest { @Test public void testOperation() { - AWORSetInflater aworSetInflater = - new AWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000), - executor, Duration.ofMillis(100)); + AWORSetInflater aworSetInflater = new AWORSetInflater("testStore", leftReplica, + newStateLattice(leftReplica, 1000), + executor, Duration.ofMillis(100)); IAWORSet aworSet = aworSetInflater.getCRDT(); assertEquals(aworSet.id(), leftReplica); @@ -82,11 +82,11 @@ public void testOperation() { @Test public void testJoin() { - AWORSetInflater leftInflater = new AWORSetInflater(leftReplica, + AWORSetInflater leftInflater = new AWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IAWORSet left = leftInflater.getCRDT(); - AWORSetInflater rightInflater = new AWORSetInflater(rightReplica, + AWORSetInflater rightInflater = new AWORSetInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IAWORSet right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java index e0ffbf94f..6352f320a 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CCounterTest.java @@ -24,14 +24,14 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.CCounterOperation; -import org.apache.bifromq.basecrdt.core.api.ICCounter; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import 
io.reactivex.rxjava3.observers.TestObserver; import java.time.Duration; import java.util.Optional; +import org.apache.bifromq.basecrdt.core.api.CCounterOperation; +import org.apache.bifromq.basecrdt.core.api.ICCounter; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class CCounterTest extends CRDTTest { @@ -46,7 +46,7 @@ public class CCounterTest extends CRDTTest { @Test public void testOperation() { - CCounterInflater cctrInflater = new CCounterInflater(leftReplica, + CCounterInflater cctrInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); ICCounter cctr = cctrInflater.getCRDT(); assertEquals(cctr.id(), leftReplica); @@ -73,11 +73,11 @@ public void testOperation() { @Test public void testJoin() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); ICCounter right = rightInflater.getCRDT(); @@ -101,11 +101,11 @@ public void testJoin() { @Test public void testZeroOut() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); 
ICCounter right = rightInflater.getCRDT(); @@ -129,11 +129,11 @@ public void testZeroOut() { @Test public void testZeroOutInBatch() { - CCounterInflater leftInflater = new CCounterInflater(leftReplica, + CCounterInflater leftInflater = new CCounterInflater("leftStore", leftReplica, newStateLattice(leftReplica, 100000), executor, Duration.ofMillis(100)); ICCounter left = leftInflater.getCRDT(); - CCounterInflater rightInflater = new CCounterInflater(rightReplica, + CCounterInflater rightInflater = new CCounterInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100000), executor, Duration.ofMillis(100)); ICCounter right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java index 1bd623184..5510283ee 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/CRDTTest.java @@ -19,8 +19,6 @@ package org.apache.bifromq.basecrdt.core.internal; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.util.concurrent.MoreExecutors; import java.time.Duration; import java.util.Optional; @@ -28,6 +26,8 @@ import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; @@ -45,7 +45,7 @@ public void tearDown() { } protected IReplicaStateLattice newStateLattice(Replica ownerReplica, long historyDurationInMS) { - return new InMemReplicaStateLattice(ownerReplica, + return new InMemReplicaStateLattice("storeId", ownerReplica, 
Duration.ofMillis(historyDurationInMS), Duration.ofMillis(200)); } diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java index 6f645d657..8a15a977a 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/DWFlagTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; -import org.apache.bifromq.basecrdt.core.api.IDWFlag; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.api.DWFlagOperation; +import org.apache.bifromq.basecrdt.core.api.IDWFlag; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; @Slf4j @@ -46,7 +46,7 @@ public class DWFlagTest extends CRDTTest { @Test public void testOperation() { - DWFlagInflater dwFlagInflater = new DWFlagInflater(leftReplica, + DWFlagInflater dwFlagInflater = new DWFlagInflater("testStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IDWFlag dwFlag = dwFlagInflater.getCRDT(); assertEquals(dwFlag.id(), leftReplica); @@ -65,12 +65,12 @@ public void testOperation() { @Test public void testJoin() { - DWFlagInflater leftInflater = new DWFlagInflater(leftReplica, + DWFlagInflater leftInflater = new DWFlagInflater("store1", 
leftReplica, newStateLattice(leftReplica, 1000000), executor, Duration.ofMillis(100)); IDWFlag left = leftInflater.getCRDT(); - DWFlagInflater rightInflater = new DWFlagInflater(rightReplica, + DWFlagInflater rightInflater = new DWFlagInflater("store2", rightReplica, newStateLattice(rightReplica, 1000000), executor, Duration.ofMillis(100)); IDWFlag right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java index 86bdf261a..8e6f216e6 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/EWFlagTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; @@ -25,13 +25,13 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; -import org.apache.bifromq.basecrdt.core.api.IEWFlag; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.protobuf.ByteString; import io.reactivex.rxjava3.observers.TestObserver; import java.time.Duration; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basecrdt.core.api.EWFlagOperation; +import org.apache.bifromq.basecrdt.core.api.IEWFlag; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; @Slf4j @@ -47,7 +47,7 @@ public class EWFlagTest extends CRDTTest { @Test public void testOperation() { - EWFlagInflater ewFlagInflater = new EWFlagInflater(leftReplica, + EWFlagInflater ewFlagInflater = new EWFlagInflater("testStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IEWFlag ewFlag = ewFlagInflater.getCRDT(); @@ -67,11 +67,11 @@ public void testOperation() { @Test public void testJoin() { - EWFlagInflater leftInflater = new EWFlagInflater(leftReplica, + EWFlagInflater leftInflater = new EWFlagInflater("lestStore", leftReplica, newStateLattice(leftReplica, 1000000), executor, Duration.ofMillis(100)); IEWFlag left = leftInflater.getCRDT(); - EWFlagInflater rightInflater = new EWFlagInflater(rightReplica, + EWFlagInflater rightInflater = new EWFlagInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000000), executor, Duration.ofMillis(100)); IEWFlag right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java index 3ce126904..6c0c91805 100644 --- 
a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/InMemReplicaStateLatticeTest.java @@ -14,46 +14,47 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basecrdt.core.internal; -import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements; -import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot; import static com.google.common.collect.Lists.newArrayList; import static com.google.common.collect.Sets.newHashSet; import static com.google.protobuf.ByteString.copyFromUtf8; import static java.util.Collections.emptyMap; import static java.util.Collections.singleton; +import static org.apache.bifromq.basecrdt.core.internal.EventHistoryUtil.isRemembering; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.dot; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacement; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.replacements; +import static org.apache.bifromq.basecrdt.core.internal.ProtoUtils.singleDot; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.proto.Replacement; -import org.apache.bifromq.basecrdt.proto.Replica; -import org.apache.bifromq.basecrdt.proto.StateLattice; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import 
java.time.Duration; import java.util.List; import java.util.Optional; import java.util.Set; +import org.apache.bifromq.basecrdt.proto.Replacement; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.proto.StateLattice; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class InMemReplicaStateLatticeTest { - private InMemReplicaStateLattice testLattice; private final Replica ownerReplica = Replica.newBuilder().setId(copyFromUtf8("Owner")).build(); private final ByteString replicaA = copyFromUtf8("A"); private final ByteString replicaB = copyFromUtf8("B"); + private InMemReplicaStateLattice testLattice; @BeforeMethod public void setup() { - testLattice = new InMemReplicaStateLattice(ownerReplica, Duration.ofMillis(1000), Duration.ofMillis(200)); + testLattice = new InMemReplicaStateLattice("storeId", ownerReplica, Duration.ofMillis(1000), + Duration.ofMillis(200)); assertFalse(testLattice.lattices().hasNext()); } @@ -495,7 +496,7 @@ public void testCompact6() throws InterruptedException { } @Test - public void compact7() throws InterruptedException { + public void testCompact7() throws InterruptedException { Set states = newHashSet( replacement(dot(replicaA, 4, singleDot(replicaA, 4)), dot(replicaA, 3), dot(replicaA, 1)), replacement(dot(replicaA, 2), dot(replicaA, 1))); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java index 3f18b2793..cb99c3ea6 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/MVRegTest.java @@ -19,19 +19,19 @@ package org.apache.bifromq.basecrdt.core.internal; +import static java.util.Collections.emptyIterator; import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI; 
import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; -import static java.util.Collections.emptyIterator; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; -import org.apache.bifromq.basecrdt.core.api.IMVReg; -import org.apache.bifromq.basecrdt.core.api.MVRegOperation; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.IMVReg; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class MVRegTest extends CRDTTest { @@ -49,7 +49,7 @@ public class MVRegTest extends CRDTTest { @Test public void testOperation() { - MVRegInflater mvRegInflater = new MVRegInflater(leftReplica, + MVRegInflater mvRegInflater = new MVRegInflater("storeId", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IMVReg mvReg = mvRegInflater.getCRDT(); assertEquals(mvReg.id(), leftReplica); @@ -67,11 +67,11 @@ public void testOperation() { @Test public void testJoin() { - MVRegInflater leftInflater = new MVRegInflater(leftReplica, newStateLattice(leftReplica, 10000), + MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 10000), executor, Duration.ofMillis(100)); IMVReg left = leftInflater.getCRDT(); - MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 10000), + MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 10000), executor, Duration.ofMillis(100)); IMVReg right = rightInflater.getCRDT(); @@ -96,11 +96,11 @@ public void testJoin() { @Test public void testJoin1() throws InterruptedException { - MVRegInflater leftInflater = new MVRegInflater(leftReplica, 
newStateLattice(leftReplica, 1000), + MVRegInflater leftInflater = new MVRegInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IMVReg left = leftInflater.getCRDT(); - MVRegInflater rightInflater = new MVRegInflater(rightReplica, newStateLattice(rightReplica, 1000), + MVRegInflater rightInflater = new MVRegInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IMVReg right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java index 54afb219a..fd5c5ecc3 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/ORMapTest.java @@ -19,15 +19,23 @@ package org.apache.bifromq.basecrdt.core.internal; +import static java.util.Collections.emptySet; import static org.apache.bifromq.basecrdt.core.api.CRDTURI.toURI; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.mvreg; import static org.apache.bifromq.basecrdt.core.api.CausalCRDTType.ormap; -import static java.util.Collections.emptySet; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertNotEquals; import static org.testng.Assert.assertTrue; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.disposables.Disposable; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecrdt.core.api.AWORSetOperation; import org.apache.bifromq.basecrdt.core.api.CCounterOperation; import 
org.apache.bifromq.basecrdt.core.api.CausalCRDTType; @@ -44,14 +52,6 @@ import org.apache.bifromq.basecrdt.core.api.ORMapOperation; import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; import org.apache.bifromq.basecrdt.proto.Replica; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; -import com.google.protobuf.ByteString; -import io.reactivex.rxjava3.disposables.Disposable; -import java.time.Duration; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.slf4j.Slf4j; import org.testng.annotations.Test; @Slf4j @@ -76,7 +76,7 @@ public class ORMapTest extends CRDTTest { @Test public void testOperation() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("storeId", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap ormap = orMapInflater.getCRDT(); assertEquals(ormap.id(), leftReplica); @@ -191,11 +191,11 @@ public void testOperation() { @Test public void testJoin() { - ORMapInflater leftInflater = new ORMapInflater(leftReplica, + ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap leftMap = leftInflater.getCRDT(); - ORMapInflater rightInflater = new ORMapInflater(rightReplica, + ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IORMap rightMap = rightInflater.getCRDT(); @@ -286,11 +286,11 @@ public void testJoin() { @Test public void testJoinAfterCompaction() throws InterruptedException { - ORMapInflater leftInflater = new ORMapInflater(leftReplica, + ORMapInflater leftInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap leftMap = leftInflater.getCRDT(); - ORMapInflater 
rightInflater = new ORMapInflater(rightReplica, + ORMapInflater rightInflater = new ORMapInflater("rightStore", rightReplica, newStateLattice(rightReplica, 100), executor, Duration.ofMillis(100)); IORMap rightMap = rightInflater.getCRDT(); @@ -318,7 +318,7 @@ public void testJoinAfterCompaction() throws InterruptedException { @Test public void testSubCRDTGC() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap orMap = orMapInflater.getCRDT(); @@ -344,7 +344,7 @@ public void testSubCRDTGC() { @Test public void testInflationSubscriptionWhenGC() { - ORMapInflater orMapInflater = new ORMapInflater(leftReplica, newStateLattice(leftReplica, 1000), + ORMapInflater orMapInflater = new ORMapInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IORMap orMap = orMapInflater.getCRDT(); AtomicInteger inflationCount = new AtomicInteger(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java index 4d3320501..e677b8dda 100644 --- a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java +++ b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/core/internal/RWORSetTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basecrdt.core.internal; @@ -25,12 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basecrdt.core.api.IRWORSet; -import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; -import org.apache.bifromq.basecrdt.proto.Replica; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import java.time.Duration; +import org.apache.bifromq.basecrdt.core.api.IRWORSet; +import org.apache.bifromq.basecrdt.core.api.RWORSetOperation; +import org.apache.bifromq.basecrdt.proto.Replica; import org.testng.annotations.Test; public class RWORSetTest extends CRDTTest { @@ -49,7 +49,7 @@ public class RWORSetTest extends CRDTTest { @Test public void testOperation() { RWORSetInflater rworSetInflater = - new RWORSetInflater(leftReplica, newStateLattice(leftReplica, 1000), + new RWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet rworSet = rworSetInflater.getCRDT(); assertEquals(rworSet.id(), leftReplica); @@ -82,11 +82,11 @@ public void testOperation() { @Test public void testJoin() { - RWORSetInflater leftInflater = new RWORSetInflater(leftReplica, + RWORSetInflater leftInflater = new RWORSetInflater("leftStore", leftReplica, newStateLattice(leftReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet left = leftInflater.getCRDT(); - RWORSetInflater rightInflater = new RWORSetInflater(rightReplica, + RWORSetInflater rightInflater = new RWORSetInflater("rightStore", rightReplica, newStateLattice(rightReplica, 1000), executor, Duration.ofMillis(100)); IRWORSet right = rightInflater.getCRDT(); diff --git a/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java new file mode 100644 index 000000000..5d414a735 --- /dev/null +++ 
b/base-crdt/base-crdt-store/src/test/java/org/apache/bifromq/basecrdt/store/AntiEntropyResilienceTest.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.bifromq.basecrdt.store; + +import static org.awaitility.Awaitility.await; + +import com.google.common.collect.Sets; +import com.google.protobuf.ByteString; +import io.reactivex.rxjava3.core.Observable; +import io.reactivex.rxjava3.schedulers.Schedulers; +import io.reactivex.rxjava3.subjects.PublishSubject; +import io.reactivex.rxjava3.subjects.Subject; +import java.time.Duration; +import java.util.Collections; +import org.apache.bifromq.basecrdt.core.api.CRDTURI; +import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; +import org.apache.bifromq.basecrdt.core.api.IMVReg; +import org.apache.bifromq.basecrdt.core.api.IORMap; +import org.apache.bifromq.basecrdt.core.api.MVRegOperation; +import org.apache.bifromq.basecrdt.core.api.ORMapOperation; +import org.apache.bifromq.basecrdt.proto.Replica; +import org.apache.bifromq.basecrdt.store.compressor.GzipCompressor; +import org.apache.bifromq.basecrdt.store.proto.CRDTStoreMessage; +import org.apache.bifromq.basecrdt.store.proto.MessagePayload; +import 
org.testng.annotations.AfterMethod; +import org.testng.annotations.Test; + +public class AntiEntropyResilienceTest { + private ICRDTStore storeA; + private ICRDTStore storeB; + private Subject chAB; + private Subject chBA; + + @AfterMethod(alwaysRun = true) + public void teardown() { + if (storeA != null) { + storeA.stop(); + storeA = null; + } + if (storeB != null) { + storeB.stop(); + storeB = null; + } + } + + @Test(groups = "integration") + public void testConvergeWithDroppedAckOnce() { + CRDTStoreOptions optsA = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + CRDTStoreOptions optsB = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + storeA = ICRDTStore.newInstance(optsA); + storeB = ICRDTStore.newInstance(optsB); + + chAB = PublishSubject.create().toSerialized(); + chBA = PublishSubject.create().toSerialized(); + + // Interpose B->A path to drop the first ACK intentionally to exercise resend/late-ack path + GzipCompressor compressor = new GzipCompressor(); + final boolean[] firstAckDropped = {false}; + + // Start stores with the interposed channels + storeA.start(chBA); + storeB.start(chAB + .flatMap(msg -> { + // inspect payload; if it's ACK and first time, drop it once + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && !firstAckDropped[0]) { + firstAckDropped[0] = true; + // drop this ack + return Observable.empty(); + } + return Observable.just(msg); + })); + + storeA.storeMessages() + .observeOn(Schedulers.single()) + .subscribe(chAB::onNext); + storeB.storeMessages() + .observeOn(Schedulers.single()) + .subscribe(chBA::onNext); + + String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test"); + // Build replicas + Replica rA = ReplicaIdGenerator.generate(uri); + Replica rB = ReplicaIdGenerator.generate(uri); + ByteString addrA = 
ByteString.copyFromUtf8("A"); + ByteString addrB = ByteString.copyFromUtf8("B"); + + // Host replicas + IORMap ormapA = storeA.host(rA, addrA); + IORMap ormapB = storeB.host(rB, addrB); + + // Join neighbors + storeA.join(rA, Collections.singleton(addrB)); + storeB.join(rB, Collections.singleton(addrA)); + + // Write a value from A + ByteString key = ByteString.copyFromUtf8("k"); + ByteString val = ByteString.copyFromUtf8("v1"); + ormapA.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join(); + + await().until(() -> { + IMVReg regB = ormapB.getMVReg(key); + ByteString read = Sets.newHashSet(regB.read()).stream().findFirst().orElse(ByteString.EMPTY); + return val.equals(read); + }); + } + + @Test(groups = "integration") + public void testConvergeWithLateUnmatchedAck() { + CRDTStoreOptions optsC = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + CRDTStoreOptions optsD = CRDTStoreOptions.builder() + .inflationInterval(Duration.ofMillis(50)) + .maxEventsInDelta(16) + .build(); + ICRDTStore storeC = ICRDTStore.newInstance(optsC); + ICRDTStore storeD = ICRDTStore.newInstance(optsD); + + Subject cToD = PublishSubject.create().toSerialized(); + Subject dToC = PublishSubject.create().toSerialized(); + + GzipCompressor compressor = new GzipCompressor(); + final CRDTStoreMessage[] delayedAck = {null}; + final int[] deltaCountFromC = {0}; + + // Wire inbound with logic: buffer first ACK from D->C, only deliver after second DELTA from C + storeC.start(dToC + .flatMap(msg -> { + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == MessagePayload.MsgTypeCase.ACK && delayedAck[0] == null) { + delayedAck[0] = msg; // buffer first ACK + return Observable.empty(); + } + return Observable.just(msg); + })); + storeD.start(cToD + .flatMap(msg -> { + MessagePayload payload = MessagePayloadUtil.decompress(compressor, msg); + if (payload.getMsgTypeCase() == 
MessagePayload.MsgTypeCase.DELTA) { + deltaCountFromC[0]++; + if (deltaCountFromC[0] >= 2 && delayedAck[0] != null) { + CRDTStoreMessage ack = delayedAck[0]; + delayedAck[0] = null; + dToC.onNext(ack); + } + } + return Observable.just(msg); + })); + + storeC.storeMessages().observeOn(Schedulers.single()).subscribe(cToD::onNext); + storeD.storeMessages().observeOn(Schedulers.single()).subscribe(dToC::onNext); + + // Host replicas + String uri = CRDTURI.toURI(CausalCRDTType.ormap, "test-late-ack"); + Replica rC = ReplicaIdGenerator.generate(uri); + Replica rD = ReplicaIdGenerator.generate(uri); + ByteString addrC = ByteString.copyFromUtf8("C"); + ByteString addrD = ByteString.copyFromUtf8("D"); + IORMap ormapC = storeC.host(rC, addrC); + IORMap ormapD = storeD.host(rD, addrD); + storeC.join(rC, Collections.singleton(addrD)); + storeD.join(rD, Collections.singleton(addrC)); + + // Write on C + ByteString key = ByteString.copyFromUtf8("k2"); + ByteString val = ByteString.copyFromUtf8("v2"); + ormapC.execute(ORMapOperation.update(key).with(MVRegOperation.write(val))).join(); + + // Await convergence on D even though first ACK is delivered late and unmatched + await().until(() -> { + IMVReg regD = ormapD.getMVReg(key); + ByteString read = Sets.newHashSet(regD.read()).stream().findFirst().orElse(ByteString.EMPTY); + return val.equals(read); + }); + + storeC.stop(); + storeD.stop(); + } +} diff --git a/base-kv/base-kv-meta-service/pom.xml b/base-kv/base-kv-meta-service/pom.xml index 97e4c3ac3..55d6f75a6 100644 --- a/base-kv/base-kv-meta-service/pom.xml +++ b/base-kv/base-kv-meta-service/pom.xml @@ -39,6 +39,14 @@ org.apache.bifromq base-kv-type-proto + + org.apache.bifromq + base-logger + + + org.apache.bifromq + base-util + io.reactivex.rxjava3 rxjava diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java 
b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java index cc6523180..0cbde7bd7 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeCRDT.java @@ -37,7 +37,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -47,21 +47,39 @@ import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVLandscapeCRDT implements IBaseKVLandscapeCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; private final IORMap landscapeORMap; private final BehaviorSubject> landscapeSubject = BehaviorSubject.create(); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVLandscapeCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVLandscapeCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.landscapeORMap = crdtService.host(toLandscapeURI(clusterId)); disposable.add(landscapeORMap.inflation() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(this::buildLandscape) .subscribe(landscapeSubject::onNext)); + disposable.add(Observable.combineLatest(landscape(), aliveReplicas(), (StoreDescriptorAndReplicas::new)) + .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) + .subscribe(this::houseKeep)); + } + + 
@Override + public String clusterId() { + return clusterId; + } + + @Override + public Observable refreshSignal() { + return crdtService.refreshSignal(); } public Observable> aliveReplicas() { @@ -124,4 +142,31 @@ private Optional buildLandscape(IMVReg mvReg) { l.sort((a, b) -> Long.compareUnsigned(b.getHlc(), a.getHlc())); return Optional.ofNullable(l.isEmpty() ? null : l.get(0)); } + + private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) { + Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap; + Set aliveReplicas = storeDescriptorAndReplicas.replicaIds; + for (StoreKey storeKey : storedDescriptors.keySet()) { + if (!aliveReplicas.contains(storeKey.getReplicaId()) + && shouldClean(aliveReplicas, storeKey.getReplicaId())) { + log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId()); + removeDescriptor(storeKey); + } + } + } + + private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { + // Choose cleaner deterministically from the identical aliveReplicas set across nodes. 
+ RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveReplicas) + .build(); + ByteString cleaner = hash.get(failedReplicas); + return cleaner != null && cleaner.equals(landscapeORMap.id().getId()); + } + + private record StoreDescriptorAndReplicas(Map descriptorMap, + Set replicaIds) { + } } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java index 6f529b78a..57bde6ba3 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeObserver.java @@ -26,13 +26,17 @@ import java.util.Map; import java.util.Optional; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver { + private final Logger log; private final BehaviorSubject> landscapeSubject = BehaviorSubject.create(); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVLandscapeObserver(IBaseKVLandscapeCRDT landscapeCRDT) { + this.log = MDCLogger.getLogger(BaseKVLandscapeObserver.class, "clusterId", landscapeCRDT.clusterId()); disposable.add(landscapeCRDT.landscape() .map(descriptorMap -> { Map descriptorMapByStoreId = new HashMap<>(); @@ -42,6 +46,7 @@ class BaseKVLandscapeObserver implements IBaseKVLandscapeObserver { } return v.getHlc() > value.getHlc() ? 
v : value; })); + log.debug("Landscape changed: {}", descriptorMapByStoreId); return descriptorMapByStoreId; }) .subscribe(landscapeSubject::onNext)); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java index 3c45e0a4c..903474fef 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReporter.java @@ -19,68 +19,61 @@ package org.apache.bifromq.basekv.metaservice; -import com.google.protobuf.ByteString; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVLandscapeReporter implements IBaseKVLandscapeReporter { + private final Logger log; private final String storeId; private final IBaseKVLandscapeCRDT landscapeCRDT; private final CompositeDisposable disposable = new CompositeDisposable(); private volatile KVRangeStoreDescriptor latestDescriptor; BaseKVLandscapeReporter(String storeId, IBaseKVLandscapeCRDT landscapeCRDT) { + this.log = MDCLogger.getLogger(BaseKVLandscapeReporter.class, "clusterId", landscapeCRDT.clusterId(), + "storeId", storeId); this.storeId = storeId; this.landscapeCRDT = landscapeCRDT; - disposable.add(Observable.combineLatest( - landscapeCRDT.landscape(), - landscapeCRDT.aliveReplicas(), - (StoreDescriptorAndReplicas::new)) + disposable.add(landscapeCRDT.landscape() 
.observeOn(IBaseKVMetaService.SHARED_SCHEDULER) - .subscribe(this::houseKeep)); + .subscribe(this::afterInflation)); } @Override public CompletableFuture report(KVRangeStoreDescriptor descriptor) { Optional descriptorOnCRDT = landscapeCRDT.getStoreDescriptor(descriptor.getId()); if (descriptorOnCRDT.isEmpty() || !descriptorOnCRDT.get().equals(descriptor)) { + this.latestDescriptor = descriptor; return landscapeCRDT.setStoreDescriptor(descriptor); } return CompletableFuture.completedFuture(null); } + @Override + public Observable refreshSignal() { + return landscapeCRDT.refreshSignal(); + } + @Override public void stop() { landscapeCRDT.removeDescriptor(storeId).join(); disposable.dispose(); } - private void houseKeep(StoreDescriptorAndReplicas storeDescriptorAndReplicas) { - Map storedDescriptors = storeDescriptorAndReplicas.descriptorMap; - Set aliveReplicas = storeDescriptorAndReplicas.replicaIds; - for (StoreKey storeKey : storedDescriptors.keySet()) { - if (!aliveReplicas.contains(storeKey.getReplicaId())) { - log.debug("store[{}] is not alive, remove its descriptor", storeKey.getStoreId()); - landscapeCRDT.removeDescriptor(storeKey); - } - } + private void afterInflation(Map storedDescriptors) { if (!storedDescriptors.containsKey(landscapeCRDT.toDescriptorKey(storeId))) { KVRangeStoreDescriptor latestDescriptor = this.latestDescriptor; if (latestDescriptor != null) { + log.debug("Rectify missing store descriptor"); landscapeCRDT.setStoreDescriptor(latestDescriptor); } } } - - private record StoreDescriptorAndReplicas(Map descriptorMap, - Set replicaIds) { - } } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java index f541c9d62..9cbec5f55 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java +++ 
b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesCRDT.java @@ -39,7 +39,7 @@ import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; -import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -50,9 +50,12 @@ import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; // key: storeId, value: Map of balancerClassFQN -> BalancerState private final IORMap balancerStatesByStoreORMap; @@ -61,12 +64,30 @@ class BaseKVStoreBalancerStatesCRDT implements IBaseKVStoreBalancerStatesCRDT { private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.balancerStatesByStoreORMap = crdtService.host(toBalancerStateURI(clusterId)); disposable.add(balancerStatesByStoreORMap.inflation() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(this::buildBalancerStateSnapshots) .subscribe(balancerStatesSubject::onNext)); + disposable.add(Observable.combineLatest( + this.currentBalancerStates(), + this.aliveReplicas(), + (StateSnapshotsAndReplicas::new)) + .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) + .subscribe(this::houseKeep)); + } + + @Override + public String clusterId() { + return clusterId; + 
} + + @Override + public Observable refuteSignal() { + return crdtService.refreshSignal(); } public Observable> aliveReplicas() { @@ -156,4 +177,31 @@ private Map> buildBalancerStateSnap })); return currentBalancerStates; } + + private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) { + Map> observed = stateSnapshotsAndReplicas.observed; + Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds; + for (StoreKey storeKey : observed.keySet()) { + if (!aliveReplicas.contains(storeKey.getReplicaId()) + && shouldClean(aliveReplicas, storeKey.getReplicaId())) { + log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId()); + this.removeStore(storeKey); + } + } + } + + private boolean shouldClean(Set aliveReplicas, ByteString failedReplicas) { + // Choose cleaner deterministically from the identical aliveReplicas set across nodes. + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveReplicas) + .build(); + ByteString cleaner = hash.get(failedReplicas); + return cleaner != null && cleaner.equals(balancerStatesByStoreORMap.id().getId()); + } + + private record StateSnapshotsAndReplicas(Map> observed, + Set replicaIds) { + } } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java index abe81fb12..b9fa55de0 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesObserver.java @@ -27,13 +27,17 @@ import java.util.HashMap; import java.util.Map; import 
org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObserver { + private final Logger log; private final BehaviorSubject>> currentBalancerStatesSubject = BehaviorSubject.createDefault(emptyMap()); private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesObserver(IBaseKVStoreBalancerStatesCRDT statesCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesObserver.class, "clusterId", statesCRDT.clusterId()); disposable.add(statesCRDT.currentBalancerStates() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) .map(statesMap -> { @@ -49,6 +53,7 @@ class BaseKVStoreBalancerStatesObserver implements IBaseKVStoreBalancerStatesObs } return balancerStates; })); + log.debug("Current balancer states changed: {}", currentStates); return currentStates; }) .subscribe(currentBalancerStatesSubject::onNext)); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java index 069b5be38..767dd7e06 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposalCRDT.java @@ -36,7 +36,6 @@ import java.util.Objects; import java.util.Optional; import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecrdt.core.api.CausalCRDTType; import org.apache.bifromq.basecrdt.core.api.IMVReg; import org.apache.bifromq.basecrdt.core.api.IORMap; @@ -45,9 +44,12 @@ import org.apache.bifromq.basecrdt.service.ICRDTService; import org.apache.bifromq.basehlc.HLC; import 
org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerStatesProposalCRDT { + private final String clusterId; + private final Logger log; private final ICRDTService crdtService; // key: balancerClassFQN, value: BalancerState private final IORMap expectedBalancerStatesORMap; @@ -56,6 +58,8 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState private final CompositeDisposable disposable = new CompositeDisposable(); BaseKVStoreBalancerStatesProposalCRDT(String clusterId, ICRDTService crdtService) { + this.clusterId = clusterId; + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposalCRDT.class, "clusterId", clusterId); this.crdtService = crdtService; this.expectedBalancerStatesORMap = crdtService.host(toBalancerStateProposalURI(clusterId)); disposable.add(expectedBalancerStatesORMap.inflation() @@ -64,6 +68,11 @@ class BaseKVStoreBalancerStatesProposalCRDT implements IBaseKVStoreBalancerState .subscribe(expectedBalancerStatesSubject::onNext)); } + @Override + public String clusterId() { + return clusterId; + } + public Observable> expectedBalancerStates() { return expectedBalancerStatesSubject.distinctUntilChanged(); } @@ -115,6 +124,7 @@ private Map buildExpectedBalancerStateSnapshots(l balancerStateOpt.ifPresent(stateSnapshot -> balancerStatesMap.put(balancerClassFQN, stateSnapshot)); }); + log.debug("Expected balancer states changed: {}", balancerStatesMap); return balancerStatesMap; } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java index 20238789e..fb12538c3 100644 --- 
a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesProposer.java @@ -24,11 +24,15 @@ import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; class BaseKVStoreBalancerStatesProposer implements IBaseKVStoreBalancerStatesProposer { + private final Logger log; private final IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT; BaseKVStoreBalancerStatesProposer(IBaseKVStoreBalancerStatesProposalCRDT proposalCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesProposer.class, "clusterId", proposalCRDT.clusterId()); this.proposalCRDT = proposalCRDT; } @@ -76,6 +80,7 @@ public void stop() { private CompletableFuture proposeBalancerState(String balancerFactoryClass, BalancerStateSnapshot state) { + log.debug("Propose balancer state: balancerClass={}, state={}", balancerFactoryClass, state); CompletableFuture resultFuture = new CompletableFuture<>(); long now = state.getHlc(); proposalCRDT.expectedBalancerStates() diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java index c4a192d0a..a2531f52c 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/BaseKVStoreBalancerStatesReporter.java @@ -19,34 +19,32 @@ package org.apache.bifromq.basekv.metaservice; -import com.google.protobuf.ByteString; import com.google.protobuf.Struct; import 
io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; import java.util.Map; -import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; -import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basekv.proto.BalancerStateSnapshot; import org.apache.bifromq.basekv.proto.StoreKey; +import org.apache.bifromq.logger.MDCLogger; +import org.slf4j.Logger; -@Slf4j class BaseKVStoreBalancerStatesReporter implements IBaseKVStoreBalancerStatesReporter { + private final Logger log; private final String storeId; private final IBaseKVStoreBalancerStatesCRDT statesCRDT; private final CompositeDisposable disposable = new CompositeDisposable(); private final Map latestState = new ConcurrentHashMap<>(); BaseKVStoreBalancerStatesReporter(String storeId, IBaseKVStoreBalancerStatesCRDT statesCRDT) { + this.log = MDCLogger.getLogger(BaseKVStoreBalancerStatesReporter.class, "clusterId", statesCRDT.clusterId(), + "storeId", storeId); this.storeId = storeId; this.statesCRDT = statesCRDT; - disposable.add(Observable.combineLatest( - statesCRDT.currentBalancerStates(), - statesCRDT.aliveReplicas(), - (StateSnapshotsAndReplicas::new)) + disposable.add(statesCRDT.currentBalancerStates() .observeOn(IBaseKVMetaService.SHARED_SCHEDULER) - .subscribe(this::houseKeep)); + .subscribe(this::afterInflation)); } @Override @@ -64,32 +62,26 @@ public CompletableFuture reportBalancerState(String balancerFactoryClassFQ return CompletableFuture.completedFuture(null); } + @Override + public Observable refreshSignal() { + return statesCRDT.refuteSignal(); + } + @Override public void stop() { statesCRDT.removeStore(storeId).join(); disposable.dispose(); } - private void houseKeep(StateSnapshotsAndReplicas stateSnapshotsAndReplicas) { - Map> observed = stateSnapshotsAndReplicas.observed; - Set aliveReplicas = stateSnapshotsAndReplicas.replicaIds; - for (StoreKey storeKey : observed.keySet()) { - if 
(!aliveReplicas.contains(storeKey.getReplicaId())) { - log.debug("store[{}] is not alive, remove its balancer states", storeKey.getStoreId()); - statesCRDT.removeStore(storeKey); - } - } + private void afterInflation(Map> observed) { if (!observed.containsKey(statesCRDT.toDescriptorKey(storeId))) { + log.debug("Rectify missing store balancer states"); latestState.forEach((balancerClassFQN, balancerState) -> statesCRDT.setStoreBalancerState(storeId, balancerClassFQN, balancerState.enable(), balancerState.loadRules())); } } - private record StateSnapshotsAndReplicas(Map> observed, - Set replicaIds) { - } - private record BalancerState(boolean enable, Struct loadRules) { } diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java index 1a58c7e94..4fc27eb26 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeCRDT.java @@ -32,6 +32,19 @@ * The interface of a BaseKV landscape CRDT. */ public interface IBaseKVLandscapeCRDT { + /** + * The id of base-kv cluster. + * @return the cluster id + */ + String clusterId(); + + /** + * A signal to refresh the landscape CRDT. + * + * @return the observable of the signal + */ + Observable refreshSignal(); + /** * Get the observable of alive replicas of landscape CRDT. 
* diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java index 5bbdf5b7e..7b39b0c9b 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVLandscapeReporter.java @@ -19,6 +19,7 @@ package org.apache.bifromq.basekv.metaservice; +import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; @@ -33,6 +34,13 @@ public interface IBaseKVLandscapeReporter { */ CompletableFuture report(KVRangeStoreDescriptor descriptor); + /** + * A signal to refresh the landscape reporter's state. + * + * @return an observable that emits a timestamp when the reporter should refresh its state + */ + Observable refreshSignal(); + /** * Stop the reporter. */ diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java index a0e44b963..9038a307a 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesCRDT.java @@ -32,6 +32,10 @@ * The interface of a BaseKV store balancer states CRDT. 
*/ public interface IBaseKVStoreBalancerStatesCRDT { + String clusterId(); + + Observable refuteSignal(); + Observable> aliveReplicas(); Observable>> currentBalancerStates(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java index 3ca2d36e2..4bde0de7a 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesProposalCRDT.java @@ -30,6 +30,7 @@ * The interface of a BaseKV store balancer states CRDT. */ public interface IBaseKVStoreBalancerStatesProposalCRDT { + String clusterId(); Observable> expectedBalancerStates(); diff --git a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java index c4ca82670..9e6891f16 100644 --- a/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java +++ b/base-kv/base-kv-meta-service/src/main/java/org/apache/bifromq/basekv/metaservice/IBaseKVStoreBalancerStatesReporter.java @@ -20,6 +20,7 @@ package org.apache.bifromq.basekv.metaservice; import com.google.protobuf.Struct; +import io.reactivex.rxjava3.core.Observable; import java.util.concurrent.CompletableFuture; /** @@ -36,6 +37,13 @@ public interface IBaseKVStoreBalancerStatesReporter { */ CompletableFuture reportBalancerState(String balancerFactoryClassFQN, boolean disable, Struct loadRules); + /** + * A signal to refresh the reporter's state. 
+ * + * @return an observable that emits a timestamp when the reporter should refresh its state + */ + Observable refreshSignal(); + /** * Stop the reporter. */ diff --git a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java index f28fbea46..87aaf741f 100644 --- a/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java +++ b/base-kv/base-kv-meta-service/src/test/java/org/apache/bifromq/basekv/metaservice/BaseKVLandscapeReportTest.java @@ -21,9 +21,7 @@ import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import java.util.Collections; import java.util.Map; import org.apache.bifromq.basecluster.AgentHostOptions; import org.apache.bifromq.basecluster.IAgentHost; @@ -80,7 +78,6 @@ public void stop() { await().until(() -> observer.getStoreDescriptor(descriptor.getId()).isPresent()); reporter.stop(); - assertEquals(Collections.emptyMap(), observer.landscape().blockingFirst()); - assertTrue(observer.getStoreDescriptor(descriptor.getId()).isEmpty()); + await().until(() -> observer.landscape().blockingFirst().isEmpty()); } } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java index 62bf66661..c488260e1 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/RaftNodeStateFollower.java @@ -14,11 +14,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. 
+ * under the License. */ package org.apache.bifromq.basekv.raft; +import com.google.protobuf.ByteString; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.TreeMap; +import java.util.concurrent.CompletableFuture; import org.apache.bifromq.basekv.raft.exception.ClusterConfigChangeException; import org.apache.bifromq.basekv.raft.exception.DropProposalException; import org.apache.bifromq.basekv.raft.exception.LeaderTransferException; @@ -39,16 +49,6 @@ import org.apache.bifromq.basekv.raft.proto.RequestVote; import org.apache.bifromq.basekv.raft.proto.Snapshot; import org.apache.bifromq.basekv.raft.proto.Voting; -import com.google.protobuf.ByteString; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.TreeMap; -import java.util.concurrent.CompletableFuture; class RaftNodeStateFollower extends RaftNodeState { private final TreeMap stabilizingIndexes = new TreeMap<>(Long::compareTo); @@ -387,7 +387,7 @@ void changeClusterConfig(String correlateId, void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex, CompletableFuture onDone) { if (currentISSRequest == null) { log.debug("Snapshot installation request not found"); - onDone.completeExceptionally(new SnapshotException("No snapshot installation request")); + onDone.completeExceptionally(SnapshotException.noSnapshot()); return; } InstallSnapshot iss = currentISSRequest; @@ -398,7 +398,7 @@ void onSnapshotRestored(ByteString requested, ByteString installed, Throwable ex onDone.completeExceptionally(ex); } else { log.debug("Obsolete snapshot installation"); - onDone.completeExceptionally(new SnapshotException("Obsolete snapshot installed by FSM")); + 
onDone.completeExceptionally(SnapshotException.obsolete()); } return; } diff --git a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java index ec397ed25..3f4bdf66d 100644 --- a/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java +++ b/base-kv/base-kv-raft/src/main/java/org/apache/bifromq/basekv/raft/exception/SnapshotException.java @@ -14,17 +14,47 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.raft.exception; +/** + * Exception thrown during snapshot operations in the Raft protocol. + * This exception can indicate that a snapshot is obsolete or has other issues. + */ public class SnapshotException extends RuntimeException { - public SnapshotException(String message) { + private SnapshotException(String message) { super(message); } - public SnapshotException(Throwable e) { + private SnapshotException(Throwable e) { super(e); } + + public static ObsoleteSnapshotException obsolete() { + return new ObsoleteSnapshotException(); + } + + public static NoSnapshotException noSnapshot() { + return new NoSnapshotException(); + } + + /** + * Exception indicating that no snapshot is available for installation. + */ + public static class NoSnapshotException extends SnapshotException { + private NoSnapshotException() { + super("No snapshot available"); + } + } + + /** + * Exception indicating that the snapshot is obsolete by a newer snapshot during installation. 
+ */ + public static class ObsoleteSnapshotException extends SnapshotException { + private ObsoleteSnapshotException() { + super("The installed snapshot has been obsoleted by a newer snapshot"); + } + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java index 5e511d11a..5f73f98e5 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/KVStoreBalanceController.java @@ -31,6 +31,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -120,7 +121,7 @@ public KVStoreBalanceController(IBaseKVMetaService metaService, this.customBalancerFactories = Lists.newArrayList(factories); this.builtinBalancerFactories = Lists.newArrayList( new RangeBootstrapBalancerFactory(bootstrapDelay), - new RedundantRangeRemovalBalancerFactory(), + new RedundantRangeRemovalBalancerFactory(zombieProbeDelay), new UnreachableReplicaRemovalBalancerFactory(zombieProbeDelay)); this.statesProposal = metaService.balancerStatesProposal(storeClient.clusterId()); this.balancers = new HashMap<>(); @@ -137,7 +138,8 @@ public void start(String localStoreId) { if (state.compareAndSet(State.Init, State.Started)) { this.localStoreId = localStoreId; statesReporter = metaService.balancerStatesReporter(storeClient.clusterId(), localStoreId); - log = MDCLogger.getLogger("balancer.logger", "clusterId", storeClient.clusterId(), "storeId", localStoreId); + log = MDCLogger.getLogger("balancer.logger", + "clusterId", storeClient.clusterId(), "storeId", localStoreId, "balancer", 
"CONTROLLER"); for (IStoreBalancerFactory factory : builtinBalancerFactories) { StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId); @@ -146,12 +148,12 @@ public void start(String localStoreId) { } for (IStoreBalancerFactory factory : customBalancerFactories) { String balancerFactoryFQN = factory.getClass().getName(); - log.info("Create balancer from factory: {}", balancerFactoryFQN); StoreBalancer balancer = factory.newBalancer(storeClient.clusterId(), localStoreId); + log.info("Create balancer[{}] from factory: {}", balancer.getClass().getName(), balancerFactoryFQN); if (balancer instanceof RangeBootstrapBalancer || balancer instanceof RedundantRangeRemovalBalancer || balancer instanceof UnreachableReplicaRemovalBalancer) { - log.warn("{} should not be created from custom balancer factory", + log.warn("Builtin balancer[{}] should not be created from custom balancer factory", balancer.getClass().getSimpleName()); continue; } @@ -163,14 +165,28 @@ public void start(String localStoreId) { log.info("BalancerController start"); disposables.add(statesProposal.expectedBalancerStates() .subscribe(currentExpected -> { + log.trace("Expected balancer states changed: {}", currentExpected); this.expectedBalancerStates = currentExpected; trigger(); })); disposables.add(storeClient.describe().subscribe(descriptors -> { + log.trace("Landscape changed: {}", descriptors); this.landscape = descriptors; trimRangeHistory(descriptors); trigger(); })); + disposables.add(statesReporter.refreshSignal() + .subscribe(ts -> { + for (Map.Entry entry : balancers.entrySet()) { + String balancerFacClassFQN = entry.getKey(); + StoreBalancerState balancerState = entry.getValue(); + if (!balancerState.isBuiltin) { + log.debug("Report balancer state for {}", balancerFacClassFQN); + statesReporter.reportBalancerState(balancerFacClassFQN, + balancerState.disabled.get(), balancerState.loadRules.get()); + } + } + })); } } @@ -199,6 +215,10 @@ public void stop() { private 
void trigger() { if (state.get() == State.Started && scheduling.compareAndSet(false, true)) { long jitter = ThreadLocalRandom.current().nextLong(0, retryDelay.toMillis()); + if (task != null && !task.isDone()) { + log.trace("Cancel scheduled balance task"); + task.cancel(true); + } task = executor.schedule(this::updateAndBalance, jitter, TimeUnit.MILLISECONDS); } } @@ -208,6 +228,9 @@ private void updateAndBalance() { Set landscape = this.landscape; if (landscape == null || landscape.isEmpty()) { scheduling.set(false); + if (!Objects.equals(this.landscape, landscape)) { + trigger(); + } return; } for (Map.Entry entry : balancers.entrySet()) { @@ -222,18 +245,25 @@ private void updateAndBalance() { Struct loadRules = balancerState.loadRules.get(); boolean needReport = false; if (balancerState.disabled.get() != disable) { - log.info("Balancer[{}] is {}", balancerFacClassFQN, disable ? "disabled" : "enabled"); + log.info("Balancer[{}] is {}", balancerState.balancer.getClass().getSimpleName(), + disable ? 
"disabled" : "enabled"); balancerState.disabled.set(disable); needReport = true; } - Struct expectedLoadRules = expectedState.getLoadRules(); - if (!loadRules.equals(expectedLoadRules) - && balancerState.balancer.validate(expectedLoadRules)) { - loadRules = expectedLoadRules; - // report the balancer state - balancerState.loadRules.set(expectedLoadRules); - balancerState.balancer.update(expectedLoadRules); - needReport = true; + Struct expectedLoadRules = loadRules.toBuilder() + .mergeFrom(expectedState.getLoadRules()) + .build(); + if (!loadRules.equals(expectedLoadRules)) { + if (balancerState.balancer.validate(expectedLoadRules)) { + loadRules = expectedLoadRules; + // report the balancer state + balancerState.loadRules.set(expectedLoadRules); + balancerState.balancer.update(expectedLoadRules); + needReport = true; + } else { + log.warn("Balancer[{}] load rules not valid: {}", + balancerState.balancer.getClass().getSimpleName(), expectedLoadRules); + } } if (needReport) { statesReporter.reportBalancerState(balancerFacClassFQN, disable, loadRules); @@ -245,7 +275,7 @@ private void updateAndBalance() { } balancerState.balancer.update(landscape); } catch (Throwable e) { - log.error("Balancer[{}] update failed", balancerFacClassFQN, e); + log.error("Balancer[{}] update failed", balancerState.balancer.getClass().getSimpleName(), e); } } balance(expectedBalancerState, landscape); @@ -254,9 +284,11 @@ private void updateAndBalance() { private void scheduleRetry(Map expected, Set landscape, Duration delay) { + log.debug("Retry balance after {}s", delay.toSeconds()); task = executor.schedule(() -> { - if (expected != this.expectedBalancerStates || landscape != this.landscape) { + if (!Objects.equals(expected, this.expectedBalancerStates) || landscape != this.landscape) { // retry is preemptive + log.trace("Balance retry is preempted"); return; } if (scheduling.compareAndSet(false, true)) { @@ -268,11 +300,11 @@ private void scheduleRetry(Map expected, private void 
balance(final Map expected, final Set landscape) { metricsManager.scheduleCount.increment(); - Duration delay = Duration.ZERO; + Duration delay = null; for (Map.Entry entry : balancers.entrySet()) { - String balancerFactoryName = entry.getKey(); StoreBalancerState fromBalancerState = entry.getValue(); StoreBalancer fromBalancer = fromBalancerState.balancer; + String balancerName = fromBalancer.getClass().getSimpleName(); if (fromBalancerState.disabled.get()) { continue; } @@ -282,9 +314,8 @@ private void balance(final Map expected, case BalanceNow -> { BalanceCommand commandToRun = ((BalanceNow) result).command; if (!isStaleCommand(commandToRun)) { - log.info("Balancer[{}] command run: {}", balancerFactoryName, commandToRun); - String balancerName = fromBalancer.getClass().getSimpleName(); String cmdName = commandToRun.getClass().getSimpleName(); + log.info("Balancer[{}] command run: {}", balancerName, commandToRun); Sample start = Timer.start(); runCommand(commandToRun) .whenCompleteAsync((success, e) -> { @@ -295,7 +326,7 @@ private void balance(final Map expected, metrics.cmdFailedCounter.increment(); } else { log.info("Balancer[{}] command run result[{}]: {}", - balancerFactoryName, success, commandToRun); + balancerName, success, commandToRun); if (success) { metrics.cmdSucceedCounter.increment(); start.stop(metrics.cmdRunTimer); @@ -305,7 +336,8 @@ private void balance(final Map expected, } scheduling.set(false); if (success) { - if (this.landscape != landscape || this.expectedBalancerStates != expected) { + if (!Objects.equals(this.landscape, landscape) + || !Objects.equals(this.expectedBalancerStates, expected)) { trigger(); } } else { @@ -317,21 +349,21 @@ private void balance(final Map expected, } case AwaitBalance -> { Duration await = ((AwaitBalance) result).await; - delay = await.toNanos() > delay.toNanos() ? await : delay; + delay = delay != null ? (await.toNanos() < delay.toNanos() ? 
await : delay) : await; } default -> { // do nothing } } } catch (Throwable e) { - log.warn("Balancer[{}] unexpected error", balancerFactoryName, e); + log.warn("Balancer[{}] unexpected error", balancerName, e); } } // no command to run scheduling.set(false); - if (this.landscape != landscape || this.expectedBalancerStates != expected) { + if (!Objects.equals(this.landscape, landscape) || !Objects.equals(this.expectedBalancerStates, expected)) { trigger(); - } else if (!delay.isZero()) { + } else if (delay != null) { // if some balancers are in the progress of generating balance command, wait for a while scheduleRetry(expected, landscape, delay); } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java index 4eff0bed3..3bcf0197a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/RedundantRangeRemovalBalancerFactory.java @@ -19,14 +19,22 @@ package org.apache.bifromq.basekv.balance; +import java.time.Duration; +import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.balance.impl.RedundantRangeRemovalBalancer; /** * Builtin balancer for redundant range removal. 
*/ class RedundantRangeRemovalBalancerFactory implements IStoreBalancerFactory { + private final Duration delay; + + RedundantRangeRemovalBalancerFactory(Duration delay) { + this.delay = delay; + } + @Override public StoreBalancer newBalancer(String clusterId, String localStoreId) { - return new RedundantRangeRemovalBalancer(clusterId, localStoreId); + return new RedundantRangeRemovalBalancer(clusterId, localStoreId, delay, HLC.INST::getPhysical); } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java index 38b7eebec..c9d979d36 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancer.java @@ -22,7 +22,14 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch; +import java.time.Duration; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; import org.apache.bifromq.basehlc.HLC; +import org.apache.bifromq.basekv.balance.AwaitBalance; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.NoNeedBalance; @@ -33,12 +40,6 @@ import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.apache.bifromq.basekv.utils.EffectiveEpoch; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import java.time.Duration; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.ThreadLocalRandom; -import 
java.util.concurrent.atomic.AtomicReference; -import java.util.function.Supplier; /** * RangeBootstrapBalancer is a specialized StoreBalancer designed to handle the bootstrap process of creating the @@ -49,6 +50,7 @@ public class RangeBootstrapBalancer extends StoreBalancer { private final Supplier millisSource; private final long suspicionDurationMillis; private final AtomicReference bootstrapTrigger = new AtomicReference<>(); + /** * Constructor of StoreBalancer. * @@ -98,19 +100,27 @@ public void update(Set landscape) { KVRangeIdUtil.toString(rangeId)); bootstrapTrigger.set(new BootstrapTrigger(rangeId, FULL_BOUNDARY, randomSuspicionTimeout())); } + } else if (bootstrapTrigger.get() != null) { + log.debug("Effective epoch found: {}, cancel any pending bootstrap", effectiveEpoch.get().epoch()); + bootstrapTrigger.set(null); } } @Override public BalanceResult balance() { BootstrapTrigger current = bootstrapTrigger.get(); - if (current != null && millisSource.get() > current.triggerTime) { - bootstrapTrigger.set(null); - return BalanceNow.of(BootstrapCommand.builder() - .toStore(localStoreId) - .kvRangeId(current.id) - .boundary(current.boundary) - .build()); + if (current != null) { + long nowMillis = millisSource.get(); + if (nowMillis > current.triggerTime) { + bootstrapTrigger.set(null); + return BalanceNow.of(BootstrapCommand.builder() + .toStore(localStoreId) + .kvRangeId(current.id) + .boundary(current.boundary) + .build()); + } else { + return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis)); + } } return NoNeedBalance.INSTANCE; } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java index b9642fe4e..7089ef3cb 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java +++ 
b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancer.java @@ -26,9 +26,12 @@ import com.google.protobuf.Struct; import com.google.protobuf.Value; +import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; import java.util.Optional; +import java.util.Set; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; @@ -131,6 +134,10 @@ protected Map doGenerate(Struct loadRules, KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); KVRangeStoreDescriptor storeDescriptor = landscape.get(leaderRange.ownerStoreDescriptor().getId()); ClusterConfig clusterConfig = rangeDescriptor.getConfig(); + if (containsDeadMember(clusterConfig, landscape.keySet())) { + // shortcut when config contains dead members + return Collections.emptyMap(); + } Optional splitHintOpt = rangeDescriptor .getHintsList() .stream() @@ -170,4 +177,13 @@ && compareEndKeys(splitHint.getSplitKey(), endKey(boundary)) < 0) { } return expectedRangeLayout; } + + private boolean containsDeadMember(ClusterConfig clusterConfig, Set live) { + Set members = new HashSet<>(); + members.addAll(clusterConfig.getVotersList()); + members.addAll(clusterConfig.getLearnersList()); + members.addAll(clusterConfig.getNextVotersList()); + members.addAll(clusterConfig.getNextLearnersList()); + return members.stream().anyMatch(m -> !live.contains(m)); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java index bedf16fbd..0ae68e33a 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java +++ 
b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancer.java @@ -23,21 +23,31 @@ import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveRoute; import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch; +import com.google.common.collect.Sets; +import java.time.Duration; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.Map; import java.util.NavigableMap; +import java.util.NavigableSet; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; +import org.apache.bifromq.basekv.balance.AwaitBalance; +import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.NoNeedBalance; import org.apache.bifromq.basekv.balance.StoreBalancer; +import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.proto.Boundary; import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.utils.EffectiveEpoch; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @@ -55,7 +65,9 @@ * caution.

*/ public class RedundantRangeRemovalBalancer extends StoreBalancer { - private volatile NavigableMap> latest = Collections.emptyNavigableMap(); + private final Supplier millisSource; + private final long suspicionDurationMillis; + private final AtomicReference pendingQuitCommand = new AtomicReference<>(); /** * Constructor of StoreBalancer. @@ -63,23 +75,60 @@ public class RedundantRangeRemovalBalancer extends StoreBalancer { * @param clusterId the id of the BaseKV cluster which the store belongs to * @param localStoreId the id of the store which the balancer is responsible for */ - public RedundantRangeRemovalBalancer(String clusterId, String localStoreId) { + public RedundantRangeRemovalBalancer(String clusterId, + String localStoreId, + Duration suspicionDuration, + Supplier millisSource) { super(clusterId, localStoreId); + this.suspicionDurationMillis = suspicionDuration.toMillis(); + this.millisSource = millisSource; } @Override public void update(Set landscape) { - latest = organizeByEpoch(landscape); + NavigableMap> landscapeByEpoch = organizeByEpoch(landscape); + if (landscapeByEpoch.isEmpty()) { + pendingQuitCommand.set(null); + return; + } + boolean scheduled = cleanupRedundantEpoch(landscapeByEpoch); + if (scheduled) { + return; + } + Map.Entry> oldestEntry = landscapeByEpoch.firstEntry(); + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue()); + scheduled = cleanupIdConflictRange(effectiveEpoch); + if (scheduled) { + return; + } + scheduled = cleanupBoundaryConflictRange(effectiveEpoch); + if (!scheduled) { + if (pendingQuitCommand.get() != null) { + log.debug("No redundant range found, clear pending quit command"); + pendingQuitCommand.set(null); + } + } } @Override public BalanceResult balance() { - if (latest.isEmpty()) { - return NoNeedBalance.INSTANCE; + PendingQuitCommand current = pendingQuitCommand.get(); + if (current != null) { + long nowMillis = millisSource.get(); + if (nowMillis > 
current.triggerTime) { + pendingQuitCommand.set(null); + return BalanceNow.of(current.quitCmd); + } else { + return AwaitBalance.of(Duration.ofMillis(current.triggerTime - nowMillis)); + } } - if (latest.size() > 1) { + return NoNeedBalance.INSTANCE; + } + + private boolean cleanupRedundantEpoch(NavigableMap> landscapeByEpoch) { + if (landscapeByEpoch.size() > 1) { // deal with epoch-conflict ranges - Set storeDescriptors = latest.lastEntry().getValue(); + Set storeDescriptors = landscapeByEpoch.lastEntry().getValue(); for (KVRangeStoreDescriptor storeDescriptor : storeDescriptors) { if (!storeDescriptor.getId().equals(localStoreId)) { continue; @@ -88,34 +137,42 @@ public BalanceResult balance() { if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) { continue; } - log.debug("Remove Epoch-Conflict range: {} in store {}", - KVRangeIdUtil.toString(rangeDescriptor.getId()), - storeDescriptor.getId()); - return quit(localStoreId, rangeDescriptor); + log.debug("Schedule command to remove epoch-conflict range: id={}, boundary={}", + KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout())); + return true; } } - return NoNeedBalance.INSTANCE; } - Map.Entry> oldestEntry = latest.firstEntry(); - Map> conflictingRanges = findConflictingRanges(oldestEntry.getValue()); + return false; + } + + private boolean cleanupIdConflictRange(EffectiveEpoch effectiveEpoch) { + Map> conflictingRanges = + findConflictingRanges(effectiveEpoch.storeDescriptors()); if (!conflictingRanges.isEmpty()) { // deal with id-conflict ranges for (KVRangeId rangeId : conflictingRanges.keySet()) { - SortedSet leaderRanges = conflictingRanges.get(rangeId); + NavigableSet leaderRanges = conflictingRanges.get(rangeId); for (LeaderRange leaderRange : leaderRanges) { if (!leaderRange.ownerStoreDescriptor().getId().equals(localStoreId)) { - return NoNeedBalance.INSTANCE; 
+ return false; } - log.debug("Remove Id-Conflict range: {} in store {}", + log.warn("Schedule command to remove id-conflict range: id={}, boundary={}", KVRangeIdUtil.toString(leaderRange.descriptor().getId()), - leaderRange.ownerStoreDescriptor().getId()); - return quit(localStoreId, leaderRange.descriptor()); + leaderRange.descriptor().getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, leaderRange.descriptor()), randomSuspicionTimeout())); + return true; } } - return NoNeedBalance.INSTANCE; } + return false; + } + + private boolean cleanupBoundaryConflictRange(EffectiveEpoch effectiveEpoch) { // deal with boundary-conflict ranges - EffectiveEpoch effectiveEpoch = new EffectiveEpoch(oldestEntry.getKey(), oldestEntry.getValue()); NavigableMap effectiveLeaders = getEffectiveRoute(effectiveEpoch).leaderRanges(); for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) { if (!storeDescriptor.getId().equals(localStoreId)) { @@ -128,19 +185,21 @@ public BalanceResult balance() { Boundary boundary = rangeDescriptor.getBoundary(); LeaderRange leaderRange = effectiveLeaders.get(boundary); if (leaderRange == null || !leaderRange.descriptor().getId().equals(rangeDescriptor.getId())) { - log.debug("Remove Boundary-Conflict range: {} in store {}", - KVRangeIdUtil.toString(rangeDescriptor.getId()), - storeDescriptor.getId()); - return quit(localStoreId, rangeDescriptor); + log.warn("Schedule command to remove boundary-conflict range: id={}, boundary={}", + KVRangeIdUtil.toString(rangeDescriptor.getId()), rangeDescriptor.getBoundary()); + pendingQuitCommand.set( + new PendingQuitCommand(quit(localStoreId, rangeDescriptor), randomSuspicionTimeout())); + return true; } } } - return NoNeedBalance.INSTANCE; + return false; } - private Map> findConflictingRanges(Set effectiveEpoch) { - Map> leaderRangesByRangeId = new HashMap<>(); - Map> conflictingRanges = new HashMap<>(); + private Map> findConflictingRanges( + Set 
effectiveEpoch) { + Map> leaderRangesByRangeId = new HashMap<>(); + Map> conflictingRanges = new HashMap<>(); for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch) { for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) { if (rangeDescriptor.getRole() != RaftNodeStatus.Leader) { @@ -151,12 +210,42 @@ private Map> findConflictingRanges(Set lr.ownerStoreDescriptor().getId(), String::compareTo) .reversed())); leaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); - if (leaderRanges.size() > 1) { - // More than one leader for the same range, add to conflicting ranges - conflictingRanges.put(rangeId, leaderRanges); + } + } + for (KVRangeId rangeId : leaderRangesByRangeId.keySet()) { + NavigableSet leaderRanges = leaderRangesByRangeId.get(rangeId); + LeaderRange firstLeaderRange = leaderRanges.first(); + ClusterConfig firstLeaderClusterConfig = firstLeaderRange.descriptor().getConfig(); + if (leaderRanges.size() > 1) { + NavigableSet restLeaderRanges = leaderRanges.tailSet(firstLeaderRange, false); + // check if rest leader ranges are conflicting: disjoint voter set + for (LeaderRange restLeaderRange : restLeaderRanges) { + ClusterConfig restLeaderClusterConfig = restLeaderRange.descriptor().getConfig(); + if (isDisjoint(firstLeaderClusterConfig, restLeaderClusterConfig)) { + // if disjoint, add to conflicting ranges + conflictingRanges.put(rangeId, leaderRanges); + } } } } return conflictingRanges; } + + private boolean isDisjoint(ClusterConfig firstConfig, ClusterConfig secondConfig) { + Set firstVoters = Sets.newHashSet(firstConfig.getVotersList()); + Set secondVoters = Sets.newHashSet(secondConfig.getVotersList()); + Set firstNextVoters = Sets.newHashSet(firstConfig.getNextVotersList()); + Set secondNextVoters = Sets.newHashSet(secondConfig.getNextVotersList()); + return Collections.disjoint(firstVoters, secondVoters) + && Collections.disjoint(firstNextVoters, secondNextVoters); + } + + private long randomSuspicionTimeout() { 
+ return millisSource.get() + + ThreadLocalRandom.current().nextLong(suspicionDurationMillis, suspicionDurationMillis * 2); + } + + private record PendingQuitCommand(BalanceCommand quitCmd, long triggerTime) { + + } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java index dcb47face..7fabc17db 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancer.java @@ -19,14 +19,11 @@ package org.apache.bifromq.basekv.balance.impl; -import static com.google.common.collect.Sets.difference; -import static com.google.common.collect.Sets.union; - import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.protobuf.Struct; import com.google.protobuf.Value; -import java.util.Collections; +import java.util.ArrayList; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; @@ -76,6 +73,17 @@ public ReplicaCntBalancer(String clusterId, Preconditions.checkArgument(validate(defaultLoadRules), "Invalid default load rules"); } + private ClusterConfig buildConfig(Set voters, Set learners) { + return ClusterConfig.newBuilder() + .addAllVoters(voters) + .addAllLearners(learners) + .build(); + } + + private void sanitize(Set s, Set live) { + s.retainAll(live); + } + @Override public Struct initialLoadRules() { return defaultLoadRules; @@ -116,137 +124,190 @@ private boolean meetExpectedConfig(Struct loadRules, Map landscape, EffectiveRoute effectiveRoute, Map expectedRangeLayout) { - int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue(); - int expectedLearners = (int) 
loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue(); - // meeting goal one - meet the expected number of Voter replicas and learner replicas for each Range dynamically + final Set liveStores = landscape.keySet(); + final int expectedVoters = (int) loadRules.getFieldsMap().get(LOAD_RULE_VOTERS).getNumberValue(); + final int expectedLearners = (int) loadRules.getFieldsMap().get(LOAD_RULE_LEARNERS).getNumberValue(); + + if (liveStores.size() < expectedVoters) { + for (Map.Entry e : effectiveRoute.leaderRanges().entrySet()) { + ClusterConfig cc = e.getValue().descriptor().getConfig(); + for (String v : cc.getVotersList()) { + if (!liveStores.contains(v)) { + // shortcut for rolling restart + return true; + } + } + } + } + boolean meetingGoal = false; + for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { - Boundary boundary = entry.getKey(); LeaderRange leaderRange = entry.getValue(); KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } + + // if there is running config change process, abort generation and wait for the next round + // keep range config change as linear as possible if (clusterConfig.getNextVotersCount() > 0 || clusterConfig.getNextLearnersCount() > 0) { - // if there is running config change process, abort generation - expectedRangeLayout.put(boundary, clusterConfig); - meetingGoal = true; - continue; + expectedRangeLayout.clear(); + // shortcut + return true; } - // voter count not meet expectation or exceeds actual store node amount - Set voters = new HashSet<>(clusterConfig.getVotersList()); - Set learners = new HashSet<>(clusterConfig.getLearnersList()); - if (clusterConfig.getVotersCount() != expectedVoters || clusterConfig.getVotersCount() > landscape.size()) { - if (clusterConfig.getVotersCount() < expectedVoters) { - // add some voters from the least range count 
store - List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream() - .filter(e -> - !learners.contains(e.getKey()) && !voters.contains(e.getKey())) + + final Set voters = new HashSet<>(clusterConfig.getVotersList()); + final Set learners = new HashSet<>(clusterConfig.getLearnersList()); + + // remove unreachable stores from voters and learners + sanitize(voters, liveStores); + sanitize(learners, liveStores); + + Boundary boundary = entry.getKey(); + int targetVoters = Math.min(expectedVoters, liveStores.size()); + boolean needFix = voters.size() != targetVoters; + if (!meetingGoal && needFix) { + String leaderStore = leaderRange.ownerStoreDescriptor().getId(); + if (voters.size() < targetVoters) { + if (!learners.isEmpty()) { + List learnerCandidates = landscape.entrySet().stream() + .filter(e -> learners.contains(e.getKey())) + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : learnerCandidates) { + learners.remove(s); // promote learner -> voter + voters.add(s); + if (voters.size() == targetVoters) { + break; + } + } + } + + if (voters.size() < targetVoters) { + List freeCandidates = landscape.entrySet().stream() + .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey())) + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : freeCandidates) { + voters.add(s); + if (voters.size() == targetVoters) { + break; + } + } + } + + if (expectedLearners == -1) { + Set newLearners = new HashSet<>(liveStores); + newLearners.removeAll(voters); + learners.clear(); + learners.addAll(newLearners); + } + List candidates = landscape.entrySet().stream() + .filter(e -> !learners.contains(e.getKey()) && !voters.contains(e.getKey())) .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) .map(Map.Entry::getKey) .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) { - 
voters.add(aliveStoreId); - if (voters.size() == expectedVoters) { + for (String s : candidates) { + voters.add(s); + if (voters.size() == targetVoters) { break; } } - } else { - // remove some voters from the most range count store - List aliveStoresSortedByRangeCountDesc = landscape.entrySet().stream() + } else { // voters.size() > targetVoters + List overloaded = landscape.entrySet().stream() .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) .map(Map.Entry::getKey) .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) { - if (!aliveStoreId.equals(leaderRange.ownerStoreDescriptor().getId())) { - voters.remove(aliveStoreId); + for (String s : overloaded) { + if (!s.equals(leaderStore) && voters.contains(s)) { + voters.remove(s); + if (voters.size() == targetVoters) { + break; + } } - if (voters.size() == expectedVoters) { - break; + } + if (voters.size() > targetVoters) { + for (String s : new ArrayList<>(voters)) { + if (!s.equals(leaderStore)) { + voters.remove(s); + if (voters.size() == targetVoters) { + break; + } + } } } } - // remove unreachable voters - voters.removeIf(voter -> !landscape.containsKey(voter)); - ClusterConfig newConfig = ClusterConfig.newBuilder() - .mergeFrom(clusterConfig) - .clearVoters() - .addAllVoters(voters) - .build(); - if (!newConfig.equals(clusterConfig)) { - meetingGoal = true; - } - expectedRangeLayout.put(boundary, newConfig); + expectedRangeLayout.put(boundary, buildConfig(voters, learners)); + meetingGoal = true; } else { - expectedRangeLayout.put(boundary, clusterConfig); + expectedRangeLayout.put(boundary, buildConfig(voters, learners)); } } + if (meetingGoal) { return true; } - // voter count met the expectation, check learner count + for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { - Boundary boundary = entry.getKey(); LeaderRange leaderRange = entry.getValue(); KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); ClusterConfig clusterConfig = 
rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } + Set voters = new HashSet<>(clusterConfig.getVotersList()); Set learners = new HashSet<>(clusterConfig.getLearnersList()); - if (expectedLearners == -1 - || clusterConfig.getLearnersCount() != expectedLearners - || clusterConfig.getLearnersCount() > landscape.size()) { - if (expectedLearners == -1) { - Set newLearners = new HashSet<>(landscape.keySet()); - newLearners.removeAll(voters); - learners.addAll(newLearners); - } else { - if (clusterConfig.getLearnersCount() < expectedLearners) { - // add some learners from the least range count store - List aliveStoresSortedByRangeCountAsc = landscape.entrySet().stream() - .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) - .map(Map.Entry::getKey) - .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountAsc) { - if (!voters.contains(aliveStoreId)) { - learners.add(aliveStoreId); - } - if (learners.size() == expectedVoters) { + sanitize(voters, liveStores); + sanitize(learners, liveStores); + + boolean changed = false; + + if (expectedLearners == -1) { + // learners = live - voters + Set newLearners = new HashSet<>(liveStores); + newLearners.removeAll(voters); + if (!newLearners.equals(learners)) { + learners = newLearners; + changed = true; + } + } else { + int maxPossible = Math.max(0, liveStores.size() - voters.size()); + int targetLearners = Math.min(expectedLearners, maxPossible); + + if (learners.size() < targetLearners) { + List candidates = landscape.entrySet().stream() + .sorted(Comparator.comparingInt(e -> e.getValue().getRangesCount())) + .map(Map.Entry::getKey) + .toList(); + for (String s : candidates) { + if (!voters.contains(s) && !learners.contains(s)) { + learners.add(s); + if (learners.size() == targetLearners) { break; } } - } else { - // remove some learners from the most range count store - List aliveStoresSortedByRangeCountDesc = 
landscape.entrySet().stream() - .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) - .map(Map.Entry::getKey) - .toList(); - for (String aliveStoreId : aliveStoresSortedByRangeCountDesc) { - learners.remove(aliveStoreId); - if (learners.size() == expectedLearners) { + } + changed = true; + } else if (learners.size() > targetLearners) { + List overloaded = landscape.entrySet().stream() + .sorted((a, b) -> b.getValue().getRangesCount() - a.getValue().getRangesCount()) + .map(Map.Entry::getKey) + .toList(); + for (String s : overloaded) { + if (learners.contains(s)) { + learners.remove(s); + if (learners.size() == targetLearners) { break; } } } + changed = true; } - // remove unreachable learners - learners.removeIf(learner -> !landscape.containsKey(learner)); - ClusterConfig newConfig = ClusterConfig.newBuilder() - .mergeFrom(clusterConfig) - .clearLearners() - .addAllLearners(learners) - .build(); - if (!newConfig.equals(clusterConfig)) { - meetingGoal = true; - } - expectedRangeLayout.put(boundary, newConfig); - } else { - expectedRangeLayout.put(boundary, clusterConfig); + } + + Boundary boundary = entry.getKey(); + expectedRangeLayout.put(boundary, buildConfig(voters, learners)); + if (!meetingGoal && changed) { + meetingGoal = true; } } return meetingGoal; @@ -255,133 +316,148 @@ private boolean meetExpectedConfig(Struct loadRules, private boolean balanceVoterCount(Map landscape, EffectiveRoute effectiveRoute, Map expectedRangeLayout) { - // goal one has met, meeting goal two - evenly distributed voter replicas across all stores - boolean meetingGoal = false; + final Set liveStores = landscape.keySet(); Map storeVoterCount = new HashMap<>(); for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { ClusterConfig config = entry.getValue().descriptor().getConfig(); - config.getVotersList() + config.getVotersList().stream() + .filter(liveStores::contains) .forEach(storeId -> storeVoterCount.put(storeId, 
storeVoterCount.getOrDefault(storeId, 0) + 1)); } - landscape.keySet().forEach(storeId -> { - if (!storeVoterCount.containsKey(storeId)) { - storeVoterCount.put(storeId, 0); - } - }); - record StoreVoterCount(String storeId, int voterCount) { - } + liveStores.forEach(s -> storeVoterCount.putIfAbsent(s, 0)); + + record StoreVoterCount(String storeId, int voterCount) {} - SortedSet storeVoterCountSorted = new TreeSet<>(Comparator - .comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId)); + SortedSet storeVoterCountSorted = new TreeSet<>( + Comparator.comparingInt(StoreVoterCount::voterCount).thenComparing(StoreVoterCount::storeId)); storeVoterCount.forEach( (storeId, voterCount) -> storeVoterCountSorted.add(new StoreVoterCount(storeId, voterCount))); + double totalVoters = storeVoterCount.values().stream().mapToInt(Integer::intValue).sum(); - double targetVotersPerStore = totalVoters / landscape.size(); - int maxVotersPerStore = (int) Math.ceil(targetVotersPerStore); + double targetVotersPerStore = liveStores.isEmpty() ? 
0 : totalVoters / liveStores.size(); int minVotersPerStore = (int) Math.floor(targetVotersPerStore); - int globalMax = Collections.max(storeVoterCount.values()); - int globalMin = Collections.min(storeVoterCount.values()); + int globalMax = storeVoterCount.values().stream().mapToInt(Integer::intValue).max().orElse(0); + int globalMin = storeVoterCount.values().stream().mapToInt(Integer::intValue).min().orElse(0); if (globalMax - globalMin <= 1) { return false; } + boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { Boundary boundary = entry.getKey(); - LeaderRange leaderRange = entry.getValue(); - KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); - ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } - // examine in sorted order to ensure the result is deterministic - Set learners = Sets.newHashSet(clusterConfig.getLearnersList()); - SortedSet voterSorted = Sets.newTreeSet(clusterConfig.getVotersList()); - for (String voter : voterSorted) { - if (storeVoterCount.get(voter) >= maxVotersPerStore) { - // voter store has overloaded voters - for (StoreVoterCount underloadedStore : storeVoterCountSorted) { - // move to one underloaded store which is current not in the voter list - if (storeVoterCount.get(underloadedStore.storeId) <= minVotersPerStore - && !voterSorted.contains(underloadedStore.storeId) - && !learners.contains(underloadedStore.storeId)) { - meetingGoal = true; - ClusterConfig newConfig = ClusterConfig.newBuilder() - .addAllVoters( - difference(union(voterSorted, Set.of(underloadedStore.storeId)), Set.of(voter))) - .addAllLearners(learners) - .build(); - expectedRangeLayout.put(boundary, newConfig); - break; + LeaderRange lr = entry.getValue(); + ClusterConfig cc = lr.descriptor().getConfig(); + + Set learners = Sets.newHashSet(cc.getLearnersList()); + SortedSet voterSorted = 
Sets.newTreeSet(cc.getVotersList()); + sanitize(learners, liveStores); + voterSorted.retainAll(liveStores); + + if (!meetingGoal) { + meet: + for (String voter : new ArrayList<>(voterSorted)) { + int voters = storeVoterCount.getOrDefault(voter, 0); + if (voters == globalMax) { + for (StoreVoterCount under : storeVoterCountSorted) { + if (storeVoterCount.getOrDefault(under.storeId, 0) <= minVotersPerStore + && !voterSorted.contains(under.storeId) + && !learners.contains(under.storeId)) { + // move voter -> underloaded + Set newVoters = new HashSet<>(voterSorted); + newVoters.remove(voter); + newVoters.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(newVoters, learners)); + meetingGoal = true; + break meet; + } } } } + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); + } + } else { + expectedRangeLayout.put(boundary, buildConfig(voterSorted, learners)); } } + if (!meetingGoal) { + expectedRangeLayout.clear(); + } return meetingGoal; } - private boolean balanceLearnerCount(Map landscape, - EffectiveRoute effectiveRoute, - Map expectedRangeLayout) { - boolean meetingGoal = false; + private void balanceLearnerCount(Map landscape, + EffectiveRoute effectiveRoute, + Map expectedRangeLayout) { + final Set liveStores = landscape.keySet(); + Map storeLearnerCount = new HashMap<>(); for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { ClusterConfig config = entry.getValue().descriptor().getConfig(); - config.getLearnersList() + config.getLearnersList().stream() + .filter(liveStores::contains) .forEach(storeId -> storeLearnerCount.put(storeId, storeLearnerCount.getOrDefault(storeId, 0) + 1)); } - landscape.keySet().forEach(storeId -> { - if (!storeLearnerCount.containsKey(storeId)) { - storeLearnerCount.put(storeId, 0); - } - }); - record StoreLearnerCount(String storeId, int voterCount) { - } + liveStores.forEach(s -> storeLearnerCount.putIfAbsent(s, 0)); + + record StoreLearnerCount(String 
storeId, int learnerCount) {} - SortedSet storeVoterCountSorted = new TreeSet<>(Comparator - .comparingInt(StoreLearnerCount::voterCount).thenComparing(StoreLearnerCount::storeId)); - storeLearnerCount.forEach( - (storeId, voterCount) -> storeVoterCountSorted.add(new StoreLearnerCount(storeId, voterCount))); + SortedSet storeLearnerCountSorted = new TreeSet<>( + Comparator.comparingInt(StoreLearnerCount::learnerCount).thenComparing(StoreLearnerCount::storeId)); + storeLearnerCount.forEach((id, c) -> storeLearnerCountSorted.add(new StoreLearnerCount(id, c))); double totalLearners = storeLearnerCount.values().stream().mapToInt(Integer::intValue).sum(); - double targetLearnersPerStore = totalLearners / landscape.size(); - int maxLearnersPerStore = (int) Math.ceil(targetLearnersPerStore); + double targetLearnersPerStore = liveStores.isEmpty() ? 0 : totalLearners / liveStores.size(); + int minLearnersPerStore = (int) Math.floor(targetLearnersPerStore); + + int globalMax = storeLearnerCount.values().stream().mapToInt(Integer::intValue).max().orElse(0); + int globalMin = storeLearnerCount.values().stream().mapToInt(Integer::intValue).min().orElse(0); + if (globalMax - globalMin <= 1) { + return; + } + boolean meetingGoal = false; for (Map.Entry entry : effectiveRoute.leaderRanges().entrySet()) { Boundary boundary = entry.getKey(); - LeaderRange leaderRange = entry.getValue(); - KVRangeDescriptor rangeDescriptor = leaderRange.descriptor(); - ClusterConfig clusterConfig = rangeDescriptor.getConfig(); - if (meetingGoal) { - expectedRangeLayout.put(boundary, clusterConfig); - continue; - } - // examine in sorted order to ensure the result is deterministic - Set voters = Sets.newHashSet(clusterConfig.getVotersList()); - SortedSet learnerSorted = Sets.newTreeSet(clusterConfig.getLearnersList()); - for (String learner : learnerSorted) { - if (storeLearnerCount.get(learner) > maxLearnersPerStore) { - // learner store has overloaded learners - for (StoreLearnerCount 
underloadedStore : storeVoterCountSorted) { - // move to one underloaded store which is current not in the voter or learner list - if (storeLearnerCount.get(underloadedStore.storeId) < maxLearnersPerStore - && !voters.contains(underloadedStore.storeId) - && !learnerSorted.contains(underloadedStore.storeId)) { - meetingGoal = true; - ClusterConfig newConfig = ClusterConfig.newBuilder() - .addAllVoters(voters) - .addAllLearners(difference( - union(learnerSorted, Set.of(underloadedStore.storeId)), Set.of(learner))) - .build(); - expectedRangeLayout.put(boundary, newConfig); - break; + LeaderRange lr = entry.getValue(); + ClusterConfig cc = lr.descriptor().getConfig(); + + Set voters = Sets.newHashSet(cc.getVotersList()); + SortedSet learnerSorted = Sets.newTreeSet(cc.getLearnersList()); + sanitize(voters, liveStores); + learnerSorted.retainAll(liveStores); + + if (!meetingGoal) { + meet: + for (String learner : new ArrayList<>(learnerSorted)) { + int learners = storeLearnerCount.getOrDefault(learner, 0); + if (learners == globalMax) { + for (StoreLearnerCount under : storeLearnerCountSorted) { + if (storeLearnerCount.getOrDefault(under.storeId, 0) < minLearnersPerStore + && !voters.contains(under.storeId) + && !learnerSorted.contains(under.storeId)) { + Set newLearners = new HashSet<>(learnerSorted); + newLearners.remove(learner); + newLearners.add(under.storeId); + + expectedRangeLayout.put(boundary, buildConfig(voters, newLearners)); + meetingGoal = true; + break meet; + } } } } + if (!meetingGoal) { + expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted)); + } + } else { + expectedRangeLayout.put(boundary, buildConfig(voters, learnerSorted)); } } - return meetingGoal; + if (!meetingGoal) { + expectedRangeLayout.clear(); + } } } diff --git a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java 
b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java index b3ea01c5b..abf7b204f 100644 --- a/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java +++ b/base-kv/base-kv-store-balance-controller/src/main/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancer.java @@ -14,13 +14,21 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.balance.impl; import static org.apache.bifromq.basekv.proto.State.StateType.Normal; +import com.google.common.collect.Sets; +import java.time.Duration; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Supplier; import org.apache.bifromq.basehlc.HLC; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; @@ -34,14 +42,6 @@ import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; -import com.google.common.collect.Sets; -import java.time.Duration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Supplier; /** * The UnreachableReplicaRemovalBalancer is a specialized balancer responsible for managing and removing unreachable @@ -99,7 +99,10 @@ public UnreachableReplicaRemovalBalancer(String clusterId, String localStoreId, public void update(Set landscape) { Map> descriptorMap = build(landscape); latestDescriptorMap = descriptorMap; - + if 
(!descriptorMap.containsKey(localStoreId)) { + replicaSuspicionTimeMap.clear(); + return; // No need to process if local store is not present in the landscape + } // Track the current leaders Set currentLeaders = new HashSet<>(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java index e3f537967..dc44b4be4 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/KVStoreBalanceControllerTest.java @@ -79,6 +79,7 @@ public class KVStoreBalanceControllerTest { private static final String LOCAL_STORE_ID = "localStoreId"; private final PublishSubject> proposalSubject = PublishSubject.create(); private final PublishSubject> storeDescSubject = PublishSubject.create(); + private final PublishSubject refreshSignal = PublishSubject.create(); @Mock private IBaseKVMetaService metaService; @Mock @@ -103,6 +104,7 @@ public void setup() throws IOException { when(balancerFactory.newBalancer(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(storeBalancer); when(metaService.balancerStatesProposal(eq(CLUSTER_ID))).thenReturn(statesProposal); when(metaService.balancerStatesReporter(eq(CLUSTER_ID), eq(LOCAL_STORE_ID))).thenReturn(statesReporter); + when(statesReporter.refreshSignal()).thenReturn(refreshSignal); when(statesProposal.expectedBalancerStates()).thenReturn(proposalSubject); when(storeClient.describe()).thenReturn(storeDescSubject); executor = Executors.newScheduledThreadPool(1); @@ -400,6 +402,18 @@ public void testInvalidRules() { verify(statesReporter, never()).reportBalancerState(anyString(), anyBoolean(), any(Struct.class)); } + @Test + public void testRefreshSignal() { + reset(statesReporter); + 
refreshSignal.onNext(System.currentTimeMillis()); + verify(statesReporter, times(1)) + .reportBalancerState(anyString(), anyBoolean(), any(Struct.class)); + verify(statesReporter, times(1)) + .reportBalancerState(eq(balancerFactory.getClass().getName()), + eq(false), + eq(Struct.getDefaultInstance())); + } + private Set generateDescriptor(KVRangeId id, long ver) { List voters = Lists.newArrayList(LOCAL_STORE_ID, "store1"); List learners = Lists.newArrayList(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java index 6c9aec88d..7e167a8f3 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeBootstrapBalancerTest.java @@ -21,18 +21,20 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertSame; +import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; -import org.apache.bifromq.basekv.balance.BalanceResultType; -import org.apache.bifromq.basekv.balance.command.BootstrapCommand; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import java.time.Duration; import java.util.Collections; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Supplier; +import org.apache.bifromq.basekv.balance.BalanceNow; +import org.apache.bifromq.basekv.balance.BalanceResult; +import org.apache.bifromq.basekv.balance.BalanceResultType; +import 
org.apache.bifromq.basekv.balance.command.BootstrapCommand; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -58,7 +60,7 @@ public void updateWithoutStoreDescriptors() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.BalanceNow); - assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow) result).command).getBoundary()); + assertEquals(((BootstrapCommand) ((BalanceNow) result).command).getBoundary(), FULL_BOUNDARY); } @@ -74,6 +76,51 @@ public void balanceWithTrigger() { BalanceResult result = balancer.balance(); assertSame(result.type(), BalanceResultType.BalanceNow); - assertEquals(FULL_BOUNDARY, ((BootstrapCommand) ((BalanceNow) result).command).getBoundary()); + assertEquals(((BootstrapCommand) ((BalanceNow) result).command).getBoundary(), FULL_BOUNDARY); + } + + @Test + public void returnsAwaitImmediatelyBeforeDeadline() { + balancer.update(Collections.emptySet()); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.AwaitBalance); + + Duration remaining = ((org.apache.bifromq.basekv.balance.AwaitBalance) result).await; + assertFalse(remaining.isZero()); + assertTrue(remaining.toMillis() <= 2000L); + } + + @Test + public void awaitThenBalanceNowAfterDeadline() { + balancer.update(Collections.emptySet()); + + BalanceResult r1 = balancer.balance(); + assertSame(r1.type(), BalanceResultType.AwaitBalance); + long r1ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r1).await.toMillis(); + assertTrue(r1ms > 0); + + long half = Math.max(1, r1ms / 2); + mockTime.addAndGet(half); + BalanceResult r2 = balancer.balance(); + assertSame(r2.type(), BalanceResultType.AwaitBalance); + long r2ms = ((org.apache.bifromq.basekv.balance.AwaitBalance) r2).await.toMillis(); + assertTrue(r2ms >= 0 && r2ms < r1ms); + + mockTime.addAndGet(r2ms + 1); + BalanceResult r3 = balancer.balance(); + 
assertSame(r3.type(), BalanceResultType.BalanceNow); + assertEquals(((BootstrapCommand) ((BalanceNow) r3).command).getBoundary(), FULL_BOUNDARY); + } + + @Test + public void noSecondTriggerAfterBootstrapFires() { + balancer.update(Collections.emptySet()); + mockTime.addAndGet(2000L); + BalanceResult fired = balancer.balance(); + assertSame(fired.type(), BalanceResultType.BalanceNow); + + BalanceResult next = balancer.balance(); + assertSame(next.type(), BalanceResultType.NoNeedBalance); } } \ No newline at end of file diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java index 1b1aebccd..ab1348c72 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeLeaderBalancerTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.balance.impl; @@ -25,6 +25,8 @@ import static org.testng.Assert.assertSame; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import java.util.Set; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -34,10 +36,9 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; -import com.google.protobuf.ByteString; -import java.util.Set; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -58,6 +59,7 @@ public void noEffectiveRouteNoBalanceNeeded() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary( Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).setEndKey(ByteString.copyFromUtf8("z")) .build()) @@ -139,6 +141,7 @@ public void balanceToOtherNoLeaderStore() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(toBoundary(null, ByteString.copyFromUtf8("z"))) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build()) .build(); @@ -148,6 +151,7 @@ public void balanceToOtherNoLeaderStore() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null)) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build()) .build(); @@ -161,6 +165,7 @@ public void 
balanceToOtherNoLeaderStore() { KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder() .setId(kvRangeId3) .setRole(RaftNodeStatus.Follower) + .setState(State.StateType.Normal) .setBoundary(toBoundary(ByteString.copyFromUtf8("z"), null)) .setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build()) .build(); @@ -185,6 +190,7 @@ public void transferLeadership() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addLearners("otherStore").build()) .build(); @@ -193,6 +199,7 @@ public void transferLeadership() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters(localStoreId).addVoters("otherStore").build()) .build(); @@ -205,6 +212,7 @@ public void transferLeadership() { KVRangeDescriptor kvRangeDescriptor3 = KVRangeDescriptor.newBuilder() .setId(kvRangeId3) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder().addVoters("otherStore").build()) .build(); diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java index a43f8fd47..0c0d6b62f 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java +++ 
b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RangeSplitBalancerTest.java @@ -163,4 +163,176 @@ public void stopSplitWhenExceedMaxRanges() { .build())); assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); } + + @Test + public void skipWhenConfigHasDeadVoter() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addVoters("deadStore") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenConfigHasDeadLearner() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addLearners("ghost") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenConfigHasDeadNextMembers() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder 
+ .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addNextVoters("deadV") + .addNextLearners("deadL") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenOngoingConfigChange() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setConfig(ClusterConfig.newBuilder() + .addVoters("store1") + .addNextVoters("store1") + .build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void skipWhenSplitKeyEqualsStartOrOutOfRange() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .setBoundary(org.apache.bifromq.basekv.proto.Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setConfig(ClusterConfig.newBuilder().addVoters("store1").build()) + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("a")) + .build()) + .build(); + + KVRangeStoreDescriptor sd = 
storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + + KVRangeDescriptor rd2 = rd.toBuilder().clearHints() + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .setSplitKey(ByteString.copyFromUtf8("z")) + .build()) + .build(); + KVRangeStoreDescriptor sd2 = sd.toBuilder().clearRanges().addRanges(rd2).build(); + balancer.update(Set.of(sd2)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void stopSplitWhenSplitKeyNotProvided() { + RangeSplitBalancer balancer = new RangeSplitBalancer(clusterId, "store1", HintType, 30, 0.8, 30, 30_000); + + KVRangeDescriptor rd = rangeDescriptorBuilder + .addHints(SplitHint.newBuilder() + .setType(HintType) + .putLoad("ioDensity", 40) + .putLoad("ioLatencyNanos", 100) + .build()) + .build(); + + KVRangeStoreDescriptor sd = storeDescriptorBuilder + .clearRanges() + .addRanges(rd) + .putStatistics("cpu.usage", 0.7) + .build(); + + balancer.update(Set.of(sd)); + assertEquals(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java index ecfeca3b9..00871e263 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/RedundantRangeRemovalBalancerTest.java @@ -23,9 +23,11 @@ import static org.testng.Assert.assertSame; import com.google.protobuf.ByteString; +import java.time.Duration; import 
java.util.Collections; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -34,20 +36,22 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class RedundantRangeRemovalBalancerTest { - private final String clusterId = "testCluster"; private final String localStoreId = "localStore"; private RedundantRangeRemovalBalancer balancer; + private AtomicLong mockTime; @BeforeMethod public void setUp() { - balancer = new RedundantRangeRemovalBalancer(clusterId, localStoreId); + mockTime = new AtomicLong(0L); // Start time at 0 + balancer = new RedundantRangeRemovalBalancer(clusterId, localStoreId, Duration.ofSeconds(1), mockTime::get); } @Test @@ -56,6 +60,7 @@ public void noRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -85,6 +90,7 @@ public void removeRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -99,6 +105,7 @@ public void removeRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder() .setId(kvRangeId2) 
.setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("n")) @@ -121,6 +128,11 @@ public void removeRangeInRedundantEpoch() { balancer.update(storeDescriptors); BalanceResult command = balancer.balance(); + // first returns AwaitBalance due to suspicion delay + assertEquals(command.type(), BalanceResultType.AwaitBalance); + // advance mock time beyond the max suspicion window (2s) + mockTime.set(3000L); + command = balancer.balance(); assertEquals(command.type(), BalanceResultType.BalanceNow); ChangeConfigCommand changeConfigCommand = (ChangeConfigCommand) ((BalanceNow) command).command; @@ -137,6 +149,7 @@ public void noLocalLeaderRangeInRedundantEpoch() { .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) .setEndKey(ByteString.copyFromUtf8("m")) @@ -150,6 +163,7 @@ public void noLocalLeaderRangeInRedundantEpoch() { KVRangeDescriptor kvRangeDescriptor2 = KVRangeDescriptor.newBuilder() .setId(kvRangeId2) .setRole(RaftNodeStatus.Follower) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("n")) @@ -187,6 +201,7 @@ public void removeRedundantEffectiveRange() { .setId(kvRangeId1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(config) .build(); @@ -194,6 +209,7 @@ public void removeRedundantEffectiveRange() { .setId(kvRangeId2) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(config) .build(); @@ -209,7 +225,10 @@ public void removeRedundantEffectiveRange() { balancer.update(storeDescriptors); BalanceResult result = balancer.balance(); - + // first returns AwaitBalance due to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + 
mockTime.set(3000L); + result = balancer.balance(); assertEquals(result.type(), BalanceResultType.BalanceNow); ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; @@ -226,6 +245,7 @@ public void ignoreNonLocalStore() { KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(Boundary.newBuilder() .setStartKey(ByteString.copyFromUtf8("a")) @@ -259,6 +279,7 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() { KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setVer(1) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() @@ -289,6 +310,10 @@ public void removeIdConflictingRangeWhenLocalStoreIsLoser() { balancer.update(Set.of(localStoreDesc, peerStoreDesc)); BalanceResult result = balancer.balance(); + // first returns AwaitBalance due to suspicion delay + assertEquals(result.type(), BalanceResultType.AwaitBalance); + mockTime.set(3000L); + result = balancer.balance(); assertEquals(result.type(), BalanceResultType.BalanceNow); ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; @@ -311,6 +336,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() .addVoters(localStoreId) @@ -321,6 +347,7 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(boundary) .setConfig(ClusterConfig.newBuilder() .addVoters(peerStoreId) @@ -342,4 +369,49 @@ public void ignoreIdConflictingRangeWhenLocalStoreIsWinner() { BalanceResult result = balancer.balance(); assertSame(result.type(), 
BalanceResultType.NoNeedBalance); } -} \ No newline at end of file + + @Test + public void idConflictButVotersOverlapShouldNotDelete() { + String peerStoreId = "peer"; + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + Boundary boundary = Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("a")) + .setEndKey(ByteString.copyFromUtf8("z")).build(); + + KVRangeDescriptor localRange = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(boundary) + .setConfig(ClusterConfig.newBuilder() + .addVoters(localStoreId) + .addVoters("x").build()) + .build(); + + KVRangeDescriptor peerRange = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(boundary) + .setConfig(ClusterConfig.newBuilder() + .addVoters(localStoreId) + .addVoters(peerStoreId).build()) + .build(); + + KVRangeStoreDescriptor localStoreDesc = KVRangeStoreDescriptor.newBuilder() + .setId(localStoreId) + .addRanges(localRange) + .build(); + KVRangeStoreDescriptor peerStoreDesc = KVRangeStoreDescriptor.newBuilder() + .setId(peerStoreId) + .addRanges(peerRange) + .build(); + + balancer.update(Set.of(localStoreDesc, peerStoreDesc)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.NoNeedBalance); + } +} diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java index 346eedd31..1b5b7fc1f 100644 --- a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/ReplicaCntBalancerTest.java @@ 
-40,6 +40,7 @@ import org.apache.bifromq.basekv.proto.KVRangeDescriptor; import org.apache.bifromq.basekv.proto.KVRangeId; import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.proto.State; import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.utils.EffectiveRoute; @@ -69,6 +70,7 @@ public void balanceToAddVoter() { .setId(kvRangeId1) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -82,6 +84,7 @@ public void balanceToAddVoter() { .setId(kvRangeId2) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -127,6 +130,7 @@ public void balanceToAddLearner() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -163,16 +167,17 @@ public void balanceToAddLearner() { } @Test - public void balanceToRemoveVoter() { + public void balanceToRemoveLearner() { KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addVoters("remoteStore") + .addLearners("learnerStore") .build()) .build(); @@ -190,63 +195,43 @@ public void balanceToRemoveVoter() { BalanceResult result = balancer.balance(); ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; 
assertTrue(command.getVoters().contains("localStore")); - assertFalse(command.getVoters().contains("remoteStore")); assertTrue(command.getLearners().isEmpty()); } @Test - public void balanceToRemoveLearner() { + public void promoteLearnersToVoters() { KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); - KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() + KVRangeDescriptor leader = KVRangeDescriptor.newBuilder() .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addLearners("learnerStore") + .addLearners("remoteStore") .build()) .build(); - - KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() - .setId("localStore") - .addRanges(kvRangeDescriptor) - .putStatistics("cpu.usage", 0.5) - .build(); - - Set storeDescriptors = new HashSet<>(); - storeDescriptors.add(storeDescriptor); - - balancer.update(storeDescriptors); - - BalanceResult result = balancer.balance(); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; - assertTrue(command.getVoters().contains("localStore")); - assertTrue(command.getLearners().isEmpty()); - } - - @Test - public void nothingChanged() { - KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); - KVRangeDescriptor kvRangeDescriptor = KVRangeDescriptor.newBuilder() + KVRangeDescriptor learner = KVRangeDescriptor.newBuilder() .setId(kvRangeId) - .setRole(RaftNodeStatus.Leader) + .setRole(RaftNodeStatus.Follower) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") - .addLearners("learnerStore") + .addLearners("remoteStore") .build()) .build(); KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() .setId("localStore") - .addRanges(kvRangeDescriptor) + .addRanges(leader) 
.putStatistics("cpu.usage", 0.5) .build(); KVRangeStoreDescriptor learnerStoreDescriptor = KVRangeStoreDescriptor.newBuilder() - .setId("learnerStore") - .addRanges(kvRangeDescriptor) + .setId("remoteStore") + .addRanges(learner) .putStatistics("cpu.usage", 0.5) .build(); @@ -255,8 +240,10 @@ public void nothingChanged() { storeDescriptors.add(learnerStoreDescriptor); balancer.update(storeDescriptors); - - assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) balancer.balance()).command; + assertTrue(command.getVoters().contains("localStore")); + assertTrue(command.getVoters().contains("remoteStore")); + assertTrue(command.getLearners().isEmpty()); } @Test @@ -267,6 +254,7 @@ public void balanceToAddAllRestLearners() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -318,6 +306,7 @@ public void balanceVoterCount() { KVRangeDescriptor kvRangeDescriptor1 = KVRangeDescriptor.newBuilder() .setId(kvRangeId1) .setVer(1) + .setState(State.StateType.Normal) .setRole(RaftNodeStatus.Leader) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() @@ -329,6 +318,7 @@ public void balanceVoterCount() { .setId(kvRangeId2) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -364,6 +354,7 @@ public void balanceLearnerCount() { .setId(kvRangeId1) .setVer(1) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("a")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s1") @@ -380,6 +371,7 @@ public void balanceLearnerCount() { 
.setId(kvRangeId2) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("a")) .setEndKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() @@ -398,6 +390,7 @@ public void balanceLearnerCount() { .setId(kvRangeId3) .setVer(2) .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) .setConfig(ClusterConfig.newBuilder() .addVoters("s3") @@ -432,6 +425,7 @@ public void generateCorrectClusterConfig() { .setId(kvRangeId) .setRole(RaftNodeStatus.Leader) .setVer(1) + .setState(State.StateType.Normal) .setBoundary(FULL_BOUNDARY) .setConfig(ClusterConfig.newBuilder() .addVoters("localStore") @@ -464,4 +458,502 @@ public void generateCorrectClusterConfig() { assertTrue(balancer.verify(layout, allStoreDescriptors)); } + + @Test + public void removeDeadVoterAndBackfillEvenIfCountEqualsExpected() { + // live: s1, s2, s3;expected voters=3 + // range current voters = [s1, ghost, s2] + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("ghost") + .addVoters("s2") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + Set stores = new HashSet<>(); + stores.add(s1); + stores.add(s2); + stores.add(s3); + + // votersPerRange=3,learnersPerRange=0 + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + balancer.update(stores); + 
+ BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // expected:ghost removed s3 added + assertEquals(cmd.getKvRangeId(), kvRangeId); + assertTrue(cmd.getVoters().contains("s1")); + assertTrue(cmd.getVoters().contains("s2")); + assertTrue(cmd.getVoters().contains("s3")); + assertFalse(cmd.getVoters().contains("ghost")); + assertTrue(cmd.getLearners().isEmpty()); + } + + @Test + public void abortWhenConfigChangeInProgress_nextFieldsPresent() { + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + ClusterConfig cfgWithNext = ClusterConfig.newBuilder() + .addVoters("localStore") + .addNextVoters("someone") + .build(); + + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(FULL_BOUNDARY) + .setConfig(cfgWithNext) + .build(); + + KVRangeStoreDescriptor local = KVRangeStoreDescriptor.newBuilder() + .setId("localStore") + .addRanges(range) + .build(); + + Set stores = new HashSet<>(); + stores.add(local); + + balancer = new ReplicaCntBalancer("testCluster", "localStore", 1, 0); + balancer.update(stores); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void learnersMinusOneUsesLiveMinusVotersAndSanitizes() { + // expectedLearners = -1 => learners = live - voters; + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, -1); + + KVRangeId kvRangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(kvRangeId) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("ghostLearner") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = 
KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + Set stores = new HashSet<>(); + stores.add(s1); + stores.add(s2); + stores.add(s3); + + balancer.update(stores); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // expected:learners = live - voters = {s2, s3};ghostLearner removed + assertTrue(cmd.getVoters().contains("s1")); + assertFalse(cmd.getLearners().contains("ghostLearner")); + assertTrue(cmd.getLearners().contains("s2")); + assertTrue(cmd.getLearners().contains("s3")); + assertEquals(cmd.getLearners().size(), 2); + } + + @Test + public void skipWhenCapacityInsufficientAndHasDeadVoter() { + // expected voters=3,live voters=S1,S2, S3(dead) + ReplicaCntBalancer balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setBoundary(org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + .addVoters("deadS3") // dead + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + Set landscape = new HashSet<>(); + landscape.add(s1); + landscape.add(s2); + + balancer.update(landscape); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void learnersMinusOnePreferPromoteLearnersToFillVoters() { + // expected: voters=3, learners=-1 + balancer = new 
ReplicaCntBalancer("testCluster", "s1", 3, -1); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("s2") + .addLearners("s3") + .addLearners("s4") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + balancer.update(Set.of(s1, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + // voters should be s1 + two from {s2,s3,s4} + assertTrue(cmd.getVoters().contains("s1")); + assertEquals(cmd.getVoters().size(), 3); + // after promotion, learners should be live - voters = the remaining one + assertEquals(cmd.getLearners().size(), 1); + Set all = Set.of("s1", "s2", "s3", "s4"); + Set union = new HashSet<>(cmd.getVoters()); + union.addAll(cmd.getLearners()); + assertEquals(union, all); + } + + @Test + public void noChangeWhenLiveLessThanExpectedAndNoDeadVoter() { + // expected voters=3, live={s1,s2}, voters={s1,s2} + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + 
.build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + + balancer.update(Set.of(s1, s2)); + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void neverRemoveLeaderWhenShrinkingVoters() { + // expected voters=3, voters currently 4 (leader must stay) + balancer = new ReplicaCntBalancer("testCluster", "leader", 3, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("leader") + .addVoters("s2") + .addVoters("s3") + .addVoters("s4") + .build()) + .build(); + + KVRangeStoreDescriptor leader = KVRangeStoreDescriptor.newBuilder().setId("leader").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + balancer.update(Set.of(leader, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("leader")); + assertEquals(cmd.getVoters().size(), 3); + } + + @Test + public void balanceVoterCountNoopWhenSpreadWithinOne() { + // two stores, two ranges: counts differ by at most 1 -> no rebalance + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0); + + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + 
.setId(r1).setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("s1").build()) + .build(); + + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2).setVer(1).setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("s2").build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(d1).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(d2).build(); + + balancer.update(Set.of(s1, s2)); + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void fixedLearnerCountRemovesDeadAndBackfills() { + // expected learners=2; current learners={deadL, s2}; live={s1,s2,s3,s4} + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 2); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("deadL") + .addLearners("s2") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + KVRangeStoreDescriptor s4 = KVRangeStoreDescriptor.newBuilder().setId("s4").build(); + + balancer.update(Set.of(s1, s2, s3, s4)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + 
ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("s1")); + assertEquals(cmd.getLearners().size(), 2); + assertTrue(cmd.getLearners().contains("s2")); + assertFalse(cmd.getLearners().contains("deadL")); + } + + @Test + public void zeroLearnersTargetClearsLearners() { + // expected learners=0 + balancer = new ReplicaCntBalancer("testCluster", "s1", 1, 0); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addLearners("s2") + .addLearners("s3") + .build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + balancer.update(Set.of(s1, s2, s3)); + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + assertTrue(cmd.getLearners().isEmpty()); + assertTrue(cmd.getVoters().contains("s1")); + } + + @Test + public void learnersMinusOneWithAllLiveAsVotersMakesLearnersEmpty() { + balancer = new ReplicaCntBalancer("testCluster", "s1", 3, -1); + + KVRangeId rid = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor range = KVRangeDescriptor.newBuilder() + .setId(rid) + .setRole(RaftNodeStatus.Leader) + .setVer(1) + .setState(State.StateType.Normal) + .setBoundary(FULL_BOUNDARY) + .setConfig(ClusterConfig.newBuilder() + .addVoters("s1") + .addVoters("s2") + .addVoters("s3") + .addLearners("ghost") // should be sanitized away + .build()) + .build(); + + 
KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(range).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").build(); + KVRangeStoreDescriptor s3 = KVRangeStoreDescriptor.newBuilder().setId("s3").build(); + + balancer.update(Set.of(s1, s2, s3)); + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertEquals(cmd.getVoters(), Set.of("s1", "s2", "s3")); + assertTrue(cmd.getLearners().isEmpty()); + } + + @Test + public void balanceVoterCountPrefersZeroCountStoreFirst() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId r3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + + KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB") + .addRanges(KVRangeDescriptor.newBuilder() + .setId(r3) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("z")) + 
.build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sB").build()) + .build()) + .build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 0); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("sC")); + assertFalse(cmd.getVoters().contains("sA")); + } + + @Test + public void balanceVoterCountDoesOnlyOneChangePerRound() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId r2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(r2) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setConfig(ClusterConfig.newBuilder().addVoters("sA").build()) + .build(); + + KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 0); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) 
result).command; + assertTrue(cmd.getKvRangeId().equals(r1) || cmd.getKvRangeId().equals(r2)); + } + + @Test + public void balanceVoterCountSkipsTargetsAlreadyInVotersOrLearners() { + KVRangeId r1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeDescriptor d1 = KVRangeDescriptor.newBuilder() + .setId(r1) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder() + .addVoters("sA") + .addLearners("sB") + .build()) + .build(); + + KVRangeDescriptor d2 = KVRangeDescriptor.newBuilder() + .setId(KVRangeId.newBuilder().setEpoch(1).setId(2).build()) + .setVer(1) + .setRole(RaftNodeStatus.Leader) + .setState(State.StateType.Normal) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("z")).build()) + .setConfig(ClusterConfig.newBuilder() + .addVoters("sA") + .addLearners("sB") + .build()) + .build(); + + KVRangeStoreDescriptor sA = KVRangeStoreDescriptor.newBuilder().setId("sA").addRanges(d1).addRanges(d2).build(); + KVRangeStoreDescriptor sB = KVRangeStoreDescriptor.newBuilder().setId("sB").build(); + KVRangeStoreDescriptor sC = KVRangeStoreDescriptor.newBuilder().setId("sC").build(); + + balancer = new ReplicaCntBalancer("testCluster", "sA", 1, 1); + balancer.update(Set.of(sA, sB, sC)); + + BalanceResult result = balancer.balance(); + assertSame(result.type(), BalanceResultType.BalanceNow); + ChangeConfigCommand cmd = (ChangeConfigCommand) ((BalanceNow) result).command; + + assertTrue(cmd.getVoters().contains("sC")); + assertFalse(cmd.getVoters().contains("sB")); + } } diff --git a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java index 752ebbcff..dc2ccb358 100644 --- 
a/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java +++ b/base-kv/base-kv-store-balance-controller/src/test/java/org/apache/bifromq/basekv/balance/impl/UnreachableReplicaRemovalBalancerTest.java @@ -25,6 +25,12 @@ import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertSame; +import java.time.Duration; +import java.util.Arrays; +import java.util.Collections; +import java.util.Map; +import java.util.Set; +import java.util.function.Supplier; import org.apache.bifromq.basekv.balance.BalanceNow; import org.apache.bifromq.basekv.balance.BalanceResult; import org.apache.bifromq.basekv.balance.BalanceResultType; @@ -36,22 +42,16 @@ import org.apache.bifromq.basekv.raft.proto.ClusterConfig; import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.apache.bifromq.basekv.raft.proto.RaftNodeSyncState; -import java.time.Duration; -import java.util.Arrays; -import java.util.Collections; -import java.util.Map; -import java.util.Set; -import java.util.function.Supplier; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class UnreachableReplicaRemovalBalancerTest { - private UnreachableReplicaRemovalBalancer balancer; - private Supplier mockTimeSource; private final String localStoreId = "localStore"; private final String peerStoreId = "peerStore"; private final KVRangeId rangeId = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + private UnreachableReplicaRemovalBalancer balancer; + private Supplier mockTimeSource; @BeforeMethod public void setUp() { @@ -61,6 +61,36 @@ public void setUp() { new UnreachableReplicaRemovalBalancer("clusterId", localStoreId, Duration.ofSeconds(15), mockTimeSource); } + @Test + public void noChangeWhenLocalStoreMissingInitially() { + KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId); + balancer.update(Set.of(peerStoreDescriptor)); + 
assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + + @Test + public void noNPEWhenLocalStoreDisappearsAfterBeingLeader() { + KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor( + localStoreId, + createRangeDescriptor( + rangeId, + RaftNodeStatus.Leader, + Map.of(localStoreId, RaftNodeSyncState.Replicating, peerStoreId, RaftNodeSyncState.Probing), + Set.of(localStoreId, peerStoreId), + Set.of() + ) + ); + KVRangeStoreDescriptor peerStoreDescriptor = createStoreDescriptor(peerStoreId); + + when(mockTimeSource.get()).thenReturn(System.currentTimeMillis()); + balancer.update(Set.of(localStoreDescriptor, peerStoreDescriptor)); + + when(mockTimeSource.get()).thenReturn(System.currentTimeMillis() + 16000); + balancer.update(Set.of(peerStoreDescriptor)); + + assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); + } + @Test public void noChangeWhenAllReplicasAreReachable() { KVRangeStoreDescriptor storeDescriptor = createStoreDescriptor( @@ -73,7 +103,6 @@ public void noChangeWhenAllReplicasAreReachable() { balancer.update(Set.of(storeDescriptor)); - assertSame(balancer.balance().type(), BalanceResultType.NoNeedBalance); } @@ -101,7 +130,7 @@ public void removesUnreachableVoterReplicaAfterTimeout() { // Verify that the unhealthy replica is scheduled for removal assertEquals(localStoreId, command.getToStore()); assertEquals(rangeId, command.getKvRangeId()); - assertEquals(5, command.getExpectedVer()); + assertEquals(command.getExpectedVer(), 5); assertFalse(command.getVoters().contains(peerStoreId)); } @@ -129,11 +158,10 @@ public void removesUnreachableLearnerReplicaAfterTimeout() { // Verify that the unhealthy replica is scheduled for removal assertEquals(localStoreId, command.getToStore()); assertEquals(rangeId, command.getKvRangeId()); - assertEquals(5, command.getExpectedVer()); + assertEquals(command.getExpectedVer(), 5); assertFalse(command.getLearners().contains(peerStoreId)); } - @Test public void 
noCommandIfReplicaReachableAgain() { KVRangeStoreDescriptor localStoreDescriptor = createStoreDescriptor( @@ -177,7 +205,6 @@ public void removesReplicaIfLeaderChanged() { Set.of(localStoreId), Set.of(peerStoreId)) ); - balancer.update(Set.of(storeDescriptor, peerStoreDescriptor)); // Simulate a leader change diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java index c29a07839..13d6af6ad 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/StoreBalancer.java @@ -40,7 +40,8 @@ public abstract class StoreBalancer { * @param localStoreId the id of the store which the balancer is responsible for */ public StoreBalancer(String clusterId, String localStoreId) { - this.log = MDCLogger.getLogger("balancer.logger", "clusterId", clusterId, "storeId", localStoreId); + this.log = MDCLogger.getLogger("balancer.logger", + "clusterId", clusterId, "storeId", localStoreId, "balancer", this.getClass().getSimpleName()); this.clusterId = clusterId; this.localStoreId = localStoreId; } diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java index 2c36c3f38..99fc0701a 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/command/RecoveryCommand.java @@ -22,6 +22,7 @@ import lombok.Getter; import lombok.Setter; import lombok.experimental.SuperBuilder; +import org.apache.bifromq.basekv.utils.KVRangeIdUtil; @Getter @Setter @@ -35,6 +36,7 @@ public 
CommandType type() { @Override public String toString() { - return String.format("RecoveryCommand{toStore=%s}", getToStore()); + return String.format("RecoveryCommand{toStore=%s, kvRangeId=%s}", + getToStore(), KVRangeIdUtil.toString(getKvRangeId())); } } diff --git a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java index 125ff4208..dadf83ba9 100644 --- a/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java +++ b/base-kv/base-kv-store-balance-spi/src/main/java/org/apache/bifromq/basekv/balance/util/CommandUtil.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basekv.balance.util; @@ -23,8 +23,14 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey; import static org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.NavigableMap; +import java.util.Set; import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.balance.command.BootstrapCommand; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; @@ -37,14 +43,6 @@ import org.apache.bifromq.basekv.utils.EffectiveRoute; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.basekv.utils.LeaderRange; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import 
java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.NavigableMap; -import java.util.Set; /** * Utility class for generating balance commands. @@ -57,24 +55,24 @@ public class CommandUtil { * @param rangeDescriptor the range descriptor of the range which the balancer is responsible for * @return the generated ChangeConfigCommand */ - public static BalanceResult quit(String localStoreId, KVRangeDescriptor rangeDescriptor) { + public static BalanceCommand quit(String localStoreId, KVRangeDescriptor rangeDescriptor) { ClusterConfig config = rangeDescriptor.getConfig(); if (config.getVotersCount() > 1 || config.getLearnersCount() > 0) { - return BalanceNow.of(ChangeConfigCommand.builder() + return ChangeConfigCommand.builder() .toStore(localStoreId) .kvRangeId(rangeDescriptor.getId()) .expectedVer(rangeDescriptor.getVer()) .voters(Set.of(localStoreId)) .learners(Collections.emptySet()) - .build()); + .build(); } else { - return BalanceNow.of(ChangeConfigCommand.builder() + return ChangeConfigCommand.builder() .toStore(localStoreId) .kvRangeId(rangeDescriptor.getId()) .expectedVer(rangeDescriptor.getVer()) .voters(Collections.emptySet()) .learners(Collections.emptySet()) - .build()); + .build(); } } diff --git a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java index 2b06a80a7..ae41eafa1 100644 --- a/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java +++ b/base-kv/base-kv-store-balance-spi/src/test/java/org/apache/bifromq/basekv/balance/util/CommandUtilTest.java @@ -19,17 +19,19 @@ package org.apache.bifromq.basekv.balance.util; +import static com.google.protobuf.ByteString.copyFromUtf8; import static org.apache.bifromq.basekv.balance.util.CommandUtil.diffBy; import static 
org.apache.bifromq.basekv.utils.BoundaryUtil.toBoundary; -import static com.google.protobuf.ByteString.copyFromUtf8; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; import static org.testng.Assert.assertNull; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.balance.BalanceNow; -import org.apache.bifromq.basekv.balance.BalanceResult; -import org.apache.bifromq.basekv.balance.BalanceResultType; +import com.google.protobuf.ByteString; +import java.util.Collections; +import java.util.NavigableMap; +import java.util.Set; +import java.util.TreeMap; import org.apache.bifromq.basekv.balance.command.BalanceCommand; import org.apache.bifromq.basekv.balance.command.BootstrapCommand; import org.apache.bifromq.basekv.balance.command.ChangeConfigCommand; @@ -44,11 +46,6 @@ import org.apache.bifromq.basekv.utils.EffectiveRoute; import org.apache.bifromq.basekv.utils.KVRangeIdUtil; import org.apache.bifromq.basekv.utils.LeaderRange; -import com.google.protobuf.ByteString; -import java.util.Collections; -import java.util.NavigableMap; -import java.util.Set; -import java.util.TreeMap; import org.testng.annotations.Test; public class CommandUtilTest { @@ -65,10 +62,9 @@ public void quitWithMultipleVoters() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); @@ -89,10 +85,9 @@ public void quitWithLearners() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - 
assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); @@ -112,10 +107,9 @@ public void quitWithSingleVoterNoLearners() { .build()) .build(); - BalanceResult result = CommandUtil.quit(localStoreId, kvRangeDescriptor); + BalanceCommand result = CommandUtil.quit(localStoreId, kvRangeDescriptor); - assertEquals(result.type(), BalanceResultType.BalanceNow); - ChangeConfigCommand command = (ChangeConfigCommand) ((BalanceNow) result).command; + ChangeConfigCommand command = (ChangeConfigCommand) result; assertEquals(command.getToStore(), localStoreId); assertEquals(command.getKvRangeId(), kvRangeId); diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java index b3fb57119..15d961508 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/server/BaseKVStoreService.java @@ -20,7 +20,6 @@ package org.apache.bifromq.basekv.server; import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; -import static org.apache.bifromq.basekv.utils.BoundaryUtil.FULL_BOUNDARY; import static org.apache.bifromq.baserpc.server.UnaryResponse.response; import com.google.common.collect.Sets; @@ -94,10 +93,11 @@ public String storeId() { public void start() { log.debug("Starting BaseKVStore service"); kvRangeStore.start(new AgentHostStoreMessenger(agentHost, clusterId, kvRangeStore.id())); - kvRangeStore.bootstrap(KVRangeIdUtil.generate(), FULL_BOUNDARY); landscapeReporter = metaService.landscapeReporter(clusterId, kvRangeStore.id()); // sync store 
descriptor via crdt disposables.add(kvRangeStore.describe().subscribe(landscapeReporter::report)); + disposables.add(landscapeReporter.refreshSignal() + .subscribe(ts -> landscapeReporter.report(kvRangeStore.describe().blockingFirst()))); log.debug("BaseKVStore service started"); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java index af807d3f1..0f60e10ba 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java +++ b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/range/KVRangeFSM.java @@ -336,7 +336,7 @@ public void tick() { statsCollector.tick(); dumpSessions.values().forEach(KVRangeDumpSession::tick); shrinkWAL(); - checkZombieState(); + checkAndRepairFromZombieState(); estimateSplitHint(); } @@ -372,12 +372,10 @@ private CompletableFuture doClose() { .thenCompose(v -> statsCollector.stop()) .thenCompose(v -> mgmtTaskRunner.awaitDone()) .thenCompose(v -> wal.close()) - .thenCompose(v -> { + .thenCompose(v -> awaitShutdown(fsmExecutor)) + .whenComplete((v, e) -> { kvRange.close(); metricManager.close(); - return awaitShutdown(fsmExecutor); - }) - .whenComplete((v, e) -> { cmdFutures.values() .forEach(f -> f.completeExceptionally(new KVRangeException.TryLater("Range closed"))); queryRunner.close(); @@ -646,29 +644,27 @@ private CompletableFuture apply(LogEntry entry) { switch (entry.getTypeCase()) { case CONFIG -> { IKVRangeWriter rangeWriter = kvRange.toWriter(); - applyConfigChange(entry.getTerm(), entry.getIndex(), entry.getConfig(), rangeWriter) - .whenComplete((callback, e) -> { - if (onDone.isCancelled()) { - rangeWriter.abort(); - } else { - try { - if (e != null) { - rangeWriter.abort(); - onDone.completeExceptionally(e); - } else { - rangeWriter.lastAppliedIndex(entry.getIndex()); - rangeWriter.done(); - 
callback.run(); - linearizer.afterLogApplied(entry.getIndex()); - metricManager.reportLastAppliedIndex(entry.getIndex()); - onDone.complete(null); - } - } catch (Throwable t) { - log.error("Failed to apply log", t); - onDone.completeExceptionally(t); + try { + Supplier> afterLogApplied = applyConfigChange(entry.getTerm(), + entry.getIndex(), entry.getConfig(), rangeWriter); + rangeWriter.lastAppliedIndex(entry.getIndex()); + rangeWriter.done(); + afterLogApplied.get() + .whenComplete((v, e) -> { + if (e != null) { + log.error("Failed to apply config change", e); + onDone.completeExceptionally(e); + } else { + linearizer.afterLogApplied(entry.getIndex()); + metricManager.reportLastAppliedIndex(entry.getIndex()); + onDone.complete(null); } - } - }); + }); + } catch (Throwable t) { + rangeWriter.abort(); + log.error("Failed to apply command", t); + onDone.completeExceptionally(t); + } } case DATA -> { try { @@ -723,18 +719,17 @@ private CompletableFuture apply(LogEntry entry) { return onDone; } - private CompletableFuture applyConfigChange(long term, long index, - ClusterConfig config, - IKVRangeWritable rangeWriter) { - CompletableFuture onDone = new CompletableFuture<>(); + private Supplier> applyConfigChange(long term, + long index, + ClusterConfig config, + IKVRangeWritable rangeWriter) { State state = rangeWriter.state(); log.info("Apply new config[term={}, index={}]: state={}, leader={}\n{}", term, index, state, wal.isLeader(), config); rangeWriter.clusterConfig(config); if (config.getNextVotersCount() != 0 || config.getNextLearnersCount() != 0) { // skip joint-config - onDone.complete(NOOP); - return onDone; + return () -> CompletableFuture.completedFuture(null); } Set members = newHashSet(); members.addAll(config.getVotersList()); @@ -753,18 +748,17 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { quitSignal.complete(null); finishCommand(taskId); - 
}); + return CompletableFuture.completedFuture(null); + }; } else { rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> { - finishCommand(taskId); - }); + return () -> compactWAL().thenRun(() -> finishCommand(taskId)); } } else { // request config change failed, the config entry is appended due to leader reelection @@ -774,16 +768,17 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { quitSignal.complete(null); finishCommand(taskId); - }); + return CompletableFuture.completedFuture(null); + }; } else { rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> finishCommand(taskId)); + return () -> compactWAL().thenRun(() -> finishCommand(taskId)); } } } @@ -802,12 +797,13 @@ private CompletableFuture applyConfigChange(long term, long index, .build()); } rangeWriter.bumpVer(false); - onDone.complete(() -> { + return () -> { finishCommand(taskId); if (remove) { quitSignal.complete(null); } - }); + return CompletableFuture.completedFuture(null); + }; } case ToBePurged -> { String taskId = state.getTaskId(); @@ -817,25 +813,28 @@ private CompletableFuture applyConfigChange(long term, long index, .setType(Removed) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> { finishCommand(taskId); quitSignal.complete(null); - }); + return CompletableFuture.completedFuture(null); + }; } else { rangeWriter.state(State.newBuilder() .setType(Normal) .setTaskId(taskId) .build()); - onDone.complete(() -> { + return () -> compactWAL().thenRun(() -> { + // purge failed due to leader change, reset back to normal + log.debug("Purge failed due to leader change[newConfig={}]", config); finishCommand(taskId); }); } } - default -> - // skip internal config change triggered by leadership change - onDone.complete(NOOP); + default -> { + // skip internal config 
change triggered by leadership change, no need to compact WAL + return () -> CompletableFuture.completedFuture(null); + } } - return onDone; } private CompletableFuture applyCommand(long ver, @@ -876,11 +875,6 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(), // make a checkpoint if needed CompletableFuture compactWALFuture = CompletableFuture.completedFuture(null); if (wal.latestSnapshot().getLastAppliedIndex() < logIndex - 1) { - // cancel all on-going dump sessions - dumpSessions.forEach((sessionId, session) -> { - session.cancel(); - dumpSessions.remove(sessionId, session); - }); compactWALFuture = compactWAL(); } compactWALFuture.whenCompleteAsync((v, e) -> { @@ -905,67 +899,84 @@ logTerm, logIndex, taskId, print(ver), state, newConfig.getVotersList(), ? newHashSet(clusterConfig.getVotersList()) : newHashSet(newConfig.getVotersList()); Set nextLearners = toBePurged ? emptySet() : newHashSet(newConfig.getLearnersList()); - List> onceFutures = newHostingStoreIds.stream() - .map(storeId -> messenger - .once(m -> { - if (m.hasEnsureRangeReply()) { - EnsureRangeReply reply = m.getEnsureRangeReply(); - return reply.getResult() == EnsureRangeReply.Result.OK; - } - return false; - }) - .orTimeout(5, TimeUnit.SECONDS) - ) - .collect(Collectors.toList()); - CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new)) - .whenCompleteAsync((v1, t) -> { - if (t != null) { - String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s", - taskId, t.getMessage()); - log.warn(errorMessage); - finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage)); - wal.stepDown(); - return; - } - wal.changeClusterConfig(taskId, nextVoters, nextLearners) - .whenCompleteAsync((v2, e2) -> { - if (e2 != null) { - String errorMessage = - String.format("ConfigChange aborted[taskId=%s] due to %s", - taskId, e2.getMessage()); - log.debug(errorMessage); - finishCommandWithError(taskId, new 
KVRangeException.TryLater(errorMessage)); - wal.stepDown(); + if (wal.isLeader()) { + List> onceFutures = newHostingStoreIds.stream() + .map(storeId -> messenger + .once(m -> { + if (m.hasEnsureRangeReply()) { + EnsureRangeReply reply = m.getEnsureRangeReply(); + return reply.getResult() == EnsureRangeReply.Result.OK; } - // postpone finishing command when config entry is applied - }, fsmExecutor); - }, fsmExecutor); - newHostingStoreIds.forEach(storeId -> { - log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId); - ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance(); - messenger.send(KVRangeMessage.newBuilder() - .setRangeId(id) - .setHostStoreId(storeId) - .setEnsureRange(EnsureRange.newBuilder() - .setVer(ver) // ensure the new kvrange is compatible in target store - .setBoundary(boundary) - .setInitSnapshot(Snapshot.newBuilder() - .setTerm(0) - .setIndex(0) - .setClusterConfig(ensuredClusterConfig) // empty voter set - .setData(KVRangeSnapshot.newBuilder() - .setVer(ver) - .setId(id) - // no checkpoint specified - .setLastAppliedIndex(0) - .setBoundary(boundary) - .setState(state) - .setClusterConfig(ensuredClusterConfig) - .build().toByteString()) + return false; + }) + .orTimeout(5, TimeUnit.SECONDS) + ) + .collect(Collectors.toList()); + CompletableFuture.allOf(onceFutures.toArray(CompletableFuture[]::new)) + .whenCompleteAsync((v1, t) -> { + if (t != null) { + String errorMessage = String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, t.getMessage()); + log.warn(errorMessage); + finishCommandWithError(taskId, new KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + return; + } + wal.changeClusterConfig(taskId, nextVoters, nextLearners) + .whenCompleteAsync((v2, e2) -> { + if (e2 != null) { + String errorMessage = + String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, e2.getMessage()); + log.debug(errorMessage); + finishCommandWithError(taskId, + new 
KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + } + // postpone finishing command when config entry is applied + }, fsmExecutor); + }, fsmExecutor); + newHostingStoreIds.forEach(storeId -> { + log.debug("Send EnsureRequest: taskId={}, targetStoreId={}", taskId, storeId); + ClusterConfig ensuredClusterConfig = ClusterConfig.getDefaultInstance(); + messenger.send(KVRangeMessage.newBuilder() + .setRangeId(id) + .setHostStoreId(storeId) + .setEnsureRange(EnsureRange.newBuilder() + .setVer(ver) // ensure the new kvrange is compatible in target store + .setBoundary(boundary) + .setInitSnapshot(Snapshot.newBuilder() + .setTerm(0) + .setIndex(0) + .setClusterConfig(ensuredClusterConfig) // empty voter set + .setData(KVRangeSnapshot.newBuilder() + .setVer(ver) + .setId(id) + // no checkpoint specified + .setLastAppliedIndex(0) + .setBoundary(boundary) + .setState(state) + .setClusterConfig(ensuredClusterConfig) + .build().toByteString()) + .build()) .build()) - .build()) - .build()); - }); + .build()); + }); + } else { + wal.changeClusterConfig(taskId, nextVoters, nextLearners) + .whenCompleteAsync((v2, e2) -> { + if (e2 != null) { + String errorMessage = + String.format("ConfigChange aborted[taskId=%s] due to %s", + taskId, e2.getMessage()); + log.debug(errorMessage); + finishCommandWithError(taskId, + new KVRangeException.TryLater(errorMessage)); + wal.stepDown(); + } + // postpone finishing command when config entry is applied + }, fsmExecutor); + } if (state.getType() == Normal) { if (toBePurged) { rangeWriter.state(State.newBuilder() @@ -1495,7 +1506,6 @@ private CompletableFuture restore(KVRangeSnapshot snapshot, return restorer.restoreFrom(leader, snapshot) .handle((result, ex) -> { if (ex != null) { - log.warn("Restored from snapshot error: \n{}", snapshot, ex); return onInstalled.call(null, ex); } else { return onInstalled.call(kvRange.checkpoint(), null); @@ -1504,10 +1514,9 @@ private CompletableFuture restore(KVRangeSnapshot snapshot, 
.thenCompose(f -> f) .whenCompleteAsync(unwrap((v, e) -> { if (e != null) { - if (e instanceof SnapshotException) { - log.error("Failed to apply snapshot to WAL \n{}", snapshot, e); - // WAL and FSM are inconsistent, need to quit and recreate again - quitSignal.complete(null); + if (e instanceof SnapshotException.ObsoleteSnapshotException) { + log.debug("Obsolete snapshot, reset kvRange to latest snapshot: \n{}", snapshot); + kvRange.toReseter(wal.latestSnapshot()).done(); } } else { linearizer.afterLogApplied(snapshot.getLastAppliedIndex()); @@ -1564,6 +1573,11 @@ private void shrinkWAL() { } private CompletableFuture compactWAL() { + // cancel all on-going dump sessions + dumpSessions.forEach((sessionId, session) -> { + session.cancel(); + dumpSessions.remove(sessionId, session); + }); return mgmtTaskRunner.add(this::doCompactWAL); } @@ -1592,7 +1606,7 @@ private void detectZombieState(KVRangeDescriptor descriptor) { } } - private void checkZombieState() { + private void checkAndRepairFromZombieState() { if (zombieAt > 0 && Duration.ofMillis(HLC.INST.getPhysical() - zombieAt).toSeconds() > opts.getZombieTimeoutSec()) { ClusterConfig clusterConfig = wal.latestClusterConfig(); @@ -1604,7 +1618,8 @@ private void checkZombieState() { clusterConfig); wal.recover().whenComplete((v, e) -> recovering.set(false)); } - } else { + } else if (!clusterConfig.getVotersList().contains(hostStoreId) + && !clusterConfig.getLearnersList().contains(hostStoreId)) { log.info("Zombie state detected, send quit signal."); quitSignal.complete(null); } diff --git a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java index 42f63ab79..c121a241d 100644 --- a/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java +++ 
b/base-kv/base-kv-store-server/src/main/java/org/apache/bifromq/basekv/store/wal/KVRangeWALSubscription.java @@ -72,8 +72,7 @@ class KVRangeWALSubscription implements IKVRangeWALSubscription { applyRunner.add(restore(task)) .handle((snap, e) -> fetchRunner.add(() -> { if (e != null) { - log.error( - "Failed to install snapshot\n{}", snap); + log.error("Failed to restore from snapshot\n{}", task.snapshot, e); return; } log.debug("Snapshot installed\n{}", snap); diff --git a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java index c39045fa6..44abd56e4 100644 --- a/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java +++ b/base-kv/base-kv-type-proto/src/main/java/org/apache/bifromq/basekv/utils/DescriptorUtil.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basekv.utils; @@ -22,10 +22,6 @@ import static org.apache.bifromq.basekv.utils.BoundaryUtil.endKey; import static org.apache.bifromq.basekv.utils.BoundaryUtil.startKey; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import com.google.protobuf.ByteString; import java.util.Comparator; import java.util.HashMap; @@ -37,6 +33,10 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.stream.Collectors; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; /** * Utilities for processing descriptor. @@ -100,14 +100,21 @@ public static EffectiveRoute getEffectiveRoute(EffectiveEpoch effectiveEpoch) { for (KVRangeStoreDescriptor storeDescriptor : effectiveEpoch.storeDescriptors()) { for (KVRangeDescriptor rangeDescriptor : storeDescriptor.getRangesList()) { if (rangeDescriptor.getRole() == RaftNodeStatus.Leader) { - ByteString startKey = startKey(rangeDescriptor.getBoundary()); - if (startKey == null) { - firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); - continue; + switch (rangeDescriptor.getState()) { + case Normal, ConfigChanging, PreparedMerging, WaitingForMerge -> { + ByteString startKey = startKey(rangeDescriptor.getBoundary()); + if (startKey == null) { + firstLeaderRanges.add(new LeaderRange(rangeDescriptor, storeDescriptor)); + continue; + } + sortedLeaderRanges.computeIfAbsent(startKey, + k -> new TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId()))) + .add(new LeaderRange(rangeDescriptor, storeDescriptor)); + } + default -> { + // skip other states + } } - sortedLeaderRanges.computeIfAbsent(startKey, - k -> new 
TreeSet<>(Comparator.comparingLong(l -> l.descriptor().getId().getId()))) - .add(new LeaderRange(rangeDescriptor, storeDescriptor)); } } } diff --git a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java index 3b724b5af..40b19a232 100644 --- a/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java +++ b/base-kv/base-kv-type-proto/src/test/java/org/apache/bifromq/basekv/utils/DescriptorUtilTest.java @@ -19,16 +19,15 @@ package org.apache.bifromq.basekv.utils; +import static org.apache.bifromq.basekv.proto.State.StateType.Merged; +import static org.apache.bifromq.basekv.proto.State.StateType.Normal; +import static org.apache.bifromq.basekv.proto.State.StateType.PreparedMerging; +import static org.apache.bifromq.basekv.proto.State.StateType.Removed; import static org.apache.bifromq.basekv.utils.DescriptorUtil.getEffectiveEpoch; import static org.apache.bifromq.basekv.utils.DescriptorUtil.organizeByEpoch; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.basekv.proto.Boundary; -import org.apache.bifromq.basekv.proto.KVRangeDescriptor; -import org.apache.bifromq.basekv.proto.KVRangeId; -import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; -import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import com.google.protobuf.ByteString; import java.util.Arrays; import java.util.HashSet; @@ -37,6 +36,11 @@ import java.util.NavigableMap; import java.util.Set; import java.util.stream.Collectors; +import org.apache.bifromq.basekv.proto.Boundary; +import org.apache.bifromq.basekv.proto.KVRangeDescriptor; +import org.apache.bifromq.basekv.proto.KVRangeId; +import org.apache.bifromq.basekv.proto.KVRangeStoreDescriptor; +import org.apache.bifromq.basekv.raft.proto.RaftNodeStatus; import org.testng.annotations.Test; 
public class DescriptorUtilTest { @@ -460,6 +464,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() .setId(id1) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder1.build()) .build(); @@ -470,6 +475,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() .setId(id2) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder2.build()) .build(); @@ -479,6 +485,7 @@ public void getEffectiveRouteContiguousChain() { KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder() .setId(id3) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundaryBuilder3.build()) .build(); @@ -517,11 +524,13 @@ public void getEffectiveRouteSelectsSmallestVer() { KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() .setId(id1) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundary) .build(); KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() .setId(id2) .setRole(RaftNodeStatus.Leader) + .setState(Normal) .setBoundary(boundary) .build(); @@ -568,4 +577,257 @@ public void getEffectiveRouteWithNoLeaders() { assertTrue(routeMap.isEmpty()); } -} \ No newline at end of file + + @Test + public void getEffectiveRouteFiltersByState() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + KVRangeId id4 = KVRangeId.newBuilder().setEpoch(1).setId(4).build(); + + // Allowed states + KVRangeDescriptor rNormal = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + KVRangeDescriptor rPreparedMerging = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() 
+ .setStartKey(ByteString.copyFromUtf8("b")) + .setEndKey(ByteString.copyFromUtf8("m")) + .build()) + .setState(PreparedMerging) + .build(); + + // Disallowed states + KVRangeDescriptor rMerged = KVRangeDescriptor.newBuilder() + .setId(id3) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("m")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Merged) + .build(); + KVRangeDescriptor rRemoved = KVRangeDescriptor.newBuilder() + .setId(id4) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Removed) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(rNormal) + .addRanges(rPreparedMerging) + .addRanges(rMerged) + .addRanges(rRemoved) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + EffectiveRoute effectiveRoute = DescriptorUtil.getEffectiveRoute(effectiveEpoch); + NavigableMap routeMap = effectiveRoute.leaderRanges(); + + // Only two allowed ranges should be present + assertEquals(routeMap.size(), 2); + List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList(); + assertTrue(ids.contains(1L)); + assertTrue(ids.contains(2L)); + } + + @Test + public void getEffectiveRoutePrefersNullStartKeyAsFirst() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + + // First range without startKey (should be chosen as first) + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setEndKey(ByteString.copyFromUtf8("m")) + .build()) + .setState(Normal) + .build(); + + // Second range with explicit startKey + 
KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("m")).build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r2) + .addRanges(r1) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + + assertEquals(routeMap.firstEntry().getValue().descriptor().getId(), id1); + } + + @Test + public void getEffectiveRouteStopsAtNullEndKey() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(1).setId(3).build(); + + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + // Tail range with null endKey + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("b")).build()) + .setState(Normal) + .build(); + // An extra range that should never be reached after tail + KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder() + .setId(id3) + .setRole(RaftNodeStatus.Leader) + .setBoundary( + Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("c")).setEndKey(ByteString.copyFromUtf8("d")) + .build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r1) + .addRanges(r2) + .addRanges(r3) + .build(); + + Set storeDescriptors = new HashSet<>(); + 
storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + + // Should stop at r2 (endKey null) + assertEquals(routeMap.size(), 2); + List ids = routeMap.values().stream().map(lr -> lr.descriptor().getId().getId()).toList(); + assertTrue(ids.contains(1L)); + assertTrue(ids.contains(2L)); + } + + @Test + public void getEffectiveRouteAllowsGapsByCeilingStartKey() { + KVRangeId id1 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(1).setId(2).build(); + + KVRangeDescriptor r1 = KVRangeDescriptor.newBuilder() + .setId(id1) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setEndKey(ByteString.copyFromUtf8("b")) + .build()) + .setState(Normal) + .build(); + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder() + .setId(id2) + .setRole(RaftNodeStatus.Leader) + .setBoundary(Boundary.newBuilder() + .setStartKey(ByteString.copyFromUtf8("c")) + .setEndKey(ByteString.copyFromUtf8("z")) + .build()) + .setState(Normal) + .build(); + + KVRangeStoreDescriptor storeDescriptor = KVRangeStoreDescriptor.newBuilder() + .setId("store1") + .addRanges(r1) + .addRanges(r2) + .build(); + + Set storeDescriptors = new HashSet<>(); + storeDescriptors.add(storeDescriptor); + + EffectiveEpoch effectiveEpoch = new EffectiveEpoch(1, storeDescriptors); + NavigableMap routeMap = DescriptorUtil.getEffectiveRoute(effectiveEpoch).leaderRanges(); + assertEquals(routeMap.size(), 2); + } + + @Test + public void organizeByEpochRetainsStoresWithoutRangesInEpoch() { + // store1 has epoch 1 & 2 ranges, store2 has only epoch 2 + KVRangeId id11 = KVRangeId.newBuilder().setEpoch(1).setId(1).build(); + KVRangeId id21 = KVRangeId.newBuilder().setEpoch(2).setId(1).build(); + KVRangeDescriptor r11 = KVRangeDescriptor.newBuilder() + .setId(id11) + 
.setBoundary(Boundary.newBuilder().setEndKey(ByteString.copyFromUtf8("m")).build()) + .build(); + KVRangeDescriptor r21 = KVRangeDescriptor.newBuilder() + .setId(id21) + .setBoundary(Boundary.newBuilder().setStartKey(ByteString.copyFromUtf8("n")).build()) + .build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("store1").addRanges(r11).addRanges(r21) + .build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("store2").addRanges(r21).build(); + + Set set = new HashSet<>(); + set.add(s1); + set.add(s2); + + NavigableMap> byEpoch = DescriptorUtil.organizeByEpoch(set); + // Epoch 1 should contain both stores, with store2 having 0 ranges + assertTrue(byEpoch.containsKey(1L)); + Set epoch1 = byEpoch.get(1L); + assertEquals(epoch1.size(), 2); + for (KVRangeStoreDescriptor d : epoch1) { + if (d.getId().equals("store1")) { + assertEquals(d.getRangesCount(), 1); + assertEquals(d.getRanges(0).getId().getEpoch(), 1L); + } else if (d.getId().equals("store2")) { + assertEquals(d.getRangesCount(), 0); + } + } + } + + @Test + public void getEffectiveEpochOldestSelectionWithMixedStores() { + // store1 has epoch 2, store2 has epoch 3; no epoch 1 present => pick epoch 2 + KVRangeId id2 = KVRangeId.newBuilder().setEpoch(2).setId(1).build(); + KVRangeId id3 = KVRangeId.newBuilder().setEpoch(3).setId(1).build(); + KVRangeDescriptor r2 = KVRangeDescriptor.newBuilder().setId(id2).build(); + KVRangeDescriptor r3 = KVRangeDescriptor.newBuilder().setId(id3).build(); + + KVRangeStoreDescriptor s1 = KVRangeStoreDescriptor.newBuilder().setId("s1").addRanges(r2).build(); + KVRangeStoreDescriptor s2 = KVRangeStoreDescriptor.newBuilder().setId("s2").addRanges(r3).build(); + Set set = new HashSet<>(); + set.add(s1); + set.add(s2); + + Set result = DescriptorUtil.getEffectiveEpoch(set).get().storeDescriptors(); + assertEquals(result.size(), 2); + // All descriptors in effective epoch must be epoch 2 versions of both stores, with s2 having 0 
ranges + for (KVRangeStoreDescriptor d : result) { + if (d.getId().equals("s1")) { + assertEquals(d.getRangesCount(), 1); + assertEquals(d.getRanges(0).getId().getEpoch(), 2L); + } else if (d.getId().equals("s2")) { + assertEquals(d.getRangesCount(), 0); + } + } + } +} diff --git a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java index 32c2dd5e3..6d418ce5d 100644 --- a/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java +++ b/base-logger/src/main/java/org/apache/bifromq/logger/MDCLogger.java @@ -74,12 +74,20 @@ private void logWithMDC(Supplier> isEnabled, if (lvl.isEmpty()) { return; } + Object[] evaluated = args; + if (args != null && args.length > 0) { + evaluated = new Object[args.length]; + for (int i = 0; i < args.length; i++) { + Object a = args[i]; + evaluated[i] = (a instanceof Supplier) ? ((Supplier) a).get() : a; + } + } for (int i = 0; i < tags.length; i += 2) { MDC.put(tags[i], tags[i + 1]); } Map extraCtx = extraContext(); extraCtx.forEach(MDC::put); - delegate.log(marker, FQCN, lvl.get().toInt(), msg, args, t); + delegate.log(marker, FQCN, lvl.get().toInt(), msg, evaluated, t); for (int i = 0; i < tags.length; i += 2) { MDC.remove(tags[i]); } diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java index dcf7a1353..46b0b2c5b 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/BiDiStream.java @@ -21,8 +21,6 @@ import static io.grpc.stub.ClientCalls.asyncBidiStreamingCall; -import org.apache.bifromq.baserpc.RPCContext; -import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; import io.grpc.CallOptions; import io.grpc.Channel; import io.grpc.Context; @@ -36,10 +34,11 @@ import 
java.util.concurrent.atomic.AtomicBoolean; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baserpc.RPCContext; +import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; @Slf4j class BiDiStream implements IBiDiStream { - private final String tenantId; private final String serverId; private final ClientCallStreamObserver callStreamObserver; private final Subject outSubject = PublishSubject.create(); @@ -54,7 +53,6 @@ class BiDiStream implements IBiDiStream { MethodDescriptor methodDescriptor, Map metadata, CallOptions callOptions) { - this.tenantId = tenantId; this.serverId = serverId; Context ctx = Context.ROOT.fork() .withValue(RPCContext.TENANT_ID_CTX_KEY, tenantId) diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java index f753ba63b..e4dc3a4d4 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ClientChannel.java @@ -19,16 +19,6 @@ package org.apache.bifromq.baserpc.client; -import org.apache.bifromq.baseenv.EnvProvider; -import org.apache.bifromq.baseenv.NettyEnv; -import org.apache.bifromq.baserpc.BluePrint; -import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor; -import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector; -import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider; -import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider; -import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import com.google.common.util.concurrent.MoreExecutors; import io.grpc.Channel; 
import io.grpc.ConnectivityState; @@ -50,6 +40,16 @@ import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import lombok.Builder; +import org.apache.bifromq.baseenv.EnvProvider; +import org.apache.bifromq.baseenv.NettyEnv; +import org.apache.bifromq.baserpc.BluePrint; +import org.apache.bifromq.baserpc.client.interceptor.TenantAwareClientInterceptor; +import org.apache.bifromq.baserpc.client.loadbalancer.IServerSelector; +import org.apache.bifromq.baserpc.client.loadbalancer.TrafficDirectiveLoadBalancerProvider; +import org.apache.bifromq.baserpc.client.nameresolver.TrafficGovernorNameResolverProvider; +import org.apache.bifromq.baserpc.client.util.FastBehaviorSubject; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceLandscape; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; class ClientChannel implements IClientChannel { private final String serviceUniqueName; @@ -136,7 +136,7 @@ public Observable>> serverList() { @Override public Observable serverSelectorObservable() { - return serverSelectorSubject; + return serverSelectorSubject.distinctUntilChanged(); } @Override diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java index 27f63fcfa..03159dcd3 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/ManagedBiDiStream.java @@ -116,14 +116,14 @@ final boolean isReady() { abstract void onServiceUnavailable(); private void reportNoServerAvailable() { - log.debug("Stream@{} no server available to target: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} no server available to target: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); 
onNoServerAvailable(); } private void reportServiceUnavailable() { - log.debug("Stream@{} service unavailable to target: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} service unavailable to target: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); onServiceUnavailable(); } @@ -246,12 +246,12 @@ void close() { private void gracefulRetarget() { if (state.compareAndSet(State.Normal, State.PendingRetarget)) { - log.debug("Stream@{} start graceful retarget process: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} start graceful retarget process: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); if (prepareRetarget()) { // if it's ready to retarget, close it and start a new one - log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close current bidi-stream immediately before retargeting: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); state.set(State.Retargeting); bidiStream.get().close(); scheduleRetargetNow(); @@ -272,10 +272,12 @@ private void scheduleRetargetNow() { private void scheduleRetarget(Duration delay) { if (retargetScheduled.compareAndSet(false, true)) { - log.debug("Stream@{} schedule retarget task in {}ms: method={}", - this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName()); - CompletableFuture.runAsync(() -> retarget(this.serverSelector), - CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS)); + log.debug("Stream@{} schedule retarget task in {}ms: method={}, state={}", + this.hashCode(), delay.toMillis(), methodDescriptor.getBareMethodName(), state.get()); + CompletableFuture.runAsync(() -> { + retargetScheduled.set(false); + retarget(this.serverSelector); + }, 
CompletableFuture.delayedExecutor(delay.toMillis(), MILLISECONDS)); } } @@ -328,7 +330,6 @@ private void retarget(IServerSelector serverSelector) { } } } - retargetScheduled.set(false); if (serverSelector != this.serverSelector) { // server selector has been changed, schedule a retarget scheduleRetargetNow(); @@ -338,10 +339,11 @@ private void retarget(IServerSelector serverSelector) { private void target(String serverId) { if (state.compareAndSet(State.Init, State.Normal) || state.compareAndSet(State.StreamDisconnect, State.Normal) + || state.compareAndSet(State.PendingRetarget, State.Normal) || state.compareAndSet(State.NoServerAvailable, State.Normal) || state.compareAndSet(State.Retargeting, State.Normal)) { - log.debug("Stream@{} build bidi-stream to target server[{}]: method={}", - this.hashCode(), serverId, methodDescriptor.getBareMethodName()); + log.debug("Stream@{} build stream to server[{}]: method={}, state={}", + this.hashCode(), serverId, methodDescriptor.getBareMethodName(), state.get()); BidiStreamContext bidiStreamContext = BidiStreamContext.from(new BiDiStream<>( tenantId, serverId, @@ -350,13 +352,13 @@ private void target(String serverId) { metadataSupplier.get(), callOptions)); bidiStream.set(bidiStreamContext); - onStreamCreated(); bidiStreamContext.subscribe(this::onNext, this::onError, this::onCompleted); bidiStreamContext.onReady(ts -> onStreamReady()); + onStreamCreated(); } if (bidiStream.get().bidiStream().isReady()) { - log.debug("Stream@{} ready after build to server[{}]: method={}", - this.hashCode(), serverId, methodDescriptor.getBareMethodName()); + log.debug("Stream@{} ready: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); onStreamReady(); } } @@ -367,8 +369,8 @@ private void onNext(OutT out) { if (state.get() == State.PendingRetarget && canStartRetarget()) { // do not close the stream inline CompletableFuture.runAsync(() -> { - log.debug("Stream@{} close current bidi-stream before 
retargeting: method={}", - this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close current stream before retargeting: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); state.set(State.Retargeting); bidiStream.get().close(); scheduleRetargetNow(); @@ -377,18 +379,33 @@ private void onNext(OutT out) { } private void onError(Throwable t) { - log.debug("BidiStream@{} error: method={}", this.hashCode(), methodDescriptor.getBareMethodName(), t); - state.compareAndSet(State.Normal, State.StreamDisconnect); + log.debug("Stream@{} error: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get(), t); + State s = state.get(); + if (s == State.Normal || s == State.PendingRetarget) { + state.compareAndSet(s, State.StreamDisconnect); + } onStreamError(t); - scheduleRetargetWithRandomDelay(); + if (s == State.PendingRetarget) { + scheduleRetargetNow(); + } else { + scheduleRetargetWithRandomDelay(); + } } private void onCompleted() { - log.debug("BidiStream@{} complete: method={}", this.hashCode(), methodDescriptor.getBareMethodName()); + log.debug("Stream@{} close by server: method={}, state={}", + this.hashCode(), methodDescriptor.getBareMethodName(), state.get()); // server gracefully close the stream - state.compareAndSet(State.Normal, State.StreamDisconnect); - onStreamError(new CancellationException("server close the bidi-stream")); - scheduleRetargetWithRandomDelay(); + State s = state.get(); + if (s == State.Normal || s == State.PendingRetarget) { + state.compareAndSet(s, State.StreamDisconnect); + } + onStreamError(new CancellationException("Server shutdown")); + if (s == State.PendingRetarget) { + scheduleRetargetNow(); + } + // wait for selector change to trigger retargeting } enum State { diff --git a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java 
b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java index 2539d9cdc..1f6c38572 100644 --- a/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java +++ b/base-rpc/base-rpc-client/src/main/java/org/apache/bifromq/baserpc/client/loadbalancer/TrafficDirectiveLoadBalancer.java @@ -19,17 +19,16 @@ package org.apache.bifromq.baserpc.client.loadbalancer; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY; -import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY; import static com.google.common.base.Preconditions.checkNotNull; import static io.grpc.ConnectivityState.CONNECTING; import static io.grpc.ConnectivityState.IDLE; import static io.grpc.ConnectivityState.READY; import static io.grpc.ConnectivityState.SHUTDOWN; import static io.grpc.ConnectivityState.TRANSIENT_FAILURE; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.IN_PROC_SERVER_ATTR_KEY; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_GROUP_TAG_ATTR_KEY; +import static org.apache.bifromq.baserpc.client.loadbalancer.Constants.SERVER_ID_ATTR_KEY; -import org.apache.bifromq.baseenv.EnvProvider; import com.google.common.collect.Maps; import io.grpc.Attributes; import io.grpc.ConnectivityState; @@ -48,6 +47,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.IntStream; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.baseenv.EnvProvider; @Slf4j public class TrafficDirectiveLoadBalancer extends LoadBalancer { @@ -72,6 +72,12 @@ public class TrafficDirectiveLoadBalancer extends LoadBalancer { this.currentPicker = new SubChannelPicker(); } + private static Set difference(Set a, Set b) { + Set aCopy = new HashSet<>(a); + 
aCopy.removeAll(b); + return aCopy; + } + @Override public void handleResolvedAddresses(ResolvedAddresses resolvedAddresses) { log.debug("Handle traffic change: resolvedAddresses={}", resolvedAddresses); @@ -183,25 +189,11 @@ private void updateBalancingState() { currentPicker.refresh(serverChannels); helper.updateBalancingState(newState, currentPicker); - Map allServers = currentServers; - ITenantRouter tenantRouter = - new TenantRouter(currentServers, currentTrafficDirective, currentServerGroupTags); - updateListener.onUpdate(new IServerSelector() { - @Override - public boolean exists(String serverId) { - return allServers.containsKey(serverId); - } - - @Override - public IServerGroupRouter get(String tenantId) { - return tenantRouter.get(tenantId); - } - - @Override - public String toString() { - return allServers.toString(); - } - }); + if (newState == READY || (newState == TRANSIENT_FAILURE && currentServers.isEmpty())) { + // notify when channel is ready or TRANSIENT_FAILURE state and no servers available + updateListener.onUpdate( + new TenantAwareServerSelector(currentServers, currentServerGroupTags, currentTrafficDirective)); + } } balancingStateUpdateScheduled.set(false); } @@ -275,10 +267,4 @@ private void updateSubChannelState(Subchannel subchannel, ConnectivityStateInfo subchannel.requestConnection(); } } - - private static Set difference(Set a, Set b) { - Set aCopy = new HashSet<>(a); - aCopy.removeAll(b); - return aCopy; - } } diff --git a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java index 2f740f0c4..7439c173d 100644 --- a/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java +++ b/base-rpc/base-rpc-server/src/main/java/org/apache/bifromq/baserpc/server/AbstractResponsePipeline.java @@ -14,13 +14,11 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR 
CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.baserpc.server; -import org.apache.bifromq.base.util.FutureTracker; -import org.apache.bifromq.baserpc.metrics.RPCMetric; import io.grpc.Status; import io.grpc.stub.StreamObserver; import io.micrometer.core.instrument.Timer; @@ -28,6 +26,8 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicBoolean; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.FutureTracker; +import org.apache.bifromq.baserpc.metrics.RPCMetric; @Slf4j abstract class AbstractResponsePipeline extends AbstractStreamObserver { @@ -40,6 +40,7 @@ abstract class AbstractResponsePipeline extends AbstractStr protected AbstractResponsePipeline(StreamObserver responseObserver) { super(responseObserver); + this.responseObserver.setOnCancelHandler(this::cleanup); } @Override @@ -80,7 +81,7 @@ private void close(Throwable t) { /** * Handle the request and return the result via completable future, remember always throw exception asynchronously - * Returned future complete exceptionally will cause pipeline close + * Returned future complete exceptionally will cause pipeline close. 
* * @param tenantId the tenantId * @param request the request @@ -110,7 +111,6 @@ final CompletableFuture startHandlingRequest(RequestT request) { return respFuture; } - final void emitResponse(RequestT req, ResponseT resp) { if (!isClosed()) { log.trace("Response sent in pipeline@{}: request={}, response={}", hashCode(), req, resp); @@ -125,7 +125,6 @@ final void emitResponse(RequestT req, ResponseT resp) { protected void afterClose() { } - private void fail(Throwable throwable) { if (!isClosed()) { if (throwable instanceof CancellationException) { diff --git a/base-rpc/base-rpc-traffic-governor/pom.xml b/base-rpc/base-rpc-traffic-governor/pom.xml index 7e3e47a08..78932bdb9 100644 --- a/base-rpc/base-rpc-traffic-governor/pom.xml +++ b/base-rpc/base-rpc-traffic-governor/pom.xml @@ -38,10 +38,26 @@ org.apache.bifromq base-rpc-grpc-inproc
+ org.awaitility awaitility + + org.apache.logging.log4j + log4j-api + test + + + org.apache.logging.log4j + log4j-core + test + + + org.apache.logging.log4j + log4j-slf4j2-impl + test + diff --git a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java index 4cd67863a..b0724d0c6 100644 --- a/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java +++ b/base-rpc/base-rpc-traffic-governor/src/main/java/org/apache/bifromq/baserpc/trafficgovernor/RPCServiceTrafficManager.java @@ -14,24 +14,20 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.baserpc.trafficgovernor; -import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER; import static java.util.Collections.emptySet; +import static org.apache.bifromq.baserpc.trafficgovernor.SharedScheduler.RPC_SHARED_SCHEDULER; -import org.apache.bifromq.basecrdt.service.ICRDTService; -import org.apache.bifromq.basehlc.HLC; -import org.apache.bifromq.baserpc.proto.RPCServer; import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.protobuf.ByteString; import io.grpc.inprocess.InProcessSocketAddress; import io.reactivex.rxjava3.core.Observable; import io.reactivex.rxjava3.disposables.CompositeDisposable; -import io.reactivex.rxjava3.disposables.Disposable; import io.reactivex.rxjava3.subjects.BehaviorSubject; import java.net.InetSocketAddress; import java.util.Map; @@ -41,6 +37,10 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import 
lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.base.util.RendezvousHash; +import org.apache.bifromq.basecrdt.service.ICRDTService; +import org.apache.bifromq.basehlc.HLC; +import org.apache.bifromq.baserpc.proto.RPCServer; @Slf4j class RPCServiceTrafficManager extends RPCServiceAnnouncer @@ -131,8 +131,8 @@ private Set refreshAliveServerList(Map announ for (RPCServer server : announcedServers.values()) { if (aliveAnnouncers.contains(server.getAnnouncerId())) { aliveServers.add(build(server)); - } else { - // this is a side effect: revoke the announcement made by dead announcer + } else if (shouldClean(aliveAnnouncers, server.getAnnouncerId())) { + // revoke the announcement made by dead announcer log.debug("Remove not alive server announcement: {}", server.getId()); revoke(server.getId()); } @@ -140,6 +140,18 @@ private Set refreshAliveServerList(Map announ return aliveServers; } + private boolean shouldClean(Set aliveAnnouncers, ByteString failedAnnouncer) { + aliveAnnouncers.add(id()); + RendezvousHash hash = RendezvousHash.builder() + .keyFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodeFunnel((from, into) -> into.putBytes(from.asReadOnlyByteBuffer())) + .nodes(aliveAnnouncers) + .build(); + ByteString cleaner = hash.get(failedAnnouncer); + return cleaner.equals(id()); + } + + private ServerEndpoint build(RPCServer server) { return new ServerEndpoint(server.getAgentHostId(), server.getId(), @@ -157,41 +169,49 @@ private static class ServerRegistration implements IServerRegistration { private final RPCServiceTrafficManager manager; private final AtomicReference localServer; - private final Disposable disposable; - private final CompositeDisposable disposables; + private final CompositeDisposable myDisposibles = new CompositeDisposable(); + private final CompositeDisposable allDisposibles; - private ServerRegistration(RPCServer server, RPCServiceTrafficManager announcer, - CompositeDisposable disposables) { + private 
ServerRegistration(RPCServer server, + RPCServiceTrafficManager announcer, + CompositeDisposable allDisposables) { this.localServer = new AtomicReference<>(server); this.manager = announcer; - this.disposables = disposables; + this.allDisposibles = allDisposables; // make an announcement via rpcServiceCRDT log.debug("Announce local server[{}]:{}", announcer.serviceUniqueName, server); announcer.announce(localServer.get()).join(); // enforce the announcement consistent eventually - disposable = announcer.announcedServers() + myDisposibles.add(announcer.announcedServers() .doOnDispose(() -> manager.revoke(localServer.get().getId()).join()) .subscribe(serverMap -> { RPCServer localServer = this.localServer.get(); if (!serverMap.containsKey(localServer.getId())) { - RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build(); - log.debug("Re-announce local server: {}", toUpdate); - // refresh announcement time - announcer.announce(toUpdate); + reannounce(); } else if (localServer.getAnnouncedTS() < serverMap.get(localServer.getId()).getAnnouncedTS()) { localServer = serverMap.get(localServer.getId()); log.debug("Update local server from announcement: server={}", localServer); } - }); - disposables.add(disposable); + })); + myDisposibles.add(announcer.crdtService.refreshSignal() + .subscribe(ts -> reannounce())); + allDisposables.add(myDisposibles); + } + + private void reannounce() { + RPCServer localServer = this.localServer.get(); + RPCServer toUpdate = localServer.toBuilder().setAnnouncedTS(HLC.INST.get()).build(); + log.debug("Re-announce local server: {}", toUpdate); + // refresh announcement time + manager.announce(toUpdate); } @Override public void stop() { - disposables.remove(disposable); - disposable.dispose(); + allDisposibles.remove(myDisposibles); + myDisposibles.dispose(); } } } diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java 
b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java index d443430a8..754f46284 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java +++ b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/Batcher.java @@ -14,32 +14,33 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.basescheduler; -import org.apache.bifromq.basescheduler.exception.BackPressureException; -import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; import io.micrometer.core.instrument.Counter; import io.micrometer.core.instrument.DistributionSummary; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Metrics; import io.micrometer.core.instrument.Tags; import io.micrometer.core.instrument.Timer; -import java.util.ArrayDeque; import java.util.LinkedList; import java.util.Queue; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentLinkedDeque; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.RejectedExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basescheduler.exception.BackPressureException; +import org.apache.bifromq.basescheduler.spi.ICapacityEstimator; @Slf4j final class Batcher { @@ -72,7 +73,7 @@ final class Batcher { this.batchCallBuilder = batchCallBuilder; this.capacityEstimator = capacityEstimator; this.maxBurstLatency = maxBurstLatency; - this.batchPool = new ArrayDeque<>(); + this.batchPool = 
new ConcurrentLinkedDeque<>(); this.emaQueueingTime = new EMALong(System::nanoTime, 0.1, 0.9, maxBurstLatency); Tags tags = Tags.of("name", name, "key", Integer.toUnsignedString(System.identityHashCode(this))); maxPipelineDepthGauge = Gauge.builder("batcher.pipeline.max", capacityEstimator::maxPipelineDepth) @@ -96,7 +97,7 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque return CompletableFuture.failedFuture( new RejectedExecutionException("Batcher has been shut down")); } - if (emaQueueingTime.get() < maxBurstLatency) { + if (Math.max(emaQueueingTime.get(), headCallWaitingNanos()) < maxBurstLatency) { ICallTask callTask = new CallTask<>(batcherKey, request); boolean offered = callTaskBuffers.offer(callTask); assert offered; @@ -104,10 +105,18 @@ public CompletableFuture submit(BatcherKeyT batcherKey, CallT reque return callTask.resultPromise(); } else { dropCounter.increment(); - return CompletableFuture.failedFuture(new BackPressureException("Too high average latency")); + return CompletableFuture.failedFuture(new BackPressureException("Batch call busy")); } } + private long headCallWaitingNanos() { + ICallTask head = callTaskBuffers.peek(); + if (head != null) { + return System.nanoTime() - head.ts(); + } + return 0; + } + public CompletableFuture close() { if (state.compareAndSet(State.RUNNING, State.SHUTTING_DOWN)) { checkShutdownCompletion(); @@ -137,6 +146,7 @@ private void cleanupMetrics() { while ((batchCall = batchPool.poll()) != null) { batchCall.destroy(); } + batchCallBuilder.close(); } private void trigger() { @@ -178,30 +188,37 @@ private void batchAndEmit() { int finalBatchSize = batchSize; CompletableFuture future = batchCall.execute(); runningBatchCalls.add(future); - future.whenComplete((v, e) -> { - runningBatchCalls.remove(future); - long execEnd = System.nanoTime(); - if (e != null) { - batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); - } else { - long batchCallLatency = execEnd - execBegin; - 
capacityEstimator.record(finalBatchSize, batchCallLatency); - batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS); - batchedTasks.forEach(t -> { - long callLatency = execEnd - t.ts(); - batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS); - }); - } - returnBatchCall(batchCall); - pipelineDepth.getAndDecrement(); - // After each completion, check for shutdown - if (state.get() == State.SHUTTING_DOWN) { - checkShutdownCompletion(); - } - if (!callTaskBuffers.isEmpty()) { - trigger(); - } - }); + future + .orTimeout(maxBurstLatency, TimeUnit.NANOSECONDS) // Ensure we don't block indefinitely + .whenComplete((v, e) -> { + runningBatchCalls.remove(future); + long execEnd = System.nanoTime(); + if (e != null) { + if (e instanceof TimeoutException) { + batchedTasks.forEach(t -> t.resultPromise() + .completeExceptionally(new BackPressureException("Batch Call timeout", e))); + } else { + batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); + } + } else { + long batchCallLatency = execEnd - execBegin; + capacityEstimator.record(finalBatchSize, batchCallLatency); + batchExecTimer.record(batchCallLatency, TimeUnit.NANOSECONDS); + batchedTasks.forEach(t -> { + long callLatency = execEnd - t.ts(); + batchCallTimer.record(callLatency, TimeUnit.NANOSECONDS); + }); + } + returnBatchCall(batchCall); + pipelineDepth.getAndDecrement(); + // After each completion, check for shutdown + if (state.get() == State.SHUTTING_DOWN) { + checkShutdownCompletion(); + } + if (!callTaskBuffers.isEmpty()) { + trigger(); + } + }); } catch (Throwable e) { log.error("Batch call failed unexpectedly", e); batchedTasks.forEach(t -> t.resultPromise().completeExceptionally(e)); diff --git a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java index 0f8d898c9..f412a559e 100644 --- a/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java +++ 
b/base-scheduler/src/main/java/org/apache/bifromq/basescheduler/EMALong.java @@ -19,43 +19,58 @@ package org.apache.bifromq.basescheduler; -import java.util.concurrent.atomic.AtomicLong; +import com.google.common.base.Preconditions; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; class EMALong { private static final double NANOS_PER_SECOND = 1_000_000_000.0; private final Supplier nowSupplier; - private final double alpha; - private final double decay; + private final double alpha; // (0,1] + private final double decay; // (0,1] private final long decayDelayNanos; - private final AtomicLong value = new AtomicLong(0); - private final AtomicLong lastUpdateTime = new AtomicLong(0); + private final AtomicReference state; public EMALong(Supplier nowSupplier, double alpha, double decay, long decayDelayNanos) { + Preconditions.checkArgument(alpha > 0.0 && alpha <= 1.0, "alpha must be in (0,1]"); + Preconditions.checkArgument(decay > 0.0 && decay <= 1.0, "decay must be in (0,1]"); + Preconditions.checkArgument(decayDelayNanos >= 0, "decayDelayNanos must be non-negative"); this.nowSupplier = nowSupplier; this.alpha = alpha; this.decay = decay; this.decayDelayNanos = decayDelayNanos; + this.state = new AtomicReference<>(new State(0L, 0L)); } public void update(long newValue) { - value.updateAndGet(v -> { - lastUpdateTime.set(nowSupplier.get()); - if (v == 0) { - return newValue; - } else { - return (long) Math.ceil(v * (1 - alpha) + newValue * alpha); + long now = nowSupplier.get(); + while (true) { + State prev = state.get(); + long newEma = (prev.ema == 0L) ? 
newValue : (long) Math.ceil(prev.ema * (1 - alpha) + newValue * alpha); + State next = new State(newEma, now); + if (state.compareAndSet(prev, next)) { + return; } - }); + } } public long get() { long now = nowSupplier.get(); - long lastUpdate = lastUpdateTime.get(); - if (decayDelayNanos < Long.MAX_VALUE && lastUpdate + decayDelayNanos < now) { - return (long) (value.get() - * Math.pow(decay, Math.ceil((now - lastUpdate - decayDelayNanos) / NANOS_PER_SECOND))); + State s = state.get(); + if (s.ema == 0L || s.lastTs == 0L) { + return s.ema; + } + if (decayDelayNanos < Long.MAX_VALUE) { + long dt = now - s.lastTs; + if (dt > decayDelayNanos) { + double seconds = Math.ceil((dt - decayDelayNanos) / NANOS_PER_SECOND); + double decayed = s.ema * Math.pow(decay, seconds); + return decayed < 1.0 ? 0L : Math.round(decayed); + } } - return value.get(); + return s.ema; + } + + private record State(long ema, long lastTs) { } } diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java index 21d3b63e9..1661d5405 100644 --- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java +++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/BatchCallSchedulerTest.java @@ -14,14 +14,14 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basescheduler; import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; -import org.apache.bifromq.basescheduler.exception.BackPressureException; import java.time.Duration; import java.util.ArrayList; import java.util.List; @@ -33,6 +33,7 @@ import java.util.concurrent.atomic.AtomicInteger; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; +import org.apache.bifromq.basescheduler.exception.BackPressureException; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -61,9 +62,7 @@ public void batchCall() { executor.submit(() -> { int i; while ((i = count.decrementAndGet()) >= 0) { - scheduler.schedule(i).whenComplete((v, e) -> { - latch.countDown(); - }); + scheduler.schedule(i).whenComplete((v, e) -> latch.countDown()); } }); latch.await(); @@ -93,4 +92,17 @@ public void backPressure() { assertEquals(e.getCause().getClass(), BackPressureException.class); } } + + @Test + public void batchCallTimeout() { + TestBatchCallScheduler scheduler = + new TestBatchCallScheduler(1, Duration.ofNanos(Long.MAX_VALUE), Duration.ofSeconds(1)); + try { + scheduler.schedule(1).join(); + fail(); + } catch (Throwable e) { + assertEquals(e.getCause().getClass(), BackPressureException.class); + } + scheduler.close(); + } } diff --git a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java index 49fd1726a..3ca03f620 100644 --- a/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java +++ b/base-scheduler/src/test/java/org/apache/bifromq/basescheduler/EMALongTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.basescheduler; @@ -81,10 +81,10 @@ void testDecayBeforeDelay() { void testDecayAfterDelay() { // set decay=0.5, decayDelay=1s EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L); - fakeTime.set(0L); + fakeTime.set(1L); ema.update(100L); // advance time to after delay + 2s total => one decay period - fakeTime.set(1_000_000_000L + 1_000_000_000L); + fakeTime.set(1_000_000_001L + 1_000_000_000L); // (now - lastUpdate - delay) / 1e9 = (2s - 1s)/1e9 = 1 => ceil(1) =1 // value * decay^1 = 100 * 0.5 = 50 assertEquals(ema.get(), 50); @@ -94,7 +94,7 @@ void testDecayAfterDelay() { void testMultipleDecayPeriods() { // decay=0.5, delay=1s EMALong ema = new EMALong(nowSupplier, 0.5, 0.5, 1_000_000_000L); - fakeTime.set(0L); + fakeTime.set(1L); ema.update(80L); // advance time to after delay + 3.2s => ceil(3.2)=4 periods fakeTime.set(1_000_000_000L + 3_200_000_000L); diff --git a/base-util/pom.xml b/base-util/pom.xml index ca88f6c85..701acd89b 100644 --- a/base-util/pom.xml +++ b/base-util/pom.xml @@ -31,6 +31,10 @@ base-util + + com.google.guava + guava + io.micrometer micrometer-core diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java similarity index 94% rename from bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java rename to base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java index b5ec12199..583e33483 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/RendezvousHash.java +++ b/base-util/src/main/java/org/apache/bifromq/base/util/RendezvousHash.java @@ -14,10 +14,10 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ -package org.apache.bifromq.dist.worker; +package org.apache.bifromq.base.util; import static com.google.common.hash.Hashing.murmur3_128; @@ -31,7 +31,7 @@ * @param The type of the node. */ @Builder -class RendezvousHash { +public class RendezvousHash { private final Funnel keyFunnel; private final Funnel nodeFunnel; private final Iterable nodes; diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java similarity index 98% rename from bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java rename to base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java index a08733379..55b954c35 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/RendezvousHashTest.java +++ b/base-util/src/test/java/org/apache/bifromq/base/util/RendezvousHashTest.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.bifromq.dist.worker; +package org.apache.bifromq.base.util; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; diff --git a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java index edb96f62b..b65f248fb 100644 --- a/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java +++ b/bifromq-dist/bifromq-dist-server/src/test/java/org/apache/bifromq/dist/server/DistServiceTest.java @@ -135,6 +135,7 @@ public void setup() { .storeOptions(kvRangeStoreOptions) .subBrokerManager(subBrokerMgr) .settingProvider(settingProvider) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); distServer = IDistServer.builder() .rpcServerBuilder(rpcServerBuilder) @@ -145,7 +146,7 @@ public void setup() { rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(workerClient.latestEffectiveRouter().keySet())); distClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst(); log.info("Setup finished, and start testing"); } diff --git a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java index e0f87f398..70fcdb47a 100644 --- a/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java +++ b/bifromq-dist/bifromq-dist-worker/src/main/java/org/apache/bifromq/dist/worker/DeliverExecutorGroup.java @@ -39,6 +39,7 @@ import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import lombok.extern.slf4j.Slf4j; +import 
org.apache.bifromq.base.util.RendezvousHash; import org.apache.bifromq.deliverer.IMessageDeliverer; import org.apache.bifromq.deliverer.TopicMessagePackHolder; import org.apache.bifromq.dist.worker.schema.GroupMatching; diff --git a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java index 106d29007..975f0e1d1 100644 --- a/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java +++ b/bifromq-dist/bifromq-dist-worker/src/test/java/org/apache/bifromq/dist/worker/DistWorkerTest.java @@ -235,10 +235,11 @@ public void setup() { .subBrokerManager(receiverManager) .settingProvider(settingProvider) .inlineFanoutThreshold(1) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } diff --git a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto index 66fa081d3..83ce4f89d 100644 --- a/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto +++ b/bifromq-inbox/bifromq-inbox-coproc-proto/src/main/proto/inboxservice/InboxStoreCoProc.proto @@ -196,9 +196,13 @@ message BatchFetchReply{ repeated Fetched result = 1; } +message MatchedRoute{ + string topicFilter = 1; // the matched topic filter + uint64 incarnation = 2; // route incarnation +} // insert won't change version & lastActive timestamp message SubMessagePack{ - map matchedTopicFilters = 1; // key: topicFilter, value: route incarnation + repeated MatchedRoute 
matchedRoute = 1; commontype.TopicMessagePack messages = 2; } @@ -219,9 +223,8 @@ message InsertResult{ NO_INBOX = 1; } message SubStatus{ - string topicFilter = 1; - uint64 incarnation = 2; - bool rejected = 3; + MatchedRoute matchedRoute = 1; + bool rejected = 2; // true if the route is outdated } Code code = 1; repeated SubStatus result = 2; diff --git a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java index 941a3865c..77c0d4280 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java +++ b/bifromq-inbox/bifromq-inbox-server/src/main/java/org/apache/bifromq/inbox/server/InboxWriter.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -22,6 +22,15 @@ import static org.apache.bifromq.base.util.CompletableFutureUtil.unwrap; import static org.apache.bifromq.plugin.subbroker.TypeUtil.toResult; +import java.time.Duration; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.base.util.AsyncRetry; import org.apache.bifromq.base.util.exception.RetryTimeoutException; import org.apache.bifromq.basekv.client.exception.BadVersionException; @@ -34,6 +43,7 @@ import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.plugin.subbroker.DeliveryPack; import org.apache.bifromq.plugin.subbroker.DeliveryReply; @@ -41,14 +51,6 @@ import org.apache.bifromq.sysprops.props.DataPlaneMaxBurstLatencyMillis; import org.apache.bifromq.type.MatchInfo; import org.apache.bifromq.type.TopicMessagePack; -import org.apache.bifromq.util.TopicUtil; -import java.time.Duration; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.concurrent.CompletableFuture; -import lombok.extern.slf4j.Slf4j; @Slf4j class InboxWriter implements InboxWriterPipeline.ISendRequestHandler { @@ -62,7 +64,7 @@ class InboxWriter implements InboxWriterPipeline.ISendRequestHandler { @Override public CompletableFuture handle(SendRequest request) { - Map> matchInfosByInbox = new HashMap<>(); + Map> matchInfosByInbox = new HashMap<>(); Map> subMsgPacksByInbox = new HashMap<>(); // break DeliveryPack into SubMessagePack by each 
TenantInboxInstance for (String tenantId : request.getRequest().getPackageMap().keySet()) { @@ -71,11 +73,15 @@ public CompletableFuture handle(SendRequest request) { Map subMsgPackByInbox = new HashMap<>(); for (MatchInfo matchInfo : pack.getMatchInfoList()) { TenantInboxInstance tenantInboxInstance = TenantInboxInstance.from(tenantId, matchInfo); - matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>()).add(matchInfo); + MatchedRoute matchedRoute = MatchedRoute.newBuilder() + .setTopicFilter(matchInfo.getMatcher().getMqttTopicFilter()) + .setIncarnation(matchInfo.getIncarnation()) + .build(); + matchInfosByInbox.computeIfAbsent(tenantInboxInstance, k -> new HashMap<>()) + .put(matchedRoute, matchInfo); subMsgPackByInbox.computeIfAbsent(tenantInboxInstance, k -> SubMessagePack.newBuilder().setMessages(topicMessagePack)) - .putMatchedTopicFilters(matchInfo.getMatcher().getMqttTopicFilter(), - matchInfo.getIncarnation()); + .addMatchedRoute(matchedRoute); } for (TenantInboxInstance tenantInboxInstance : subMsgPackByInbox.keySet()) { subMsgPacksByInbox.computeIfAbsent(tenantInboxInstance, k -> new LinkedList<>()) @@ -127,20 +133,21 @@ public CompletableFuture handle(SendRequest request) { Map> tenantMatchResultMap = new HashMap<>(); int i = 0; for (TenantInboxInstance tenantInboxInstance : subMsgPacksByInbox.keySet()) { - String receiverId = tenantInboxInstance.receiverId(); + Map matchedRoutesMap = matchInfosByInbox.get(tenantInboxInstance); InsertResult result = replyFutures.get(i++).join(); Map matchResultMap = tenantMatchResultMap.computeIfAbsent(tenantInboxInstance.tenantId(), k -> new HashMap<>()); switch (result.getCode()) { - case OK -> result.getResultList().forEach(insertionResult -> { - DeliveryResult.Code code = - insertionResult.getRejected() ? 
DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; - matchResultMap.putIfAbsent(MatchInfo.newBuilder().setReceiverId(receiverId) - .setMatcher(TopicUtil.from(insertionResult.getTopicFilter())) - .setIncarnation(insertionResult.getIncarnation()).build(), code); - }); + case OK -> { + Function resultFinder = + getFinalResultFinder(result.getResultList()); + for (MatchedRoute matchedRoute : matchedRoutesMap.keySet()) { + matchResultMap.putIfAbsent(matchedRoutesMap.get(matchedRoute), + resultFinder.apply(matchedRoute)); + } + } case NO_INBOX -> { - for (MatchInfo matchInfo : matchInfosByInbox.get(tenantInboxInstance)) { + for (MatchInfo matchInfo : matchedRoutesMap.values()) { matchResultMap.putIfAbsent(matchInfo, DeliveryResult.Code.NO_RECEIVER); } } @@ -155,4 +162,44 @@ public CompletableFuture handle(SendRequest request) { .build()).build(); })); } + + private Function getFinalResultFinder(List subStatuses) { + Function resultFinder = getResultFinder(subStatuses); + return matchedRoute -> { + DeliveryResult.Code code = resultFinder.apply(matchedRoute); + if (code == null) { + // incompleted result from coproc + log.warn("MatchedRoute {} is missing in result", matchedRoute); + return DeliveryResult.Code.NO_SUB; + } + return code; + }; + } + + private Function getResultFinder( + List subStatuses) { + if (subStatuses.size() == 1) { + InsertResult.SubStatus onlyStatus = subStatuses.get(0); + return matchedRoute -> { + if (matchedRoute.equals(onlyStatus.getMatchedRoute())) { + return onlyStatus.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; + } + return null; + }; + } else if (subStatuses.size() < 10) { + return matchedRoute -> { + for (InsertResult.SubStatus status : subStatuses) { + if (status.getMatchedRoute().equals(matchedRoute)) { + return status.getRejected() ? 
DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK; + } + } + return null; + }; + } else { + Map resultMap = subStatuses.stream() + .collect(Collectors.toMap(InsertResult.SubStatus::getMatchedRoute, + e -> e.getRejected() ? DeliveryResult.Code.NO_SUB : DeliveryResult.Code.OK)); + return resultMap::get; + } + } } diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java index 7306ea6e3..bf560d9d8 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxServiceTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -26,15 +26,6 @@ import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.when; -import org.apache.bifromq.baserpc.client.IRPCClient; -import org.apache.bifromq.baserpc.server.IRPCServer; -import org.apache.bifromq.baserpc.server.RPCServerBuilder; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; -import org.apache.bifromq.plugin.eventcollector.IEventCollector; -import org.apache.bifromq.plugin.settingprovider.ISettingProvider; -import org.apache.bifromq.plugin.settingprovider.Setting; -import org.apache.bifromq.retain.client.IRetainClient; -import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import java.lang.reflect.Method; import java.time.Duration; import java.util.concurrent.CompletableFuture; @@ -51,11 +42,20 @@ import org.apache.bifromq.basekv.metaservice.IBaseKVMetaService; import org.apache.bifromq.basekv.store.option.KVRangeStoreOptions; import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.baserpc.client.IRPCClient; +import org.apache.bifromq.baserpc.server.IRPCServer; +import org.apache.bifromq.baserpc.server.RPCServerBuilder; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.dist.client.MatchResult; import org.apache.bifromq.dist.client.UnmatchResult; import org.apache.bifromq.inbox.client.IInboxClient; import org.apache.bifromq.inbox.store.IInboxStore; +import org.apache.bifromq.plugin.eventcollector.IEventCollector; +import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; +import org.apache.bifromq.plugin.settingprovider.ISettingProvider; +import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.retain.client.IRetainClient; import org.apache.bifromq.sessiondict.client.ISessionDictClient; import org.mockito.Mock; import org.mockito.MockitoAnnotations; @@ 
-144,6 +144,7 @@ public void setup() { .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) .detachTimeout(Duration.ofSeconds(2)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); inboxServer = IInboxServer.builder() .rpcServerBuilder(rpcServerBuilder) @@ -153,7 +154,7 @@ public void setup() { .build(); rpcServer = rpcServerBuilder.build(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreClient.latestEffectiveRouter().keySet())); inboxClient.connState().filter(s -> s == IRPCClient.ConnState.READY).blockingFirst(); log.info("Setup finished, and start testing"); } diff --git a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java index 1d811d3e0..67a51d029 100644 --- a/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java +++ b/bifromq-inbox/bifromq-inbox-server/src/test/java/org/apache/bifromq/inbox/server/InboxWriterTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.inbox.server; @@ -25,16 +25,17 @@ import static org.mockito.Mockito.when; import static org.testng.Assert.assertEquals; +import java.util.concurrent.CompletableFuture; +import lombok.SneakyThrows; import org.apache.bifromq.inbox.rpc.proto.SendReply; import org.apache.bifromq.inbox.rpc.proto.SendRequest; import org.apache.bifromq.inbox.server.scheduler.IInboxInsertScheduler; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.plugin.subbroker.DeliveryReply; import org.apache.bifromq.plugin.subbroker.DeliveryResult; import org.apache.bifromq.plugin.subbroker.DeliveryResults; -import java.util.concurrent.CompletableFuture; -import lombok.SneakyThrows; import org.mockito.Mock; import org.mockito.MockitoAnnotations; import org.testng.annotations.AfterMethod; @@ -86,8 +87,10 @@ public void insertScheduleRejected() { InsertResult.newBuilder() .addResult(InsertResult.SubStatus.newBuilder() .setRejected(true) - .setIncarnation(1L) - .setTopicFilter("/foo/+") + .setMatchedRoute(MatchedRoute.newBuilder() + .setIncarnation(1L) + .setTopicFilter("/foo/+") + .build()) .build()) .setCode(InsertResult.Code.OK) .build())); @@ -103,8 +106,10 @@ public void insertScheduleOk() { .setCode(InsertResult.Code.OK) .addResult(InsertResult.SubStatus.newBuilder() .setRejected(false) - .setTopicFilter("/foo/+") - .setIncarnation(1L) + .setMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter("/foo/+") + .setIncarnation(1L) + .build()) .build()) .build())); SendRequest request = sendRequest(); diff --git a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java index 3a019d80b..383e06acb 100644 --- 
a/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java +++ b/bifromq-inbox/bifromq-inbox-store/src/main/java/org/apache/bifromq/inbox/store/InboxStoreCoProc.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.store; @@ -116,6 +116,7 @@ import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; import org.apache.bifromq.inbox.storage.proto.LWT; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.inbox.store.delay.DelayTaskRunner; import org.apache.bifromq.inbox.store.delay.ExpireInboxTask; @@ -966,39 +967,48 @@ private Runnable batchInsert(BatchInsertRequest request, List bufferMsgList = new ArrayList<>(); Set insertResults = new HashSet<>(); for (SubMessagePack messagePack : params.getMessagePackList()) { - Map matchedTopicFilters = messagePack.getMatchedTopicFiltersMap(); Map qos0TopicFilters = new HashMap<>(); Map qos1TopicFilters = new HashMap<>(); Map qos2TopicFilters = new HashMap<>(); TopicMessagePack topicMsgPack = messagePack.getMessages(); - for (String matchedTopicFilter : matchedTopicFilters.keySet()) { - long matchedIncarnation = matchedTopicFilters.get(matchedTopicFilter); - TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedTopicFilter); + for (MatchedRoute matchedRoute : messagePack.getMatchedRouteList()) { + long matchedIncarnation = matchedRoute.getIncarnation(); + TopicFilterOption tfOption = metadata.getTopicFiltersMap().get(matchedRoute.getTopicFilter()); if (tfOption == null) { - insertResults.add( - InsertResult.SubStatus.newBuilder().setTopicFilter(matchedTopicFilter) - 
.setIncarnation(matchedIncarnation).setRejected(true).build()); + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(true) + .build()); } else { if (tfOption.getIncarnation() > matchedIncarnation) { // messages from old sub incarnation log.debug("Receive message from previous subscription: topicFilter={}, inc={}, prevInc={}", - matchedTopicFilter, tfOption.getIncarnation(), matchedIncarnation); + matchedRoute, tfOption.getIncarnation(), matchedIncarnation); + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(true) + .build()); + } else { + // messages from current incarnation + insertResults.add(InsertResult.SubStatus.newBuilder() + .setMatchedRoute(matchedRoute) + .setRejected(false) + .build()); } switch (tfOption.getQos()) { - case AT_MOST_ONCE -> qos0TopicFilters.put(matchedTopicFilter, tfOption); - case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedTopicFilter, tfOption); - case EXACTLY_ONCE -> qos2TopicFilters.put(matchedTopicFilter, tfOption); + case AT_MOST_ONCE -> qos0TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case AT_LEAST_ONCE -> qos1TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); + case EXACTLY_ONCE -> qos2TopicFilters.put(matchedRoute.getTopicFilter(), tfOption); default -> { // never happens } } - insertResults.add(InsertResult.SubStatus.newBuilder() - .setTopicFilter(matchedTopicFilter) - .setIncarnation(matchedIncarnation) - .setRejected(false) - .build()); } } + if (qos0TopicFilters.isEmpty() && qos1TopicFilters.isEmpty() && qos2TopicFilters.isEmpty()) { + // no matched topic filter, skip this message pack + continue; + } String topic = topicMsgPack.getTopic(); for (TopicMessagePack.PublisherPack publisherPack : topicMsgPack.getMessageList()) { for (Message message : publisherPack.getMessageList()) { diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java 
b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java index 2d08a10a0..e70fe507c 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxInsertTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.store; @@ -39,6 +39,7 @@ import org.apache.bifromq.inbox.storage.proto.InboxVersion; import org.apache.bifromq.inbox.storage.proto.InsertRequest; import org.apache.bifromq.inbox.storage.proto.InsertResult; +import org.apache.bifromq.inbox.storage.proto.MatchedRoute; import org.apache.bifromq.inbox.storage.proto.SubMessagePack; import org.apache.bifromq.plugin.eventcollector.inboxservice.Overflowed; import org.apache.bifromq.type.ClientInfo; @@ -61,7 +62,10 @@ public void insertNoInbox() { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(message(AT_MOST_ONCE, "hello")) @@ -86,6 +90,147 @@ public void commitNoInbox() { assertEquals(commitCode, BatchCommitReply.Code.NO_INBOX); } + + @Test(groups = "integration") + public void insertWithUnmatchedTopicFilterRejected() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + // do not create any subscription, so all matched topic filters will be unmatched + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + 
BatchAttachRequest.Params attachParams = BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build(); + requestAttach(attachParams).get(0); + + String unmatchedTF = "/not/subscribed"; + TopicMessagePack.PublisherPack msg = message(QoS.AT_MOST_ONCE, "hello-unmatched"); + + InsertResult insertResult = requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(unmatchedTF) + .setIncarnation(1L) + .build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic(unmatchedTF) + .addMessage(msg) + .build()) + .build()) + .build()).get(0); + + // insert is ignored because no subscription matches the topic filter + assertEquals(insertResult.getCode(), InsertResult.Code.OK); + assertEquals(insertResult.getResultCount(), 1); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), unmatchedTF); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); + assertTrue(insertResult.getResult(0).getRejected()); + + // no messages should be fetched + Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + assertEquals(fetched.getQos0MsgCount(), 0); + assertEquals(fetched.getSendBufferMsgCount(), 0); + } + + @Test(groups = "integration") + public void insertWithOldAndCurrentIncarnationMixed() { + long now = 0; + String tenantId = "tenantId-" + System.nanoTime(); + String inboxId = "inboxId-" + System.nanoTime(); + long incarnation = System.nanoTime(); + String topicFilter = "/a/b/c"; + + ClientInfo client = ClientInfo.newBuilder().setTenantId(tenantId).build(); + InboxVersion inboxVersion = 
requestAttach(BatchAttachRequest.Params.newBuilder() + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setExpirySeconds(2) + .setLimit(10) + .setClient(client) + .setNow(now) + .build()).get(0); + + requestSub(BatchSubRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setVersion(inboxVersion) + .setTopicFilter(topicFilter) + .setOption(TopicFilterOption.newBuilder() + .setIncarnation(1L) + .setQos(QoS.AT_MOST_ONCE) + .build()) + .setMaxTopicFilters(100) + .setNow(now) + .build()); + + TopicMessagePack.PublisherPack msg1 = message(QoS.AT_MOST_ONCE, "keep-me-1"); + TopicMessagePack.PublisherPack msg2 = message(QoS.AT_MOST_ONCE, "keep-me-2"); + + // same topicFilter, same package with 2 matched: one old (0), one current (1) + InsertResult insertResult = requestInsert(InsertRequest.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .addMessagePack(SubMessagePack.newBuilder() + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) // old -> rejected=true + .build()) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) // matched -> rejected=false + .build()) + .setMessages(TopicMessagePack.newBuilder() + .setTopic(topicFilter) + .addMessage(msg1) + .addMessage(msg2) + .build()) + .build()) + .build()).get(0); + + assertEquals(insertResult.getCode(), InsertResult.Code.OK); + boolean oldRejected = false; + boolean currAccepted = false; + for (InsertResult.SubStatus s : insertResult.getResultList()) { + if (s.getMatchedRoute().getTopicFilter().equals(topicFilter) + && s.getMatchedRoute().getIncarnation() == 0L) { + assertTrue(s.getRejected()); + oldRejected = true; + } + if (s.getMatchedRoute().getTopicFilter().equals(topicFilter) + && s.getMatchedRoute().getIncarnation() == 1L) { + assertFalse(s.getRejected()); + currAccepted = true; + } + } + assertTrue(oldRejected && currAccepted); + + Fetched fetched = 
requestFetch(BatchFetchRequest.Params.newBuilder() + .setTenantId(tenantId) + .setInboxId(inboxId) + .setIncarnation(incarnation) + .setMaxFetch(10) + .build()).get(0); + + assertEquals(fetched.getQos0MsgCount(), 2); + assertEquals(fetched.getQos0Msg(0).getMsg().getMessage(), msg1.getMessage(0)); + assertEquals(fetched.getQos0Msg(1).getMsg().getMessage(), msg2.getMessage(0)); + } + protected void fetchWithoutStartAfter(QoS qos) { long now = 0; String tenantId = "tenantId-" + System.nanoTime(); @@ -120,7 +265,10 @@ protected void fetchWithoutStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -129,8 +277,8 @@ protected void fetchWithoutStartAfter(QoS qos) { .build()) .build()).get(0); assertEquals(insertResult.getCode(), InsertResult.Code.OK); - assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter); - assertEquals(insertResult.getResult(0).getIncarnation(), 1L); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); Fetched fetched = requestFetch( BatchFetchRequest.Params.newBuilder() @@ -190,7 +338,10 @@ protected void fetchWithMaxLimit(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 1L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(1L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -199,8 +350,8 @@ protected void fetchWithMaxLimit(QoS qos) { .build()) .build()).get(0); assertEquals(insertResult.getCode(), InsertResult.Code.OK); - 
assertEquals(insertResult.getResult(0).getTopicFilter(), topicFilter); - assertEquals(insertResult.getResult(0).getIncarnation(), 1L); + assertEquals(insertResult.getResult(0).getMatchedRoute().getTopicFilter(), topicFilter); + assertEquals(insertResult.getResult(0).getMatchedRoute().getIncarnation(), 1L); Fetched fetched = requestFetch(BatchFetchRequest.Params.newBuilder() .setTenantId(tenantId) @@ -270,7 +421,10 @@ protected void fetchWithStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -284,7 +438,10 @@ protected void fetchWithStartAfter(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg4) @@ -386,7 +543,10 @@ protected void commit(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -504,7 +664,10 @@ protected void commitAll(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -518,7 +681,10 @@ protected 
void commitAll(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg4) @@ -587,7 +753,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -599,7 +768,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -611,7 +783,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg2) @@ -646,7 +821,10 @@ protected void insertDropOldest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -715,7 +893,10 @@ protected void insertDropYoungest(QoS 
qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -727,7 +908,10 @@ protected void insertDropYoungest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg1) @@ -755,7 +939,10 @@ protected void insertDropYoungest(QoS qos) { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) @@ -824,7 +1011,10 @@ public void insertQoS012() { .setInboxId(inboxId) .setIncarnation(incarnation) .addMessagePack(SubMessagePack.newBuilder() - .putMatchedTopicFilters(topicFilter, 0L) + .addMatchedRoute(MatchedRoute.newBuilder() + .setTopicFilter(topicFilter) + .setIncarnation(0L) + .build()) .setMessages(TopicMessagePack.newBuilder() .setTopic(topicFilter) .addMessage(msg0) diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java index 73f23877b..fdf2d27e4 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/InboxStoreTest.java @@ -33,15 +33,6 @@ import 
static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; -import org.apache.bifromq.baserpc.client.IConnectable; -import org.apache.bifromq.baserpc.server.IRPCServer; -import org.apache.bifromq.baserpc.server.RPCServerBuilder; -import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; -import org.apache.bifromq.plugin.eventcollector.IEventCollector; -import org.apache.bifromq.plugin.settingprovider.ISettingProvider; -import org.apache.bifromq.plugin.settingprovider.Setting; -import org.apache.bifromq.retain.client.IRetainClient; -import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; import com.google.protobuf.ByteString; import io.micrometer.core.instrument.Gauge; import io.micrometer.core.instrument.Meter; @@ -87,6 +78,10 @@ import org.apache.bifromq.basekv.store.proto.RWCoProcInput; import org.apache.bifromq.basekv.store.proto.ReplyCode; import org.apache.bifromq.basekv.utils.BoundaryUtil; +import org.apache.bifromq.baserpc.client.IConnectable; +import org.apache.bifromq.baserpc.server.IRPCServer; +import org.apache.bifromq.baserpc.server.RPCServerBuilder; +import org.apache.bifromq.baserpc.trafficgovernor.IRPCServiceTrafficService; import org.apache.bifromq.dist.client.IDistClient; import org.apache.bifromq.inbox.client.IInboxClient; import org.apache.bifromq.inbox.storage.proto.BatchAttachRequest; @@ -117,6 +112,11 @@ import org.apache.bifromq.inbox.storage.proto.InsertResult; import org.apache.bifromq.inbox.storage.proto.Replica; import org.apache.bifromq.metrics.TenantMetric; +import org.apache.bifromq.plugin.eventcollector.IEventCollector; +import org.apache.bifromq.plugin.resourcethrottler.IResourceThrottler; +import org.apache.bifromq.plugin.settingprovider.ISettingProvider; +import org.apache.bifromq.plugin.settingprovider.Setting; +import org.apache.bifromq.retain.client.IRetainClient; import org.apache.bifromq.sessiondict.client.ISessionDictClient; import org.apache.bifromq.type.ClientInfo; 
import org.apache.bifromq.type.Message; @@ -208,7 +208,7 @@ public void setup() throws IOException { rpcServer.start(); storeClient.connState().filter(connState -> connState == IConnectable.ConnState.READY).blockingFirst(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } @@ -232,6 +232,7 @@ private void buildStoreServer() { .bgTaskExecutor(bgTaskExecutor) .detachTimeout(Duration.ofSeconds(1)) .gcInterval(Duration.ofSeconds(1)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); } diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java index 12b10a968..b4911cb85 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadExistingTest.java @@ -59,7 +59,7 @@ public void gcJobAfterRestart() { InboxVersion inboxVersion = requestAttach(attachParams).get(0); restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); when(sessionDictClient.exist(any())).thenReturn(CompletableFuture.completedFuture(OnlineCheckResult.NOT_EXISTS)); ArgumentCaptor deleteCaptor = ArgumentCaptor.forClass(DeleteRequest.class); verify(inboxClient, timeout(10000)).delete(deleteCaptor.capture()); diff --git a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java 
b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java index 262bb628c..1609614d6 100644 --- a/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java +++ b/bifromq-inbox/bifromq-inbox-store/src/test/java/org/apache/bifromq/inbox/store/LoadSubStatsTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. */ package org.apache.bifromq.inbox.store; @@ -73,7 +73,7 @@ public void collectAfterRestart() { restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); Gauge newSubCountGauge = getSubCountGauge(tenantId); Gauge newPSessionGauge = getPSessionGauge(tenantId); Gauge newPSessionSpaceGauge = getPSessionSpaceGauge(tenantId); diff --git a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java index 855fe427b..a1e593dc4 100644 --- a/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java +++ b/bifromq-mqtt/bifromq-mqtt-server/src/test/java/org/apache/bifromq/mqtt/integration/MQTTTest.java @@ -184,6 +184,7 @@ public final void setupClass() { .resourceThrottler(resourceThrottler) .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new InMemKVEngineConfigurator())) @@ -214,6 +215,7 @@ public final void setupClass() { .retainStoreClient(retainStoreKVStoreClient) 
.tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new InMemKVEngineConfigurator())) @@ -243,6 +245,7 @@ public final void setupClass() { .distWorkerClient(distWorkerStoreClient) .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) + .bootstrapDelay(Duration.ofSeconds(1)) .storeOptions(new KVRangeStoreOptions() .setDataEngineConfigurator(new InMemKVEngineConfigurator()) .setWalEngineConfigurator(new InMemKVEngineConfigurator())) @@ -299,9 +302,9 @@ public final void setupClass() { .filter(state -> state == IRPCClient.ConnState.READY) .firstElement() .blockingSubscribe(); - await().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); - await().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); - await().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(distWorkerStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(inboxStoreKVStoreClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(retainStoreKVStoreClient.latestEffectiveRouter().keySet())); lenient().when(settingProvider.provide(any(), anyString())) .thenAnswer(invocation -> { Setting setting = invocation.getArgument(0); diff --git a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java index a59619185..a6af28b2b 100644 --- a/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java +++ 
b/bifromq-plugin/bifromq-plugin-setting-provider/src/main/java/org/apache/bifromq/plugin/settingprovider/Setting.java @@ -28,10 +28,14 @@ */ @Slf4j public enum Setting { - MQTT3Enabled(Boolean.class, val -> true, true), MQTT4Enabled(Boolean.class, val -> true, true), - MQTT5Enabled(Boolean.class, val -> true, true), DebugModeEnabled(Boolean.class, val -> true, false), - ForceTransient(Boolean.class, val -> true, false), ByPassPermCheckError(Boolean.class, val -> true, true), - PayloadFormatValidationEnabled(Boolean.class, val -> true, true), RetainEnabled(Boolean.class, val -> true, true), + MQTT3Enabled(Boolean.class, val -> true, true), + MQTT4Enabled(Boolean.class, val -> true, true), + MQTT5Enabled(Boolean.class, val -> true, true), + DebugModeEnabled(Boolean.class, val -> true, false), + ForceTransient(Boolean.class, val -> true, false), + ByPassPermCheckError(Boolean.class, val -> true, true), + PayloadFormatValidationEnabled(Boolean.class, val -> true, true), + RetainEnabled(Boolean.class, val -> true, true), WildcardSubscriptionEnabled(Boolean.class, val -> true, true), SubscriptionIdentifierEnabled(Boolean.class, val -> true, true), SharedSubscriptionEnabled(Boolean.class, val -> true, true), diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java index f751fbaf5..abdbe24ca 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/LoadMetadataTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.retain.store; @@ -22,9 +22,9 @@ import static org.awaitility.Awaitility.await; import static org.testng.Assert.assertNotSame; -import org.apache.bifromq.basekv.utils.BoundaryUtil; import io.micrometer.core.instrument.Gauge; import java.time.Duration; +import org.apache.bifromq.basekv.utils.BoundaryUtil; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; @@ -47,7 +47,7 @@ public void testLoadMetadata() { Gauge retainCountGauge = getRetainCountGauge(tenantId); restartStoreServer(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); Gauge newSpaceUsageGauge = getSpaceUsageGauge(tenantId); Gauge newRetainCountGauge = getRetainCountGauge(tenantId); diff --git a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java index 926e78a08..4f792c2dd 100644 --- a/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java +++ b/bifromq-retain/bifromq-retain-store/src/test/java/org/apache/bifromq/retain/store/RetainStoreTest.java @@ -14,7 +14,7 @@ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations - * under the License. + * under the License. 
*/ package org.apache.bifromq.retain.store; @@ -28,6 +28,25 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertTrue; +import com.google.protobuf.ByteString; +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.Meter; +import io.micrometer.core.instrument.Metrics; +import io.micrometer.core.instrument.simple.SimpleMeterRegistry; +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.Duration; +import java.util.Comparator; +import java.util.Objects; +import java.util.UUID; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadLocalRandom; +import java.util.concurrent.atomic.AtomicReference; +import lombok.extern.slf4j.Slf4j; import org.apache.bifromq.basecluster.AgentHostOptions; import org.apache.bifromq.basecluster.IAgentHost; import org.apache.bifromq.basecrdt.service.CRDTServiceOptions; @@ -69,25 +88,6 @@ import org.apache.bifromq.type.ClientInfo; import org.apache.bifromq.type.Message; import org.apache.bifromq.type.TopicMessage; -import com.google.protobuf.ByteString; -import io.micrometer.core.instrument.Gauge; -import io.micrometer.core.instrument.Meter; -import io.micrometer.core.instrument.Metrics; -import io.micrometer.core.instrument.simple.SimpleMeterRegistry; -import java.io.File; -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.time.Duration; -import java.util.Comparator; -import java.util.Objects; -import java.util.UUID; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ScheduledThreadPoolExecutor; -import java.util.concurrent.ThreadLocalRandom; -import java.util.concurrent.atomic.AtomicReference; -import lombok.extern.slf4j.Slf4j; import org.mockito.MockitoAnnotations; import 
org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; @@ -143,7 +143,7 @@ public void setup() throws IOException { .metaService(metaService).build(); buildStoreServer(); rpcServer.start(); - await().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); + await().forever().until(() -> BoundaryUtil.isValidSplitSet(storeClient.latestEffectiveRouter().keySet())); log.info("Setup finished, and start testing"); } @@ -158,6 +158,7 @@ private void buildStoreServer() { .tickerThreads(tickerThreads) .bgTaskExecutor(bgTaskExecutor) .gcInterval(Duration.ofSeconds(60)) + .bootstrapDelay(Duration.ofSeconds(1)) .build(); rpcServer = rpcServerBuilder.build(); } diff --git a/build/build-bifromq-starter/conf/log4j2.xml b/build/build-bifromq-starter/conf/log4j2.xml index a646e40ab..c3fd5569e 100644 --- a/build/build-bifromq-starter/conf/log4j2.xml +++ b/build/build-bifromq-starter/conf/log4j2.xml @@ -41,7 +41,7 @@ %d{yyyy-MM-dd HH:mm:ss.SSS} %5p [%t] --- [%F:%L] %m - [id=%X{id},term=%X{term},state=%X{state},leader=%X{leader},f=%X{first},l=%X{last}],c=%X{commit},cfg=%X{config}]%n ]]>