|
17 | 17 |
|
18 | 18 | package org.apache.flink.test.streaming.runtime; |
19 | 19 |
|
| 20 | +import org.apache.flink.api.common.JobID; |
20 | 21 | import org.apache.flink.api.common.RuntimeExecutionMode; |
21 | 22 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; |
| 23 | +import org.apache.flink.api.common.functions.MapFunction; |
| 24 | +import org.apache.flink.api.common.state.CheckpointListener; |
22 | 25 | import org.apache.flink.api.common.typeinfo.IntegerTypeInfo; |
23 | 26 | import org.apache.flink.api.connector.sink2.Committer; |
24 | 27 | import org.apache.flink.api.connector.source.Source; |
25 | 28 | import org.apache.flink.api.connector.source.util.ratelimit.GatedRateLimiter; |
26 | 29 | import org.apache.flink.api.connector.source.util.ratelimit.RateLimiter; |
27 | 30 | import org.apache.flink.api.connector.source.util.ratelimit.RateLimiterStrategy; |
| 31 | +import org.apache.flink.client.program.ClusterClient; |
| 32 | +import org.apache.flink.configuration.CheckpointingOptions; |
| 33 | +import org.apache.flink.configuration.Configuration; |
| 34 | +import org.apache.flink.configuration.CoreOptions; |
| 35 | +import org.apache.flink.configuration.ExternalizedCheckpointRetention; |
| 36 | +import org.apache.flink.configuration.RestartStrategyOptions; |
| 37 | +import org.apache.flink.configuration.StateBackendOptions; |
| 38 | +import org.apache.flink.configuration.StateRecoveryOptions; |
28 | 39 | import org.apache.flink.connector.datagen.source.DataGeneratorSource; |
| 40 | +import org.apache.flink.runtime.messages.FlinkJobNotFoundException; |
| 41 | +import org.apache.flink.runtime.minicluster.MiniCluster; |
| 42 | +import org.apache.flink.runtime.testutils.CommonTestUtils; |
29 | 43 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; |
30 | 44 | import org.apache.flink.streaming.runtime.operators.sink.TestSinkV2; |
31 | 45 | import org.apache.flink.streaming.runtime.operators.sink.TestSinkV2.Record; |
32 | 46 | import org.apache.flink.streaming.runtime.operators.sink.TestSinkV2.RecordSerializer; |
| 47 | +import org.apache.flink.test.junit5.InjectClusterClient; |
| 48 | +import org.apache.flink.test.junit5.InjectMiniCluster; |
33 | 49 | import org.apache.flink.test.util.AbstractTestBase; |
34 | 50 | import org.apache.flink.testutils.junit.SharedObjectsExtension; |
35 | 51 | import org.apache.flink.testutils.junit.SharedReference; |
36 | 52 |
|
37 | 53 | import org.junit.jupiter.api.Test; |
38 | 54 | import org.junit.jupiter.api.extension.RegisterExtension; |
| 55 | +import org.junit.jupiter.api.io.TempDir; |
| 56 | +import org.junit.jupiter.params.ParameterizedTest; |
| 57 | +import org.junit.jupiter.params.provider.CsvSource; |
39 | 58 | import org.slf4j.Logger; |
40 | 59 | import org.slf4j.LoggerFactory; |
41 | 60 |
|
| 61 | +import java.io.File; |
42 | 62 | import java.io.Serializable; |
43 | 63 | import java.util.Arrays; |
44 | 64 | import java.util.Collection; |
45 | 65 | import java.util.Collections; |
46 | 66 | import java.util.List; |
| 67 | +import java.util.Optional; |
47 | 68 | import java.util.Queue; |
48 | 69 | import java.util.concurrent.CompletionStage; |
49 | 70 | import java.util.concurrent.ConcurrentLinkedQueue; |
| 71 | +import java.util.concurrent.ExecutionException; |
| 72 | +import java.util.concurrent.atomic.AtomicBoolean; |
50 | 73 | import java.util.stream.Collectors; |
| 74 | +import java.util.stream.IntStream; |
51 | 75 |
|
52 | 76 | import static org.assertj.core.api.Assertions.assertThat; |
53 | 77 |
|
@@ -125,6 +149,49 @@ private static Record<Integer> flipValue(Record<Integer> r) { |
125 | 149 | return r.withValue(-r.getValue()); |
126 | 150 | } |
127 | 151 |
|
| 152 | + @ParameterizedTest |
| 153 | + @CsvSource({"1, 2", "2, 1", "1, 1"}) |
| 154 | + public void writerAndCommitterExecuteInStreamingModeWithScaling( |
| 155 | + int initialParallelism, |
| 156 | + int scaledParallelism, |
| 157 | + @TempDir File checkpointDir, |
| 158 | + @InjectMiniCluster MiniCluster miniCluster, |
| 159 | + @InjectClusterClient ClusterClient<?> clusterClient) |
| 160 | + throws Exception { |
| 161 | + SharedReference<Queue<Committer.CommitRequest<Record<Integer>>>> committed = |
| 162 | + SHARED_OBJECTS.add(new ConcurrentLinkedQueue<>()); |
| 163 | + final TrackingCommitter trackingCommitter = new TrackingCommitter(committed); |
| 164 | + final Configuration config = createConfigForScalingTest(checkpointDir, initialParallelism); |
| 165 | + |
| 166 | + // first run |
| 167 | + final JobID jobID = |
| 168 | + runStreamingWithScalingTest( |
| 169 | + config, |
| 170 | + initialParallelism, |
| 171 | + trackingCommitter, |
| 172 | + true, |
| 173 | + miniCluster, |
| 174 | + clusterClient); |
| 175 | + |
| 176 | + // second run |
| 177 | + config.set(StateRecoveryOptions.SAVEPOINT_PATH, getCheckpointPath(miniCluster, jobID)); |
| 178 | + config.set(CoreOptions.DEFAULT_PARALLELISM, scaledParallelism); |
| 179 | + runStreamingWithScalingTest( |
| 180 | + config, initialParallelism, trackingCommitter, false, miniCluster, clusterClient); |
| 181 | + |
| 182 | + assertThat(committed.get()) |
| 183 | + .extracting(Committer.CommitRequest::getCommittable) |
| 184 | + .containsExactlyInAnyOrderElementsOf( |
| 185 | + duplicate(EXPECTED_COMMITTED_DATA_IN_STREAMING_MODE)); |
| 186 | + } |
| 187 | + |
| 188 | + private static List<Record<Integer>> duplicate(List<Record<Integer>> values) { |
| 189 | + return IntStream.range(0, 2) |
| 190 | + .boxed() |
| 191 | + .flatMap(i -> values.stream()) |
| 192 | + .collect(Collectors.toList()); |
| 193 | + } |
| 194 | + |
128 | 195 | @Test |
129 | 196 | public void writerAndCommitterExecuteInBatchMode() throws Exception { |
130 | 197 | final StreamExecutionEnvironment env = buildBatchEnv(); |
@@ -184,6 +251,66 @@ private StreamExecutionEnvironment buildStreamEnv() { |
184 | 251 | return env; |
185 | 252 | } |
186 | 253 |
|
| 254 | + private Configuration createConfigForScalingTest(File checkpointDir, int parallelism) { |
| 255 | + final Configuration config = new Configuration(); |
| 256 | + config.set(CoreOptions.DEFAULT_PARALLELISM, parallelism); |
| 257 | + config.set(StateBackendOptions.STATE_BACKEND, "hashmap"); |
| 258 | + config.set(CheckpointingOptions.CHECKPOINTS_DIRECTORY, checkpointDir.toURI().toString()); |
| 259 | + config.set( |
| 260 | + CheckpointingOptions.EXTERNALIZED_CHECKPOINT_RETENTION, |
| 261 | + ExternalizedCheckpointRetention.RETAIN_ON_CANCELLATION); |
| 262 | + config.set(CheckpointingOptions.MAX_RETAINED_CHECKPOINTS, 2000); |
| 263 | + config.set(RestartStrategyOptions.RESTART_STRATEGY, "disable"); |
| 264 | + |
| 265 | + return config; |
| 266 | + } |
| 267 | + |
| 268 | + private StreamExecutionEnvironment buildStreamEnvWithCheckpointDir(Configuration config) { |
| 269 | + final StreamExecutionEnvironment env = |
| 270 | + StreamExecutionEnvironment.getExecutionEnvironment(config); |
| 271 | + env.setRuntimeMode(RuntimeExecutionMode.STREAMING); |
| 272 | + env.enableCheckpointing(100); |
| 273 | + |
| 274 | + return env; |
| 275 | + } |
| 276 | + |
| 277 | + private JobID runStreamingWithScalingTest( |
| 278 | + Configuration config, |
| 279 | + int parallelism, |
| 280 | + TrackingCommitter trackingCommitter, |
| 281 | + boolean shouldMapperFail, |
| 282 | + MiniCluster miniCluster, |
| 283 | + ClusterClient<?> clusterClient) |
| 284 | + throws Exception { |
| 285 | + final StreamExecutionEnvironment env = buildStreamEnvWithCheckpointDir(config); |
| 286 | + final Source<Integer, ?, ?> source = createStreamingSource(); |
| 287 | + |
| 288 | + env.fromSource(source, WatermarkStrategy.noWatermarks(), "source") |
| 289 | + .rebalance() |
| 290 | + .map( |
| 291 | + new FailingCheckpointMapper( |
| 292 | + SHARED_OBJECTS.add(new AtomicBoolean(!shouldMapperFail)))) |
| 293 | + .sinkTo( |
| 294 | + TestSinkV2.<Integer>newBuilder() |
| 295 | + .setCommitter(trackingCommitter, RecordSerializer::new) |
| 296 | + .setWithPostCommitTopology(true) |
| 297 | + .build()); |
| 298 | + |
| 299 | + final JobID jobId = clusterClient.submitJob(env.getStreamGraph().getJobGraph()).get(); |
| 300 | + clusterClient.requestJobResult(jobId).get(); |
| 301 | + |
| 302 | + return jobId; |
| 303 | + } |
| 304 | + |
| 305 | + private String getCheckpointPath(MiniCluster miniCluster, JobID secondJobId) |
| 306 | + throws InterruptedException, ExecutionException, FlinkJobNotFoundException { |
| 307 | + final Optional<String> completedCheckpoint = |
| 308 | + CommonTestUtils.getLatestCompletedCheckpointPath(secondJobId, miniCluster); |
| 309 | + |
| 310 | + assertThat(completedCheckpoint).isPresent(); |
| 311 | + return completedCheckpoint.get(); |
| 312 | + } |
| 313 | + |
187 | 314 | private StreamExecutionEnvironment buildBatchEnv() { |
188 | 315 | final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); |
189 | 316 | env.setRuntimeMode(RuntimeExecutionMode.BATCH); |
@@ -245,4 +372,32 @@ public void commit(Collection<CommitRequest<Record<Integer>>> committables) { |
245 | 372 | @Override |
246 | 373 | public void close() {} |
247 | 374 | } |
| 375 | + |
| 376 | + private static class FailingCheckpointMapper |
| 377 | + implements MapFunction<Integer, Integer>, CheckpointListener { |
| 378 | + |
| 379 | + private final SharedReference<AtomicBoolean> failed; |
| 380 | + private long lastCheckpointId = 0; |
| 381 | + private int emittedBetweenCheckpoint = 0; |
| 382 | + |
| 383 | + FailingCheckpointMapper(SharedReference<AtomicBoolean> failed) { |
| 384 | + this.failed = failed; |
| 385 | + } |
| 386 | + |
| 387 | + @Override |
| 388 | + public Integer map(Integer value) { |
| 389 | + if (lastCheckpointId >= 1 && emittedBetweenCheckpoint > 0 && !failed.get().get()) { |
| 390 | + failed.get().set(true); |
| 391 | + throw new RuntimeException("Planned exception."); |
| 392 | + } |
| 393 | + emittedBetweenCheckpoint++; |
| 394 | + return value; |
| 395 | + } |
| 396 | + |
| 397 | + @Override |
| 398 | + public void notifyCheckpointComplete(long checkpointId) { |
| 399 | + lastCheckpointId = checkpointId; |
| 400 | + emittedBetweenCheckpoint = 0; |
| 401 | + } |
| 402 | + } |
248 | 403 | } |
0 commit comments