Skip to content

Commit

Permalink
[FLINK-36613] Make checkpoint include all source data in RescaleCheck…
Browse files Browse the repository at this point in the history
…pointManuallyITCase
  • Loading branch information
Zakelly committed Nov 13, 2024
1 parent 73c120d commit 6fe39c6
Showing 1 changed file with 32 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -267,17 +267,28 @@ private JobGraph createJobGraphWithKeyedState(
env.addSource(
new NotifyingDefiniteKeySource(
numberKeys, numberElements, failAfterEmission) {

String lastCheckpointPath = null;

/**
* This wait method waits until at least two checkpoints have finished, to
* make sure the latest checkpoint contains all the source data.
*/
@Override
public void waitCheckpointCompleted() throws Exception {
public boolean waitCheckpointCompleted() throws Exception {
Optional<String> mostRecentCompletedCheckpointPath =
getLatestCompletedCheckpointPath(
jobID.get(), miniClusterRef.get());
while (!mostRecentCompletedCheckpointPath.isPresent()) {
Thread.sleep(50);
mostRecentCompletedCheckpointPath =
getLatestCompletedCheckpointPath(
jobID.get(), miniClusterRef.get());
if (mostRecentCompletedCheckpointPath.isPresent()) {
if (lastCheckpointPath == null) {
lastCheckpointPath =
mostRecentCompletedCheckpointPath.get();
} else if (!lastCheckpointPath.equals(
mostRecentCompletedCheckpointPath.get())) {
return true;
}
}
return false;
}
})
.keyBy(
Expand Down Expand Up @@ -315,7 +326,9 @@ public NotifyingDefiniteKeySource(
this.failAfterEmission = failAfterEmission;
}

public void waitCheckpointCompleted() throws Exception {}
/**
 * Hook that {@code run} polls, while holding the checkpoint lock, until it returns
 * {@code true} or the wait times out.
 *
 * <p>This default implementation returns {@code true} right away, so the polling loop
 * ends on its first iteration. Subclasses may override it to return {@code false} for
 * as long as the caller should keep waiting.
 *
 * @return {@code true} when the caller may stop waiting
 * @throws Exception declared so that overriding implementations may throw; this
 *     default implementation does not
 */
public boolean waitCheckpointCompleted() throws Exception {
return true;
}

@Override
public void run(SourceContext<Integer> ctx) throws Exception {
Expand All @@ -334,7 +347,18 @@ public void run(SourceContext<Integer> ctx) throws Exception {
counter++;
}
} else {
waitCheckpointCompleted();
boolean newCheckpoint = false;
long waited = 0L;
// wait at most ~30s (30000 ms of sleeping, in 10 ms steps)
while (!newCheckpoint && waited < 30000L) {
synchronized (ctx.getCheckpointLock()) {
newCheckpoint = waitCheckpointCompleted();
}
if (!newCheckpoint) {
waited += 10L;
Thread.sleep(10L);
}
}
if (failAfterEmission) {
throw new FlinkRuntimeException(
"Make job fail artificially, to retain completed checkpoint.");
Expand Down

0 comments on commit 6fe39c6

Please sign in to comment.