-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
track lowest allowed timestamp per persistence ID #110
Open
leviramsey
wants to merge
3
commits into
akka:main
Choose a base branch
from
leviramsey:monotonic
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
265 changes: 265 additions & 0 deletions
265
core/src/main/scala/akka/persistence/dynamodb/internal/MonotonicTimestamps.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
/* | ||
* Copyright (C) 2024 Lightbend Inc. <https://www.lightbend.com> | ||
*/ | ||
|
||
package akka.persistence.dynamodb.internal | ||
|
||
import akka.Done | ||
import akka.actor.typed.ActorRef | ||
import akka.actor.typed.ActorSystem | ||
import akka.actor.typed.Behavior | ||
import akka.actor.typed.Extension | ||
import akka.actor.typed.ExtensionId | ||
import akka.actor.typed.SupervisorStrategy | ||
import akka.actor.typed.scaladsl.AskPattern.Askable | ||
import akka.actor.typed.scaladsl.Behaviors | ||
import akka.actor.typed.scaladsl.TimerScheduler | ||
import akka.annotation.InternalApi | ||
import akka.persistence.Persistence | ||
|
||
import scala.annotation.tailrec | ||
import scala.collection.immutable.SortedSet | ||
import scala.concurrent.Future | ||
import scala.concurrent.duration._ | ||
import scala.jdk.CollectionConverters.IteratorHasAsScala | ||
|
||
import java.net.URLEncoder | ||
import java.nio.charset.StandardCharsets | ||
import java.time.Instant | ||
import java.time.{ Duration => JDuration } | ||
import java.util.concurrent.ConcurrentHashMap | ||
import java.time.temporal.ChronoUnit | ||
|
||
object MonotonicTimestamps extends ExtensionId[MonotonicTimestamps] { | ||
/** Builds a new [[MonotonicTimestamps]] bound to the given actor system. */
override def createExtension(system: ActorSystem[_]): MonotonicTimestamps = new MonotonicTimestamps(system)

/**
 * Java API: look up the extension for the given actor system.
 *
 * Delegates to `apply` rather than `createExtension`, so repeated calls return the
 * instance registered with the actor system. Constructing a fresh instance per call
 * would give each caller its own per-plugin state and would try to spawn the
 * per-range system actors again under names that are already in use.
 */
def get(system: ActorSystem[_]): MonotonicTimestamps = apply(system)
|
||
/** INTERNAL API */ | ||
@InternalApi | ||
private[internal] final class PerPlugin( | ||
system: ActorSystem[_], | ||
name: String, | ||
numRanges: Int, | ||
rangeForPid: String => Int) { | ||
// One map per range. Each map is only written to by the single actor owning that range;
// other threads only read from it.
private val byPid = Vector.fill(numRanges)(new ConcurrentHashMap[String, Record]())

// One supervised cleaner actor per range. When the actor (re)starts it rebuilds its
// ordered view of the records from whatever is currently in the range's map.
private val rangeActors = Vector.tabulate(numRanges) { range =>
  val actorName =
    URLEncoder.encode(s"dynamodb-persistence-monotonic-timestamps-${name}-$range", StandardCharsets.UTF_8)

  val cleaner =
    Behaviors.setup[Any] { _ =>
      val recovered = SortedSet.from(byPid(range).values.iterator.asScala)

      Behaviors.withTimers[Any] { timers =>
        // only schedule a cleanup if there is something that will eventually need cleaning
        recovered.headOption.foreach { earliest =>
          scheduleNextCleanup(Instant.now(), earliest.nextTimestamp, timers)
        }

        cleanerBehavior(range, recovered, timers)
      }
    }

  val supervised =
    Behaviors
      .supervise(cleaner.narrow[(Record, ActorRef[Done])])
      .onFailure(SupervisorStrategy.restart)

  system.systemActorOf(supervised, actorName)
}
|
||
/**
 * Looks up the record currently tracked for `pid` in the map of the range that `pid` belongs to.
 *
 * @return the record's `nextTimestamp`, or `None` when no record is tracked for `pid`
 */
def minTimestampFor(pid: String): Option[Instant] =
  Option(byPid(rangeForPid(pid)).get(pid)).map(_.nextTimestamp)
|
||
/**
 * Asks the actor owning the range of `pid` to track `timestamp` plus one microsecond
 * as the next allowed timestamp for `pid`.
 *
 * @return a future completed with `Done` when the actor replies; the ask uses a one second timeout
 */
def recordTimestampFor(pid: String, timestamp: Instant): Future[Done] = {
  val owner = rangeActors(rangeForPid(pid))
  val askTimeout = 1.second

  owner.ask[Done] { replyTo =>
    (Record(pid, timestamp.plus(1, ChronoUnit.MICROS)), replyTo)
  }(askTimeout, system.scheduler)
}
|
||
/**
 * Starts the single `Cleanup` timer so that it fires roughly halfway between `now` and
 * `nextTimestamp`, clamped to the interval from one second to ten seconds.
 *
 * @param now           the instant the delay is measured from
 * @param nextTimestamp timestamp of the record expected to become droppable first
 * @param timers        timer scheduler of the actor that should receive `Cleanup`
 */
private def scheduleNextCleanup(now: Instant, nextTimestamp: Instant, timers: TimerScheduler[Any]): Unit = {
  val minDelayMillis = 1000L // one second
  val maxDelayMillis = 10000L // ten seconds

  val halfwayMillis =
    try JDuration.between(now, nextTimestamp).toMillis / 2
    catch {
      // the distance does not fit in a Long: far in the future, so use the maximum
      case _: ArithmeticException => maxDelayMillis
    }

  // minimum 1 second (also covers timestamps that are already in the past), maximum 10 seconds
  val nextCleanupIn = halfwayMillis.min(maxDelayMillis).max(minDelayMillis).millis

  timers.startSingleTimer(Cleanup, Cleanup, nextCleanupIn)
}
|
||
/**
 * Behavior of the actor owning one range of persistence IDs.
 *
 * Handles two kinds of messages:
 *  - `Cleanup`: drops every tracked record whose timestamp is not after the current time,
 *    from both `recordSet` and the range's map, then reschedules itself if records remain.
 *  - `(Record, ActorRef)`: tracks the record for its persistence ID if it is newer than the
 *    one already tracked, and replies with `Done` unless the map could not be updated.
 *
 * @param range     index of the range this actor owns
 * @param recordSet the tracked records, ordered by timestamp and then persistence ID
 * @param timers    timer scheduler used for the `Cleanup` timer
 */
private[internal] def cleanerBehavior(
    range: Int,
    recordSet: SortedSet[Record],
    timers: TimerScheduler[Any]): Behavior[Any] =
  Behaviors.receive { (context, msg) =>
    val records = byPid(range)

    msg match {
      case Cleanup =>
        // next timestamp will be greater than this
        val keepAfter = InstantFactory.now()

        // adding the nano ensures that this will compare greater than
        // any record with timestamp of keepAfter
        // "" as pid (not legal pid) will compare less than any record with same timestamp
        // net effect is to swap the clusivity of rangeFrom/rangeTo
        val pivotRecord = Record("", keepAfter.plusNanos(1))

        val recordsToKeep = recordSet.rangeFrom(pivotRecord)
        val recordsToDrop = recordSet.rangeTo(pivotRecord)

        val kept =
          recordsToDrop.foldLeft(recordsToKeep) { (rtk, record) =>
            val pid = record.pid

            if (records.remove(pid, record)) rtk
            else {
              context.log.warn(
                "Concurrent modification of state: this should not happen. Report issue at github.com/akka/akka-persistence-dynamodb")
              // keep tracking whatever the map holds now; it may hold nothing at all,
              // and a null must never be inserted into the sorted set
              Option(records.get(pid)).fold(rtk)(current => rtk.incl(current))
            }
          }

        kept.headOption.foreach { earliest =>
          scheduleNextCleanup(keepAfter, earliest.nextTimestamp, timers)
        }

        cleanerBehavior(range, kept, timers)

      case (rec: Record, replyTo: ActorRef[Nothing]) =>
        val pid = rec.pid
        val nextRecordSet =
          Option(records.get(pid)) match {
            case None =>
              if (records.putIfAbsent(pid, rec) eq null) {
                replyTo.unsafeUpcast[Done] ! Done
                recordSet.incl(rec)
              } else {
                context.log.warn(
                  "Timestamp not updated for persistence ID [{}]. " +
                  "Report issue at github.com/akka/akka-persistence-dynamodb",
                  pid)

                // no reply
                recordSet
              }

            case Some(oldRecord) if oldRecord.nextTimestamp.isBefore(rec.nextTimestamp) =>
              // the map is what minTimestampFor reads, so it has to be updated as well
              if (records.replace(pid, oldRecord, rec)) {
                replyTo.unsafeUpcast[Done] ! Done
                recordSet.excl(oldRecord).incl(rec)
              } else {
                context.log.warn(
                  "Timestamp not updated for persistence ID [{}]. " +
                  "Report issue at github.com/akka/akka-persistence-dynamodb",
                  pid)

                // no reply; bring the sorted set back in line with what the map holds now
                val withoutOld = recordSet.excl(oldRecord)
                Option(records.get(pid)).fold(withoutOld)(current => withoutOld.incl(current))
              }

            case Some(oldRecord) =>
              context.log.warn(
                "Ignoring attempt to set timestamp for persistence ID [{}] to earlier. " +
                "existing=[{}] attempted=[{}]",
                pid,
                oldRecord.nextTimestamp,
                rec.nextTimestamp)

              replyTo.unsafeUpcast[Done] ! Done
              recordSet
          }

        if (!timers.isTimerActive(Cleanup)) {
          // schedule relative to the earliest tracked record, which is not necessarily `rec`
          nextRecordSet.headOption.foreach { earliest =>
            scheduleNextCleanup(Instant.now(), earliest.nextTimestamp, timers)
          }
        }

        cleanerBehavior(range, nextRecordSet, timers)

      case _ => Behaviors.unhandled
    }
  }
} | ||
|
||
/**
 * INTERNAL API
 *
 * The lowest timestamp that may next be used for a persistence ID.
 *
 * @param pid           the persistence ID the record belongs to
 * @param nextTimestamp the timestamp reported as the minimum for `pid`
 */
@InternalApi
private[internal] final case class Record(pid: String, nextTimestamp: Instant)
|
||
/**
 * INTERNAL API
 *
 * Used both as the timer key and as the message telling a range actor to drop
 * records that are no longer needed.
 */
@InternalApi
private[internal] val Cleanup: String = "Cleanup"
|
||
private object Record {

  /** Orders records by `nextTimestamp`, breaking ties by `pid`. */
  implicit val ordering: Ordering[Record] =
    (left: Record, right: Record) => {
      val byTimestamp = left.nextTimestamp.compareTo(right.nextTimestamp)

      if (byTimestamp != 0) byTimestamp
      else left.pid.compareTo(right.pid)
    }
}
} | ||
|
||
/**
 * Extension holding, per plugin, the lowest allowed next timestamp for each persistence ID.
 *
 * Persistence IDs are spread over a fixed number of ranges based on their slice, and each
 * plugin gets its own [[MonotonicTimestamps.PerPlugin]] state, created on first use.
 */
final class MonotonicTimestamps(system: ActorSystem[_]) extends Extension {
  import MonotonicTimestamps.PerPlugin

  private val persistenceExt = Persistence(system)

  // minimize contention by having a number of ranges that's at least available processors:
  // the processor count rounded up to a power of two, bounded to 1..1024
  private val numRanges = {
    val numProcs = Runtime.getRuntime.availableProcessors

    if (numProcs < 2) 1
    else if (numProcs > 1024) 1024
    else 1 << (32 - Integer.numberOfLeadingZeros(numProcs - 1)) // next highest power of 2
  }

  private val rawRanges = persistenceExt.sliceRanges(numRanges)
  private val starts = rawRanges.map(_.head).toArray

  /** Binary search over `starts` between index `lo` (inclusive) and `hi` (exclusive). */
  @tailrec
  private def rangeIndexFor(slice: Int, lo: Int, hi: Int): Int =
    if ((lo + 1) < hi) {
      val pivot = (lo + hi) / 2
      val start = starts(pivot)

      if (start == slice) pivot
      else if (start < slice) rangeIndexFor(slice, pivot, hi)
      else rangeIndexFor(slice, lo, pivot)
    } else lo

  // maps a persistence ID to the index of the range containing its slice
  private val rangeForPid = (pid: String) => rangeIndexFor(persistenceExt.sliceForPersistenceId(pid), 0, starts.length)

  private val perPlugin = new ConcurrentHashMap[String, PerPlugin]()

  /** The state for `plugin`, created the first time the plugin is seen. */
  private def stateFor(plugin: String): PerPlugin =
    perPlugin.computeIfAbsent(plugin, _ => new PerPlugin(system, plugin, numRanges, rangeForPid))

  /** A lookup function from persistence ID to the minimum timestamp tracked for it by `plugin`. */
  def minTimestampFor(plugin: String): String => Option[Instant] = {
    val pp = stateFor(plugin)

    pid => pp.minTimestampFor(pid)
  }

  /** A function recording, for `plugin`, the timestamp just used for a persistence ID. */
  def recordTimestampFor(plugin: String): (String, Instant) => Future[Done] = {
    val pp = stateFor(plugin)

    (pid, timestamp) => pp.recordTimestampFor(pid, timestamp)
  }
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
May just be me, but the implementation looks more complex than what we need. Feel that it could be simpler, but will think through it some more too.
The journal is an actor, so it could also track this directly. Also see how `writesInProgress` are tracked in the journal implementation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Haven't looked at the details here. Would it be enough to increase the time in InstantFactory when detecting clock skew? At the point the warning is logged in #110
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, was thinking the same. We could just bump in InstantFactory. That will then be monotonically increasing micros until the current time catches up and it reverts to regular timestamps again. And probably check against a configurable tolerance setting, so it can only be skewed by so much, otherwise error.