Introduce support for parallel scans into tempest2 #190

Merged
16 changes: 2 additions & 14 deletions docs/guide/query_scan.md
@@ -514,19 +514,7 @@ worker can be a thread (in programming languages that support multithreading) or an operating
system process. To perform a parallel scan, each worker issues its own Scan request with a
unique `WorkerId`.

=== "Kotlin - SDK 2.x"

```kotlin
Not supported
```

=== "Java - SDK 2.x"

```java
Not supported
```

=== "Kotlin - SDK 1.x"
=== "Kotlin"

```kotlin
private val table: MusicTable
@@ -545,7 +533,7 @@ unique `WorkerId`.
}
```

=== "Java - SDK 1.x"
=== "Java"

```java
private final MusicTable table;
@@ -21,21 +21,28 @@
import app.cash.tempest2.Offset;
import app.cash.tempest2.Page;
import app.cash.tempest2.QueryConfig;
import app.cash.tempest2.ScanConfig;
import app.cash.tempest2.WorkerId;
import app.cash.tempest2.musiclibrary.java.AlbumTrack;
import app.cash.tempest2.musiclibrary.java.MusicTable;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import software.amazon.awssdk.enhanced.dynamodb.Expression;
import software.amazon.awssdk.services.dynamodb.model.AttributeValue;

public class QueryNScan {

private final MusicTable table;
private final ExecutorService executor;

public QueryNScan(MusicTable table) {
public QueryNScan(MusicTable table, ExecutorService executor) {
this.table = table;
this.executor = executor;
}

// Query - Key Condition - Partition Key and Entity Type.
@@ -164,5 +171,25 @@ public List<AlbumTrack> loadAllAlbumTracks() {
}

// Scan - Parallel.
// Not supported.
public List<AlbumTrack> loadAllAlbumTracks2() {
Future<List<AlbumTrack>> segment0 = executor.submit(() -> loadSegment(0));
Future<List<AlbumTrack>> segment1 = executor.submit(() -> loadSegment(1));
List<AlbumTrack> results = new ArrayList<>();
try {
results.addAll(segment0.get());
results.addAll(segment1.get());
} catch (InterruptedException | ExecutionException e) {
throw new IllegalStateException("Failed to load tracks", e);
}
return results;
}

private List<AlbumTrack> loadSegment(int segment) {
Page<AlbumTrack.Key, AlbumTrack> page = table.albumTracks().scan(
new ScanConfig.Builder()
.workerId(new WorkerId(segment, /* totalSegments */ 2))
.build()
);
return page.getContents();
}
}
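
The Java sample above fixes the fan-out at two segments. As a point of comparison, here is a minimal Kotlin sketch, not part of this PR, that generalizes to N segments. It assumes kotlinx-coroutines is available, reuses the `scanAll(workerId = ...)` overload exercised by the tests below, and borrows the sample music library's `MusicTable`/`AlbumTrack` types (imports for those are omitted); the helper name is hypothetical.

```kotlin
import app.cash.tempest2.WorkerId
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.awaitAll
import kotlinx.coroutines.runBlocking

// Hypothetical helper: fan out one scan per segment and merge the results.
fun loadAllAlbumTracksInParallel(table: MusicTable, totalSegments: Int = 2): List<AlbumTrack> =
  runBlocking {
    (0 until totalSegments).map { segment ->
      async(Dispatchers.IO) {
        // Each worker reads only its own zero-based segment of the table.
        table.albumTracks.scanAll(workerId = WorkerId(segment, totalSegments))
          .map { page -> page.contents }
          .flatten()
          .toList()
      }
    }.awaitAll().flatten()
  }
```

The fan-out mechanism (coroutines, threads, or an `ExecutorService`) is orthogonal to the API; the only requirement is that every worker uses the same `totalSegments` and a distinct, zero-based `segment`.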
59 changes: 53 additions & 6 deletions tempest2/src/main/kotlin/app/cash/tempest2/Scan.kt
@@ -27,7 +27,8 @@ interface Scannable<K : Any, I : Any> {
pageSize: Int = 100,
consistentRead: Boolean = false,
filterExpression: Expression? = null,
initialOffset: Offset<K>? = null
initialOffset: Offset<K>? = null,
workerId: WorkerId? = null,
): Page<K, I>

// Overloaded functions for Java callers (Kotlin interfaces do not support `@JvmOverloads`).
@@ -51,7 +52,8 @@ interface Scannable<K : Any, I : Any> {
config.pageSize,
config.consistentRead,
config.filterExpression,
initialOffset
initialOffset,
config.workerId
)

/**
@@ -63,6 +65,7 @@ interface Scannable<K : Any, I : Any> {
consistentRead: Boolean = false,
filterExpression: Expression? = null,
initialOffset: Offset<K>? = null,
workerId: WorkerId? = null,
): Sequence<Page<K, I>>

// Overloaded functions for Java callers (Kotlin interfaces do not support `@JvmOverloads`).
@@ -87,7 +90,8 @@ interface Scannable<K : Any, I : Any> {
config.pageSize,
config.consistentRead,
config.filterExpression,
initialOffset
initialOffset,
config.workerId
)
}

@@ -100,6 +104,7 @@ interface Scannable<K : Any, I : Any> {
consistentRead: Boolean = false,
filterExpression: Expression? = null,
initialOffset: Offset<K>? = null,
workerId: WorkerId? = null,
): Sequence<I>

// Overloaded functions for Java callers (Kotlin interfaces do not support `@JvmOverloads`).
@@ -124,20 +129,59 @@ interface Scannable<K : Any, I : Any> {
config.pageSize,
config.consistentRead,
config.filterExpression,
initialOffset
initialOffset,
config.workerId
)
}
}

/**
* By default, the Scan operation processes data sequentially. Amazon DynamoDB returns data to the
* application in 1 MB increments, and an application performs additional Scan operations to
* retrieve the next 1 MB of data.
*
* The larger the table or index being scanned, the more time the Scan takes to complete. In
* addition, a sequential Scan might not always be able to fully use the provisioned read throughput
* capacity: Even though DynamoDB distributes a large table's data across multiple physical
* partitions, a Scan operation can only read one partition at a time. For this reason, the
* throughput of a Scan is constrained by the maximum throughput of a single partition.
*
* To address these issues, the Scan operation can logically divide a table or secondary index into
* multiple segments, with multiple application workers scanning the segments in parallel. Each
* worker can be a thread (in programming languages that support multithreading) or an operating
* system process. To perform a parallel scan, each worker issues its own Scan request with a
* unique [WorkerId].
*/
data class WorkerId(
/**
* A segment to be scanned by a particular worker. Each worker should use a different value for
* Segment.
*
* Segments are zero-based, so the first number is always 0.
*/
val segment: Int,
/**
* The total number of segments for the parallel scan. This value must be the same as the number
* of workers that your application will use.
*/
val totalSegments: Int
) {
init {
require(segment < totalSegments) { "Expected segment ($segment) to be less than totalSegments ($totalSegments)" }
}
}

data class ScanConfig internal constructor(
val pageSize: Int,
val consistentRead: Boolean,
val filterExpression: Expression?
val filterExpression: Expression?,
val workerId: WorkerId?
) {
class Builder {
private var pageSize = 100
private var consistentRead = false
private var filterExpression: Expression? = null
private var workerId: WorkerId? = null

fun pageSize(pageSize: Int) = apply { this.pageSize = pageSize }

@@ -146,10 +190,13 @@ data class ScanConfig internal constructor(
fun filterExpression(filterExpression: Expression) =
apply { this.filterExpression = filterExpression }

fun workerId(workerId: WorkerId) = apply { this.workerId = workerId }

fun build() = ScanConfig(
pageSize,
consistentRead,
filterExpression
filterExpression,
workerId
)
}
}
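
As a small illustration of the zero-based segment contract that `WorkerId` enforces in its `init` block (illustrative only, not part of the diff):

```kotlin
import app.cash.tempest2.WorkerId

// For two workers, segments 0 and 1 are the only valid ids.
val workers = listOf(
  WorkerId(segment = 0, totalSegments = 2),
  WorkerId(segment = 1, totalSegments = 2)
)

// Out-of-range segments fail fast:
// WorkerId(segment = 2, totalSegments = 2) // throws IllegalArgumentException
```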
@@ -21,6 +21,7 @@ import app.cash.tempest2.AsyncScannable
import app.cash.tempest2.Offset
import app.cash.tempest2.Page
import app.cash.tempest2.Scannable
import app.cash.tempest2.WorkerId
import kotlinx.coroutines.flow.map
import kotlinx.coroutines.reactive.asFlow
import kotlinx.coroutines.reactive.asPublisher
@@ -50,9 +51,10 @@ internal class DynamoDbScannable<K : Any, I : Any, R : Any>(
pageSize: Int,
consistentRead: Boolean,
filterExpression: Expression?,
initialOffset: Offset<K>?
initialOffset: Offset<K>?,
workerId: WorkerId?
): Page<K, I> {
val request = toScanRequest(consistentRead, pageSize, filterExpression, initialOffset)
val request = toScanRequest(consistentRead, pageSize, filterExpression, initialOffset, workerId)
val page = if (secondaryIndexName != null) {
dynamoDbTable.index(secondaryIndexName).scan(request)
} else {
@@ -66,13 +68,14 @@
pageSize: Int,
consistentRead: Boolean,
filterExpression: Expression?,
initialOffset: Offset<K>?
initialOffset: Offset<K>?,
workerId: WorkerId?
): Sequence<Page<K, I>> {
return generateSequence(
scan(pageSize, consistentRead, filterExpression, initialOffset)
scan(pageSize, consistentRead, filterExpression, initialOffset, workerId)
) { page ->
page.offset?.let { offset ->
scan(pageSize, consistentRead, filterExpression, offset)
scan(pageSize, consistentRead, filterExpression, offset, workerId)
}
}
}
@@ -81,9 +84,10 @@ internal class DynamoDbScannable<K : Any, I : Any, R : Any>(
pageSize: Int,
consistentRead: Boolean,
filterExpression: Expression?,
initialOffset: Offset<K>?
initialOffset: Offset<K>?,
workerId: WorkerId?
): Sequence<I> {
return scanAll(pageSize, consistentRead, filterExpression, initialOffset)
return scanAll(pageSize, consistentRead, filterExpression, initialOffset, workerId)
.map { it.contents }
.flatten()
}
@@ -99,7 +103,7 @@ internal class DynamoDbScannable<K : Any, I : Any, R : Any>(
pageSize: Int,
consistentRead: Boolean,
filterExpression: Expression?,
initialOffset: Offset<K>?
initialOffset: Offset<K>?,
): Publisher<Page<K, I>> {
val request = toScanRequest(consistentRead, pageSize, filterExpression, initialOffset)
return if (secondaryIndexName != null) {
@@ -118,7 +122,8 @@ internal class DynamoDbScannable<K : Any, I : Any, R : Any>(
consistentRead: Boolean,
pageSize: Int,
filterExpression: Expression?,
initialOffset: Offset<K>?
initialOffset: Offset<K>?,
workerId: WorkerId? = null
): ScanEnhancedRequest {
val scan = ScanEnhancedRequest.builder()
.consistentRead(consistentRead)
@@ -130,6 +135,10 @@
if (initialOffset != null) {
scan.exclusiveStartKey(initialOffset.encodeOffset())
}
if (workerId != null) {
scan.segment(workerId.segment)
scan.totalSegments(workerId.totalSegments)
}
return scan.build()
}
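
For reference, a rough sketch (illustrative only, not taken from the diff) of the request that `toScanRequest` ends up building for `WorkerId(1, 2)` with default `ScanConfig` values; the `limit()` call is an assumed stand-in for the `pageSize` handling in the truncated part of the hunk above:

```kotlin
import software.amazon.awssdk.enhanced.dynamodb.model.ScanEnhancedRequest

// WorkerId maps directly onto DynamoDB's native Segment/TotalSegments parameters.
val request: ScanEnhancedRequest = ScanEnhancedRequest.builder()
  .consistentRead(false) // ScanConfig.consistentRead default
  .limit(100)            // assumed mapping of ScanConfig.pageSize (not shown above)
  .segment(1)            // WorkerId.segment
  .totalSegments(2)      // WorkerId.totalSegments
  .build()
```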

@@ -105,6 +105,28 @@ class DynamoDbScannableTest {
)
}

@Test
fun primaryIndexParallelScan() {
musicTable.givenAlbums(THE_WALL)

val worker1Page1 = musicTable.albumTracks.scan(
workerId = WorkerId(0, 2),
pageSize = 50,
filterExpression = isTrack()
)
assertThat(worker1Page1.hasMorePages).isFalse()

val worker2Page1 = musicTable.albumTracks.scan(
workerId = WorkerId(1, 2),
pageSize = 50,
filterExpression = isTrack()
)
assertThat(worker2Page1.hasMorePages).isFalse()

assertThat(worker1Page1.trackTitles + worker2Page1.trackTitles)
.containsExactlyInAnyOrderElementsOf(THE_WALL.trackTitles)
}

@Test
fun localSecondaryIndex() {
musicTable.givenAlbums(THE_WALL)
@@ -205,6 +227,73 @@
)
}

@Test
fun scanAllParallelScan() {
musicTable.givenAlbums(
THE_DARK_SIDE_OF_THE_MOON,
THE_WALL,
WHAT_YOU_DO_TO_ME_SINGLE,
AFTER_HOURS_EP,
LOCKDOWN_SINGLE
)

val worker1Page1 = musicTable.albumInfoByArtist.scanAll(
workerId = WorkerId(0, 2),
pageSize = 50
).iterator().next()
assertThat(worker1Page1.hasMorePages).isFalse()

val worker2Page1 = musicTable.albumInfoByArtist.scanAll(
workerId = WorkerId(1, 2),
pageSize = 50
).iterator().next()
assertThat(worker2Page1.hasMorePages).isFalse()

assertThat(worker1Page1.albumTitles + worker2Page1.albumTitles).containsExactlyInAnyOrder(
THE_DARK_SIDE_OF_THE_MOON.album_title,
THE_WALL.album_title,
WHAT_YOU_DO_TO_ME_SINGLE.album_title,
AFTER_HOURS_EP.album_title,
LOCKDOWN_SINGLE.album_title
)
}

@Test
fun scanAllParallelScanAndScanConfig() {
musicTable.givenAlbums(
THE_DARK_SIDE_OF_THE_MOON,
THE_WALL,
WHAT_YOU_DO_TO_ME_SINGLE,
AFTER_HOURS_EP,
LOCKDOWN_SINGLE
)

val worker1Page1 = musicTable.albumInfoByArtist.scanAll(
ScanConfig.Builder()
.workerId(WorkerId(0, 2))
.pageSize(50)
.build()
).iterator().next()
assertThat(worker1Page1.hasMorePages).isFalse()

val worker2Page1 = musicTable.albumInfoByArtist.scanAll(
ScanConfig.Builder()
.workerId(WorkerId(1, 2))
.pageSize(50)
.build()
).iterator().next()
assertThat(worker2Page1.hasMorePages).isFalse()

assertThat(worker1Page1.albumTitles.intersect(worker2Page1.albumTitles)).isEmpty()
assertThat(worker1Page1.albumTitles + worker2Page1.albumTitles).containsExactlyInAnyOrder(
THE_DARK_SIDE_OF_THE_MOON.album_title,
THE_WALL.album_title,
WHAT_YOU_DO_TO_ME_SINGLE.album_title,
AFTER_HOURS_EP.album_title,
LOCKDOWN_SINGLE.album_title
)
}

@Test
fun scanAllPagination() {
musicTable.givenAlbums(