Skip to content

Commit a6f3e48

Browse files
committed
optimize enumerator state for lake source
1 parent e277372 commit a6f3e48

File tree

14 files changed

+250
-281
lines changed

14 files changed

+250
-281
lines changed

fluss-client/src/main/java/org/apache/fluss/client/admin/Admin.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.fluss.exception.InvalidReplicationFactorException;
3232
import org.apache.fluss.exception.InvalidTableException;
3333
import org.apache.fluss.exception.KvSnapshotNotExistException;
34+
import org.apache.fluss.exception.LakeTableSnapshotNotExistException;
3435
import org.apache.fluss.exception.NonPrimaryKeyTableException;
3536
import org.apache.fluss.exception.PartitionAlreadyExistsException;
3637
import org.apache.fluss.exception.PartitionNotExistException;
@@ -383,6 +384,7 @@ CompletableFuture<KvSnapshotMetadata> getKvSnapshotMetadata(
383384
*
384385
* <ul>
385386
* <li>{@link TableNotExistException} if the table does not exist.
387+
* <li>{@link LakeTableSnapshotNotExistException} if no lake snapshot exists.
386388
* </ul>
387389
*
388390
* @param tablePath the table path of the table.

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/lake/LakeSplitGenerator.java

Lines changed: 38 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import org.apache.fluss.client.admin.Admin;
2121
import org.apache.fluss.client.metadata.LakeSnapshot;
22+
import org.apache.fluss.exception.LakeTableSnapshotNotExistException;
2223
import org.apache.fluss.flink.lake.split.LakeSnapshotAndFlussLogSplit;
2324
import org.apache.fluss.flink.lake.split.LakeSnapshotSplit;
2425
import org.apache.fluss.flink.source.enumerator.initializer.OffsetsInitializer;
@@ -29,6 +30,7 @@
2930
import org.apache.fluss.metadata.PartitionInfo;
3031
import org.apache.fluss.metadata.TableBucket;
3132
import org.apache.fluss.metadata.TableInfo;
33+
import org.apache.fluss.utils.ExceptionUtils;
3234

3335
import javax.annotation.Nullable;
3436

@@ -75,59 +77,43 @@ public LakeSplitGenerator(
7577
this.listPartitionSupplier = listPartitionSupplier;
7678
}
7779

78-
public List<SourceSplitBase> generateHybridLakeSplits(
79-
Map<Long, String> newPartitionNameById,
80-
boolean loadLakeSplits,
81-
List<LakeSplit> remainingLakeSplits,
82-
Map<TableBucket, Long> tableBucketsOffsetState)
83-
throws Exception {
84-
// get the file store
85-
LakeSnapshot lakeSnapshotInfo =
86-
flussAdmin.getLatestLakeSnapshot(tableInfo.getTablePath()).get();
80+
/**
81+
* Returns a list of hybrid lake snapshot {@link
82+
* LakeSnapshotAndFlussLogSplit} and the corresponding Fluss {@link LogSplit} based on the lake
83+
* snapshot. Return null if no lake snapshot exists.
84+
*/
85+
@Nullable
86+
public List<SourceSplitBase> generateHybridLakeFlussSplits() throws Exception {
87+
LakeSnapshot lakeSnapshotInfo;
88+
try {
89+
lakeSnapshotInfo = flussAdmin.getLatestLakeSnapshot(tableInfo.getTablePath()).get();
90+
} catch (Exception exception) {
91+
if (ExceptionUtils.stripExecutionException(exception)
92+
instanceof LakeTableSnapshotNotExistException) {
93+
return null;
94+
}
95+
throw exception;
96+
}
8797

8898
boolean isLogTable = !tableInfo.hasPrimaryKey();
8999
boolean isPartitioned = tableInfo.isPartitioned();
90100

91-
Map<String, Map<Integer, List<LakeSplit>>> lakeSplits;
92-
if (remainingLakeSplits.isEmpty()) {
93-
if (loadLakeSplits) {
94-
lakeSplits =
95-
groupLakeSplits(
96-
lakeSource
97-
.createPlanner(
98-
(LakeSource.PlannerContext)
99-
lakeSnapshotInfo::getSnapshotId)
100-
.plan());
101-
} else {
102-
lakeSplits = Collections.emptyMap();
103-
}
104-
} else {
105-
lakeSplits = groupLakeSplits(remainingLakeSplits);
106-
}
101+
Map<String, Map<Integer, List<LakeSplit>>> lakeSplits =
102+
groupLakeSplits(
103+
lakeSource
104+
.createPlanner(
105+
(LakeSource.PlannerContext) lakeSnapshotInfo::getSnapshotId)
106+
.plan());
107107

108-
// TODO check whether commenting this out causes problems; verify via unit test results
109-
// if (lakeSplits.isEmpty()) {
110-
// return Collections.emptyList();
111-
// }
112108
Map<TableBucket, Long> tableBucketsOffset = lakeSnapshotInfo.getTableBucketsOffset();
113-
if (!tableBucketsOffsetState.isEmpty()) {
114-
// TODO tableBucketsOffsetState 未赋值
115-
tableBucketsOffset = tableBucketsOffsetState;
116-
}
117109
if (isPartitioned) {
118-
Map<Long, String> partitionNameById;
119-
if (newPartitionNameById.isEmpty()) {
120-
Set<PartitionInfo> partitionInfos = listPartitionSupplier.get();
121-
partitionNameById =
122-
partitionInfos.stream()
123-
.collect(
124-
Collectors.toMap(
125-
PartitionInfo::getPartitionId,
126-
PartitionInfo::getPartitionName));
127-
} else {
128-
partitionNameById = newPartitionNameById;
129-
}
130-
110+
Set<PartitionInfo> partitionInfos = listPartitionSupplier.get();
111+
Map<Long, String> partitionNameById =
112+
partitionInfos.stream()
113+
.collect(
114+
Collectors.toMap(
115+
PartitionInfo::getPartitionId,
116+
PartitionInfo::getPartitionName));
131117
return generatePartitionTableSplit(
132118
lakeSplits, isLogTable, tableBucketsOffset, partitionNameById);
133119
} else {
@@ -157,8 +143,7 @@ private List<SourceSplitBase> generatePartitionTableSplit(
157143
Map<String, Map<Integer, List<LakeSplit>>> lakeSplits,
158144
boolean isLogTable,
159145
Map<TableBucket, Long> tableBucketSnapshotLogOffset,
160-
Map<Long, String> partitionNameById)
161-
throws Exception {
146+
Map<Long, String> partitionNameById) {
162147
List<SourceSplitBase> splits = new ArrayList<>();
163148
Map<String, Long> flussPartitionIdByName =
164149
partitionNameById.entrySet().stream()
@@ -240,12 +225,10 @@ private List<SourceSplitBase> generateSplit(
240225
Map<Integer, Long> bucketEndOffset) {
241226
List<SourceSplitBase> splits = new ArrayList<>();
242227
if (isLogTable) {
243-
int needInitOffsetBucketsNum = bucketCount;
244228
if (lakeSplits != null) {
245229
splits.addAll(toLakeSnapshotSplits(lakeSplits, partitionName, partitionId));
246-
needInitOffsetBucketsNum = lakeSplits.size();
247230
}
248-
for (int bucket = 0; bucket < needInitOffsetBucketsNum; bucket++) {
231+
for (int bucket = 0; bucket < bucketCount; bucket++) {
249232
TableBucket tableBucket =
250233
new TableBucket(tableInfo.getTableId(), partitionId, bucket);
251234
Long snapshotLogOffset = tableBucketSnapshotLogOffset.get(tableBucket);
@@ -292,11 +275,14 @@ private List<SourceSplitBase> toLakeSnapshotSplits(
292275
@Nullable String partitionName,
293276
@Nullable Long partitionId) {
294277
List<SourceSplitBase> splits = new ArrayList<>();
278+
// we may have multiple table buckets; so we need to
279+
// introduce an index to make split unique
280+
int index = 0;
295281
for (LakeSplit lakeSplit :
296282
lakeSplits.values().stream().flatMap(List::stream).collect(Collectors.toList())) {
297283
TableBucket tableBucket =
298284
new TableBucket(tableInfo.getTableId(), partitionId, lakeSplit.bucket());
299-
splits.add(new LakeSnapshotSplit(tableBucket, partitionName, lakeSplit));
285+
splits.add(new LakeSnapshotSplit(tableBucket, partitionName, lakeSplit, index++));
300286
}
301287
return splits;
302288
}

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/lake/LakeSplitSerializer.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ public LakeSplitSerializer(SimpleVersionedSerializer<LakeSplit> sourceSplitSeria
4848

4949
public void serialize(DataOutputSerializer out, SourceSplitBase split) throws IOException {
5050
if (split instanceof LakeSnapshotSplit) {
51-
byte[] serializeBytes =
52-
sourceSplitSerializer.serialize(((LakeSnapshotSplit) split).getLakeSplit());
51+
LakeSnapshotSplit lakeSplit = (LakeSnapshotSplit) split;
52+
out.writeInt(lakeSplit.getSplitIndex());
53+
byte[] serializeBytes = sourceSplitSerializer.serialize(lakeSplit.getLakeSplit());
5354
out.writeInt(serializeBytes.length);
5455
out.write(serializeBytes);
5556
} else if (split instanceof LakeSnapshotAndFlussLogSplit) {
@@ -89,12 +90,13 @@ public SourceSplitBase deserialize(
8990
DataInputDeserializer input)
9091
throws IOException {
9192
if (splitKind == LAKE_SNAPSHOT_SPLIT_KIND) {
93+
int splitIndex = input.readInt();
9294
byte[] serializeBytes = new byte[input.readInt()];
9395
input.read(serializeBytes);
94-
LakeSplit fileStoreSourceSplit =
96+
LakeSplit lakeSplit =
9597
sourceSplitSerializer.deserialize(
9698
sourceSplitSerializer.getVersion(), serializeBytes);
97-
return new LakeSnapshotSplit(tableBucket, partition, fileStoreSourceSplit);
99+
return new LakeSnapshotSplit(tableBucket, partition, lakeSplit, splitIndex);
98100
} else if (splitKind == LAKE_SNAPSHOT_FLUSS_LOG_SPLIT_KIND) {
99101
List<LakeSplit> lakeSplits = null;
100102
if (input.readBoolean()) {

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/lake/split/LakeSnapshotAndFlussLogSplit.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,4 +107,23 @@ public String splitId() {
107107
public List<LakeSplit> getLakeSplits() {
108108
return lakeSnapshotSplits;
109109
}
110+
111+
@Override
112+
public String toString() {
113+
return "LakeSnapshotAndFlussLogSplit{"
114+
+ "lakeSnapshotSplits="
115+
+ lakeSnapshotSplits
116+
+ ", recordOffset="
117+
+ recordOffset
118+
+ ", startingOffset="
119+
+ startingOffset
120+
+ ", stoppingOffset="
121+
+ stoppingOffset
122+
+ ", tableBucket="
123+
+ tableBucket
124+
+ ", partitionName='"
125+
+ partitionName
126+
+ '\''
127+
+ '}';
128+
}
110129
}

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/lake/split/LakeSnapshotSplit.java

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,18 +32,25 @@ public class LakeSnapshotSplit extends SourceSplitBase {
3232

3333
private final long recordsToSplit;
3434

35+
private final int splitIndex;
36+
3537
public LakeSnapshotSplit(
36-
TableBucket tableBucket, @Nullable String partitionName, LakeSplit lakeSplit) {
37-
this(tableBucket, partitionName, lakeSplit, 0);
38+
TableBucket tableBucket,
39+
@Nullable String partitionName,
40+
LakeSplit lakeSplit,
41+
int splitIndex) {
42+
this(tableBucket, partitionName, lakeSplit, splitIndex, 0);
3843
}
3944

4045
public LakeSnapshotSplit(
4146
TableBucket tableBucket,
4247
@Nullable String partitionName,
4348
LakeSplit lakeSplit,
49+
int splitIndex,
4450
long recordsToSplit) {
4551
super(tableBucket, partitionName);
4652
this.lakeSplit = lakeSplit;
53+
this.splitIndex = splitIndex;
4754
this.recordsToSplit = recordsToSplit;
4855
}
4956

@@ -55,14 +62,20 @@ public long getRecordsToSplit() {
5562
return recordsToSplit;
5663
}
5764

65+
public int getSplitIndex() {
66+
return splitIndex;
67+
}
68+
5869
@Override
5970
public String splitId() {
6071
return toSplitId(
61-
"lake-snapshot-",
62-
new TableBucket(
63-
tableBucket.getTableId(),
64-
tableBucket.getPartitionId(),
65-
lakeSplit.bucket()));
72+
"lake-snapshot-",
73+
new TableBucket(
74+
tableBucket.getTableId(),
75+
tableBucket.getPartitionId(),
76+
lakeSplit.bucket()))
77+
+ "-"
78+
+ splitIndex;
6679
}
6780

6881
@Override
@@ -74,4 +87,21 @@ public boolean isLakeSplit() {
7487
public byte splitKind() {
7588
return LAKE_SNAPSHOT_SPLIT_KIND;
7689
}
90+
91+
@Override
92+
public String toString() {
93+
return "LakeSnapshotSplit{"
94+
+ "lakeSplit="
95+
+ lakeSplit
96+
+ ", recordsToSplit="
97+
+ recordsToSplit
98+
+ ", splitIndex="
99+
+ splitIndex
100+
+ ", tableBucket="
101+
+ tableBucket
102+
+ ", partitionName='"
103+
+ partitionName
104+
+ '\''
105+
+ '}';
106+
}
77107
}

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/lake/state/LakeSnapshotSplitState.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public SourceSplitBase toSourceSplit() {
4343
split.getTableBucket(),
4444
split.getPartitionName(),
4545
split.getLakeSplit(),
46+
split.getSplitIndex(),
4647
recordsToSplit);
4748
}
4849
}

fluss-flink/fluss-flink-common/src/main/java/org/apache/fluss/flink/source/FlinkSource.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,7 @@ public SplitEnumerator<SourceSplitBase, SourceEnumeratorState> restoreEnumerator
161161
splitEnumeratorContext,
162162
sourceEnumeratorState.getAssignedBuckets(),
163163
sourceEnumeratorState.getAssignedPartitions(),
164-
sourceEnumeratorState.getRemainingLakeSnapshotSplits(),
165-
sourceEnumeratorState.getTableBucketsOffset(),
164+
sourceEnumeratorState.getRemainingHybridLakeFlussSplits(),
166165
offsetsInitializer,
167166
scanPartitionDiscoveryIntervalMs,
168167
streaming,

0 commit comments

Comments
 (0)