/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.fluss.lake.paimon.flink;

import org.apache.fluss.config.ConfigOptions;
import org.apache.fluss.config.Configuration;
import org.apache.fluss.config.MemorySize;
import org.apache.fluss.lake.paimon.testutils.FlinkPaimonTieringTestBase;
import org.apache.fluss.metadata.TableBucket;
import org.apache.fluss.metadata.TablePath;
import org.apache.fluss.row.InternalRow;
import org.apache.fluss.server.replica.Replica;
import org.apache.fluss.server.testutils.FlussClusterExtension;
import org.apache.fluss.utils.clock.ManualClock;

import org.apache.flink.core.execution.JobClient;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.apache.flink.util.CloseableIterator;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.RegisterExtension;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

import static org.apache.flink.core.testutils.CommonTestUtils.waitUtil;
import static org.apache.fluss.flink.FlinkConnectorOptions.BOOTSTRAP_SERVERS;
import static org.apache.fluss.flink.source.testutils.FlinkRowAssertionsUtils.assertRowResultsIgnoreOrder;
import static org.apache.fluss.testutils.DataTestUtils.row;
/** IT case for Flink union read starting from a given timestamp. */
class FlinkUnionReadFromTimestampITCase extends FlinkPaimonTieringTestBase {

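    // manual clock injected into the Fluss cluster so the test can advance time
    // deterministically instead of sleeping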
    private static final ManualClock CLOCK = new ManualClock();

    @RegisterExtension
    public static final FlussClusterExtension FLUSS_CLUSTER_EXTENSION =
            FlussClusterExtension.builder()
                    .setClusterConf(initConfig())
                    .setNumOfTabletServers(3)
                    .setClock(CLOCK)
                    .build();

    private StreamTableEnvironment streamTEnv;

    protected static Configuration initConfig() {
        Configuration configuration = FlinkPaimonTieringTestBase.initConfig();
        // set the segment file size to 10 bytes so that log segments roll frequently
        configuration.set(ConfigOptions.LOG_SEGMENT_FILE_SIZE, MemorySize.parse("10b"));
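        // run the remote log task every 100ms so rolled segments are copied to
        // remote storage quickly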
        configuration.set(ConfigOptions.REMOTE_LOG_TASK_INTERVAL_DURATION, Duration.ofMillis(100));
        return configuration;
    }

    @BeforeAll
    static void beforeAll() {
        FlinkPaimonTieringTestBase.beforeAll(FLUSS_CLUSTER_EXTENSION.getClientConfig());
    }

    @BeforeEach
    public void beforeEach() {
        super.beforeEach();
        buildStreamTEnv();
    }

    @Override
    protected FlussClusterExtension getFlussClusterExtension() {
        return FLUSS_CLUSTER_EXTENSION;
    }

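    /**
     * Writes rows in rounds while advancing the manual clock, waits for local log segments to be
     * trimmed and the remote log to expire by TTL, and then verifies that union reads with
     * different startup timestamps return the expected rows.
     */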
    @Test
    void testUnionReadFromTimestamp() throws Exception {
        // start the tiering job first so data is continuously tiered to Paimon
        JobClient jobClient = buildTieringJob(execEnv);
        try {
            String tableName = "logTable_read_timestamp";
            TablePath tablePath = TablePath.of(DEFAULT_DB, tableName);
            long tableId = createLogTable(tablePath, 1);
            TableBucket t1Bucket = new TableBucket(tableId, 0);

            List<Row> rows = new ArrayList<>();
            for (int i = 0; i < 10; i++) {
                rows.addAll(writeRows(tablePath, 3));
                // advance the clock by 1s per round so that each round of writes
                // gets a distinct timestamp
                CLOCK.advanceTime(Duration.ofSeconds(1));
            }
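            // the leader replica should eventually report all 30 written rows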
            assertReplicaStatus(t1Bucket, rows.size());

            Replica t1Replica = FLUSS_CLUSTER_EXTENSION.waitAndGetLeaderReplica(t1Bucket);

            // wait until only 2 log segments remain local (by default, 2 segments are
            // kept in local storage)
            waitUtil(
                    () -> t1Replica.getLogTablet().logSegments().size() == 2,
                    Duration.ofMinutes(1),
                    "Failed to wait until only 2 segments remain in local storage.");

            // advance 10 days to trigger the remote log TTL
            CLOCK.advanceTime(Duration.ofDays(10));
            // wait until the remote log has expired; offset 10 should no longer be
            // fetchable from the remote log
            waitUtil(
                    () -> !t1Replica.getLogTablet().canFetchFromRemoteLog(10),
                    Duration.ofMinutes(1),
                    "Failed to wait for log offset 10 to expire from the remote log.");
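            // at this point the expired portion of the log should only be readable
            // through the tiered Paimon table, so the union read must fall back to
            // lake data for it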

            // verify scanning from timestamp 0 reads the full data set
            assertRowResultsIgnoreOrder(
                    streamTEnv
                            .executeSql(
                                    "select * from "
                                            + tableName
                                            + " /*+ OPTIONS('scan.startup.mode' = 'timestamp',\n"
                                            + "'scan.startup.timestamp' = '0') */")
                            .collect(),
                    rows,
                    true);

            // verify scanning from timestamp 2000 skips the rows written in the
            // first two rounds
            CloseableIterator<Row> actualRows =
                    streamTEnv
                            .executeSql(
                                    "select * from "
                                            + tableName
                                            + " /*+ OPTIONS('scan.startup.mode' = 'timestamp',\n"
                                            + "'scan.startup.timestamp' = '2000') */")
                            .collect();
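            // the first two rounds wrote 3 rows each before the clock reached 2000ms,
            // so the first 6 rows are excluded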
            List<Row> expectedRows = rows.stream().skip(2 * 3).collect(Collectors.toList());
            assertRowResultsIgnoreOrder(actualRows, expectedRows, true);

            // verify scan from earliest
            assertRowResultsIgnoreOrder(
                    streamTEnv
                            .executeSql(
                                    "select * from "
                                            + tableName
                                            + " /*+ OPTIONS('scan.startup.mode' = 'earliest') */")
                            .collect(),
                    rows,
                    true);
        } finally {
            jobClient.cancel();
        }
    }

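    /** Writes {@code rows} rows to the table and returns the rows expected on the Flink side. */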
    private List<Row> writeRows(TablePath tablePath, int rows) throws Exception {
        List<InternalRow> writtenRows = new ArrayList<>();
        List<Row> flinkRows = new ArrayList<>();
        for (int i = 0; i < rows; i++) {
            writtenRows.add(row(i, "v" + i));
            flinkRows.add(Row.of(i, "v" + i));
        }
        writeRows(tablePath, writtenRows, true);
        return flinkRows;
    }

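    /** Creates a streaming table environment and registers a Fluss catalog for the test DB. */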
    private void buildStreamTEnv() {
        String bootstrapServers = String.join(",", clientConf.get(ConfigOptions.BOOTSTRAP_SERVERS));
        // create table environment
        streamTEnv = StreamTableEnvironment.create(execEnv, EnvironmentSettings.inStreamingMode());
        // create catalog using SQL
        streamTEnv.executeSql(
                String.format(
                        "create catalog %s with ('type' = 'fluss', '%s' = '%s')",
                        CATALOG_NAME, BOOTSTRAP_SERVERS.key(), bootstrapServers));
        streamTEnv.executeSql("use catalog " + CATALOG_NAME);
        streamTEnv.executeSql("use " + DEFAULT_DB);
    }
}