* The name of the latest metadata.json rewritten to staging location. After the files are
* copied, this will be the root of the copied table.
- *
- * A list of all files added to the table between startVersion and endVersion, including their
- * original and target paths under the target prefix. This list covers both original and
- * rewritten files, allowing for copying to the target paths to form the copied table.
+ *
+ * A 'copy-plan'. This is a list of all files added to the table between startVersion and
+ * endVersion, including their original and target paths under the target prefix. This list
+ * covers both original and rewritten files, allowing for copying to the target paths to form
+ * the copied table.
*
*/
public interface RewriteTablePath extends Action {
@@ -91,9 +92,10 @@ interface Result {
String stagingLocation();
/**
- * Path to a comma-separated list of source and target paths for all files added to the table
- * between startVersion and endVersion, including original data files and metadata files
- * rewritten to staging.
+ * Result file list location. This file contains a 'copy-plan', a comma-separated list of all
+ * files added to the table between startVersion and endVersion, including their original and
+ * target paths under the target prefix. This list covers both original and rewritten files,
+ * allowing for copying to the target paths to form the copied table.
*/
String fileListLocation();
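
For context, here is a hedged sketch of driving the action end to end and consuming this `Result`. The `SparkActions.rewriteTablePath` entry point and the local-filesystem copy loop are illustrative assumptions (a real copy plan may live on S3/HDFS and need the table's `FileIO`); only `stagingLocation()` and `fileListLocation()` come from the interface above.

```java
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import org.apache.iceberg.Table;
import org.apache.iceberg.actions.RewriteTablePath;
import org.apache.iceberg.spark.actions.SparkActions;

class CopyTableExample {
  // Runs the action, then copies every "sourcePath,targetPath" pair in the plan.
  // Prefixes and the use of java.nio for copying are assumptions for this sketch.
  static void copyTable(Table table) throws IOException {
    RewriteTablePath.Result result =
        SparkActions.get()
            .rewriteTablePath(table)
            .rewriteLocationPrefix("s3://old-bucket/tbl", "s3://new-bucket/tbl")
            .execute();

    try (BufferedReader plan = Files.newBufferedReader(Paths.get(result.fileListLocation()))) {
      String line;
      while ((line = plan.readLine()) != null) {
        String[] pair = line.split(",", 2); // source path, target path
        Path target = Paths.get(pair[1]);
        Files.createDirectories(target.getParent());
        Files.copy(Paths.get(pair[0]), target, StandardCopyOption.REPLACE_EXISTING);
      }
    }
  }
}
```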
diff --git a/core/src/main/java/org/apache/iceberg/ManifestLists.java b/core/src/main/java/org/apache/iceberg/ManifestLists.java
index f20a481cf25a..113cb32df3d9 100644
--- a/core/src/main/java/org/apache/iceberg/ManifestLists.java
+++ b/core/src/main/java/org/apache/iceberg/ManifestLists.java
@@ -28,10 +28,10 @@
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
-class ManifestLists {
+public class ManifestLists {
private ManifestLists() {}
- static List<ManifestFile> read(InputFile manifestList) {
+ public static List<ManifestFile> read(InputFile manifestList) {
try (CloseableIterable<ManifestFile> files =
Avro.read(manifestList)
.rename("manifest_file", GenericManifestFile.class.getName())
@@ -50,7 +50,7 @@ static List<ManifestFile> read(InputFile manifestList) {
}
}
- static ManifestListWriter write(
+ public static ManifestListWriter write(
int formatVersion,
OutputFile manifestListFile,
long snapshotId,
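
The visibility change above exists so that code outside the `org.apache.iceberg` package (such as the Spark action later in this diff) can read a snapshot's manifest list directly. A minimal sketch of that usage; the table/snapshot wiring here is assumed:

```java
import java.util.List;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.ManifestLists;
import org.apache.iceberg.Snapshot;
import org.apache.iceberg.Table;

class ManifestListExample {
  // Reads every manifest referenced by a snapshot's manifest list file.
  static List<ManifestFile> manifestsOf(Table table, Snapshot snapshot) {
    return ManifestLists.read(table.io().newInputFile(snapshot.manifestListLocation()));
  }
}
```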
diff --git a/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
new file mode 100644
index 000000000000..4eea28d956ed
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/RewriteTablePathUtil.java
@@ -0,0 +1,304 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.deletes.PositionDelete;
+import org.apache.iceberg.deletes.PositionDeleteWriter;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.CloseableIterator;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.util.Pair;
+
+public class RewriteTablePathUtil {
+
+ public static List<Pair<String, String>> rewriteManifest(
+ FileIO io,
+ int format,
+ PartitionSpec spec,
+ OutputFile outputFile,
+ ManifestFile manifestFile,
+ Map<Integer, PartitionSpec> specsById,
+ String sourcePrefix,
+ String targetPrefix)
+ throws IOException {
+ try (ManifestWriter<DataFile> writer =
+ ManifestFiles.write(format, spec, outputFile, manifestFile.snapshotId());
+ ManifestReader<DataFile> reader =
+ ManifestFiles.read(manifestFile, io, specsById).select(Arrays.asList("*"))) {
+ return StreamSupport.stream(reader.entries().spliterator(), false)
+ .map(entry -> newDataFile(entry, spec, sourcePrefix, targetPrefix, writer))
+ .collect(Collectors.toList());
+ }
+ }
+
+ public static List<Pair<String, String>> rewriteDeleteManifest(
+ FileIO io,
+ int format,
+ PartitionSpec spec,
+ OutputFile outputFile,
+ ManifestFile manifestFile,
+ Map<Integer, PartitionSpec> specsById,
+ String sourcePrefix,
+ String targetPrefix,
+ String stagingLocation,
+ PositionDeleteReaderWriter positionDeleteReaderWriter)
+ throws IOException {
+ try (ManifestWriter<DeleteFile> writer =
+ ManifestFiles.writeDeleteManifest(format, spec, outputFile, manifestFile.snapshotId());
+ ManifestReader<DeleteFile> reader =
+ ManifestFiles.readDeleteManifest(manifestFile, io, specsById)
+ .select(Arrays.asList("*"))) {
+ return StreamSupport.stream(reader.entries().spliterator(), false)
+ .map(
+ entry -> {
+ try {
+ return newDeleteFile(
+ entry,
+ io,
+ spec,
+ sourcePrefix,
+ targetPrefix,
+ stagingLocation,
+ writer,
+ positionDeleteReaderWriter);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ })
+ .collect(Collectors.toList());
+ }
+ }
+
+ private static Pair<String, String> newDataFile(
+ ManifestEntry<DataFile> entry,
+ PartitionSpec spec,
+ String sourcePrefix,
+ String targetPrefix,
+ ManifestWriter<DataFile> writer) {
+ DataFile dataFile = entry.file();
+ String sourceDataFilePath = dataFile.location();
+ Preconditions.checkArgument(
+ sourceDataFilePath.startsWith(sourcePrefix),
+ "Encountered data file %s not under the source prefix %s",
+ sourceDataFilePath,
+ sourcePrefix);
+ String targetDataFilePath = newPath(sourceDataFilePath, sourcePrefix, targetPrefix);
+ DataFile newDataFile =
+ DataFiles.builder(spec).copy(entry.file()).withPath(targetDataFilePath).build();
+ appendEntryWithFile(entry, writer, newDataFile);
+ return Pair.of(sourceDataFilePath, newDataFile.location());
+ }
+
+ private static Pair<String, String> newDeleteFile(
+ ManifestEntry<DeleteFile> entry,
+ FileIO io,
+ PartitionSpec spec,
+ String sourcePrefix,
+ String targetPrefix,
+ String stagingLocation,
+ ManifestWriter<DeleteFile> writer,
+ PositionDeleteReaderWriter posDeleteReaderWriter)
+ throws IOException {
+
+ DeleteFile file = entry.file();
+
+ switch (file.content()) {
+ case POSITION_DELETES:
+ DeleteFile posDeleteFile =
+ rewritePositionDeleteFile(
+ io, file, spec, sourcePrefix, stagingLocation, targetPrefix, posDeleteReaderWriter);
+ String targetDeleteFilePath = newPath(file.location(), sourcePrefix, targetPrefix);
+ DeleteFile movedFile =
+ FileMetadata.deleteFileBuilder(spec)
+ .copy(posDeleteFile)
+ .withPath(targetDeleteFilePath)
+ .build();
+ appendEntryWithFile(entry, writer, movedFile);
+ return Pair.of(posDeleteFile.location(), movedFile.location());
+ case EQUALITY_DELETES:
+ DeleteFile eqDeleteFile = newEqualityDeleteFile(file, spec, sourcePrefix, targetPrefix);
+ appendEntryWithFile(entry, writer, eqDeleteFile);
+ return Pair.of(file.location(), eqDeleteFile.location());
+ default:
+ throw new UnsupportedOperationException("Unsupported delete file type: " + file.content());
+ }
+ }
+
+ private static <F extends ContentFile<F>> void appendEntryWithFile(
+ ManifestEntry<F> entry, ManifestWriter<F> writer, F file) {
+
+ switch (entry.status()) {
+ case ADDED:
+ writer.add(file);
+ break;
+ case EXISTING:
+ writer.existing(
+ file, entry.snapshotId(), entry.dataSequenceNumber(), entry.fileSequenceNumber());
+ break;
+ case DELETED:
+ writer.delete(file, entry.dataSequenceNumber(), entry.fileSequenceNumber());
+ break;
+ }
+ }
+
+ public interface PositionDeleteReaderWriter {
+ CloseableIterable<Record> reader(InputFile inputFile, FileFormat format, PartitionSpec spec);
+
+ PositionDeleteWriter<Record> writer(
+ OutputFile outputFile,
+ FileFormat format,
+ PartitionSpec spec,
+ StructLike partition,
+ Schema rowSchema)
+ throws IOException;
+ }
+
+ private static DeleteFile rewritePositionDeleteFile(
+ FileIO io,
+ DeleteFile current,
+ PartitionSpec spec,
+ String sourcePrefix,
+ String stagingLocation,
+ String targetPrefix,
+ PositionDeleteReaderWriter posDeleteReaderWriter)
+ throws IOException {
+ String path = current.location();
+ if (!path.startsWith(sourcePrefix)) {
+ throw new UnsupportedOperationException(
+ "Expected delete file to be under the source prefix: "
+ + sourcePrefix
+ + " but was "
+ + path);
+ }
+ String newPath = stagingPath(path, stagingLocation);
+
+ OutputFile targetFile = io.newOutputFile(newPath);
+ InputFile sourceFile = io.newInputFile(path);
+
+ try (CloseableIterable<Record> reader =
+ posDeleteReaderWriter.reader(sourceFile, current.format(), spec)) {
+ Record record = null;
+ Schema rowSchema = null;
+ CloseableIterator<Record> recordIt = reader.iterator();
+
+ if (recordIt.hasNext()) {
+ record = recordIt.next();
+ rowSchema = record.get(2) != null ? spec.schema() : null;
+ }
+
+ PositionDeleteWriter<Record> writer =
+ posDeleteReaderWriter.writer(
+ targetFile, current.format(), spec, current.partition(), rowSchema);
+
+ try (writer) {
+ if (record != null) {
+ writer.write(newPositionDeleteRecord(record, sourcePrefix, targetPrefix));
+ }
+
+ while (recordIt.hasNext()) {
+ record = recordIt.next();
+ writer.write(newPositionDeleteRecord(record, sourcePrefix, targetPrefix));
+ }
+ }
+ return writer.toDeleteFile();
+ }
+ }
+
+ private static PositionDelete newPositionDeleteRecord(
+ Record record, String sourcePrefix, String targetPrefix) {
+ PositionDelete delete = PositionDelete.create();
+ String oldPath = (String) record.get(0);
+ String newPath = oldPath;
+ if (oldPath.startsWith(sourcePrefix)) {
+ newPath = newPath(oldPath, sourcePrefix, targetPrefix);
+ }
+ delete.set(newPath, (Long) record.get(1), record.get(2));
+ return delete;
+ }
+
+ private static DeleteFile newEqualityDeleteFile(
+ DeleteFile file, PartitionSpec spec, String sourcePrefix, String targetPrefix) {
+ String path = file.location();
+
+ if (!path.startsWith(sourcePrefix)) {
+ throw new UnsupportedOperationException(
+ "Expected delete file to be under the source prefix: "
+ + sourcePrefix
+ + " but was "
+ + path);
+ }
+ int[] equalityFieldIds = file.equalityFieldIds().stream().mapToInt(Integer::intValue).toArray();
+ String newPath = newPath(path, sourcePrefix, targetPrefix);
+ return FileMetadata.deleteFileBuilder(spec)
+ .ofEqualityDeletes(equalityFieldIds)
+ .copy(file)
+ .withPath(newPath)
+ .withSplitOffsets(file.splitOffsets())
+ .build();
+ }
+
+ private static String newPath(String path, String sourcePrefix, String targetPrefix) {
+ return combinePaths(targetPrefix, relativize(path, sourcePrefix));
+ }
+
+ private static String combinePaths(String absolutePath, String relativePath) {
+ String combined = absolutePath;
+ if (!combined.endsWith("/")) {
+ combined += "/";
+ }
+ combined += relativePath;
+ return combined;
+ }
+
+ private static String fileName(String path) {
+ String filename = path;
+ int lastIndex = path.lastIndexOf(File.separator);
+ if (lastIndex != -1) {
+ filename = path.substring(lastIndex + 1);
+ }
+ return filename;
+ }
+
+ private static String relativize(String path, String prefix) {
+ String toRemove = prefix;
+ if (!toRemove.endsWith("/")) {
+ toRemove += "/";
+ }
+ if (!path.startsWith(toRemove)) {
+ throw new IllegalArgumentException(
+ String.format("Path %s does not start with %s", path, toRemove));
+ }
+ return path.substring(toRemove.length());
+ }
+
+ private static String stagingPath(String originalPath, String stagingLocation) {
+ return stagingLocation + fileName(originalPath);
+ }
+}
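
The path arithmetic above (`newPath`, `relativize`, `combinePaths`) amounts to prefix replacement with normalized trailing slashes. A standalone illustration of the same semantics; the method name here is hypothetical, not part of the utility:

```java
// Equivalent prefix-replacement semantics, shown standalone for clarity.
// rewrite("s3://old/db/tbl/data/f1.parquet", "s3://old/db/tbl", "s3://new/db/tbl")
//   -> "s3://new/db/tbl/data/f1.parquet"
static String rewrite(String path, String sourcePrefix, String targetPrefix) {
  String src = sourcePrefix.endsWith("/") ? sourcePrefix : sourcePrefix + "/";
  if (!path.startsWith(src)) {
    throw new IllegalArgumentException(
        String.format("Path %s does not start with %s", path, src));
  }
  String dst = targetPrefix.endsWith("/") ? targetPrefix : targetPrefix + "/";
  return dst + path.substring(src.length());
}
```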
diff --git a/core/src/main/java/org/apache/iceberg/TableMetadataUtil.java b/core/src/main/java/org/apache/iceberg/TableMetadataUtil.java
new file mode 100644
index 000000000000..474e28b5e31e
--- /dev/null
+++ b/core/src/main/java/org/apache/iceberg/TableMetadataUtil.java
@@ -0,0 +1,131 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.iceberg.TableMetadata.MetadataLogEntry;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Maps;
+
+public class TableMetadataUtil {
+ private TableMetadataUtil() {}
+
+ public static TableMetadata replacePaths(
+ TableMetadata metadata, String sourcePrefix, String targetPrefix) {
+ String newLocation = newPath(metadata.location(), sourcePrefix, targetPrefix);
+ List<Snapshot> newSnapshots = updatePathInSnapshots(metadata, sourcePrefix, targetPrefix);
+ List<MetadataLogEntry> metadataLogEntries =
+ updatePathInMetadataLogs(metadata, sourcePrefix, targetPrefix);
+ long snapshotId =
+ metadata.currentSnapshot() == null ? -1 : metadata.currentSnapshot().snapshotId();
+ Map<String, String> properties =
+ updateProperties(metadata.properties(), sourcePrefix, targetPrefix);
+
+ return new TableMetadata(
+ null,
+ metadata.formatVersion(),
+ metadata.uuid(),
+ newLocation,
+ metadata.lastSequenceNumber(),
+ metadata.lastUpdatedMillis(),
+ metadata.lastColumnId(),
+ metadata.currentSchemaId(),
+ metadata.schemas(),
+ metadata.defaultSpecId(),
+ metadata.specs(),
+ metadata.lastAssignedPartitionId(),
+ metadata.defaultSortOrderId(),
+ metadata.sortOrders(),
+ properties,
+ snapshotId,
+ newSnapshots,
+ null,
+ metadata.snapshotLog(),
+ metadataLogEntries,
+ metadata.refs(),
+ metadata.statisticsFiles(),
+ metadata.partitionStatisticsFiles(),
+ metadata.changes());
+ }
+
+ private static Map<String, String> updateProperties(
+ Map<String, String> tableProperties, String sourcePrefix, String targetPrefix) {
+ Map<String, String> properties = Maps.newHashMap(tableProperties);
+ updatePathInProperty(properties, sourcePrefix, targetPrefix, TableProperties.OBJECT_STORE_PATH);
+ updatePathInProperty(
+ properties, sourcePrefix, targetPrefix, TableProperties.WRITE_FOLDER_STORAGE_LOCATION);
+ updatePathInProperty(
+ properties, sourcePrefix, targetPrefix, TableProperties.WRITE_DATA_LOCATION);
+ updatePathInProperty(
+ properties, sourcePrefix, targetPrefix, TableProperties.WRITE_METADATA_LOCATION);
+
+ return properties;
+ }
+
+ private static void updatePathInProperty(
+ Map<String, String> properties,
+ String sourcePrefix,
+ String targetPrefix,
+ String propertyName) {
+ if (properties.containsKey(propertyName)) {
+ properties.put(
+ propertyName, newPath(properties.get(propertyName), sourcePrefix, targetPrefix));
+ }
+ }
+
+ private static List<MetadataLogEntry> updatePathInMetadataLogs(
+ TableMetadata metadata, String sourcePrefix, String targetPrefix) {
+ List<MetadataLogEntry> metadataLogEntries =
+ Lists.newArrayListWithCapacity(metadata.previousFiles().size());
+ for (MetadataLogEntry metadataLog : metadata.previousFiles()) {
+ MetadataLogEntry newMetadataLog =
+ new MetadataLogEntry(
+ metadataLog.timestampMillis(),
+ newPath(metadataLog.file(), sourcePrefix, targetPrefix));
+ metadataLogEntries.add(newMetadataLog);
+ }
+ return metadataLogEntries;
+ }
+
+ private static List<Snapshot> updatePathInSnapshots(
+ TableMetadata metadata, String sourcePrefix, String targetPrefix) {
+ List<Snapshot> newSnapshots = Lists.newArrayListWithCapacity(metadata.snapshots().size());
+ for (Snapshot snapshot : metadata.snapshots()) {
+ String newManifestListLocation =
+ newPath(snapshot.manifestListLocation(), sourcePrefix, targetPrefix);
+ Snapshot newSnapshot =
+ new BaseSnapshot(
+ snapshot.sequenceNumber(),
+ snapshot.snapshotId(),
+ snapshot.parentId(),
+ snapshot.timestampMillis(),
+ snapshot.operation(),
+ snapshot.summary(),
+ snapshot.schemaId(),
+ newManifestListLocation);
+ newSnapshots.add(newSnapshot);
+ }
+ return newSnapshots;
+ }
+
+ private static String newPath(String path, String sourcePrefix, String targetPrefix) {
+ return path.replaceFirst(sourcePrefix, targetPrefix);
+ }
+}
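
A hedged sketch of how `replacePaths` might be used to stage a rewritten metadata file. `TableMetadataParser.overwrite` is existing Iceberg API; the prefixes and staging path parameters are illustrative:

```java
import org.apache.iceberg.HasTableOperations;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableMetadata;
import org.apache.iceberg.TableMetadataParser;
import org.apache.iceberg.TableMetadataUtil;
import org.apache.iceberg.io.FileIO;

class ReplacePathsExample {
  // Rewrites all absolute paths in the current metadata, then stages the result.
  static void stageRewrittenMetadata(
      Table table, FileIO io, String sourcePrefix, String targetPrefix, String stagedPath) {
    TableMetadata current = ((HasTableOperations) table).operations().current();
    TableMetadata rewritten = TableMetadataUtil.replacePaths(current, sourcePrefix, targetPrefix);
    TableMetadataParser.overwrite(rewritten, io.newOutputFile(stagedPath));
  }
}
```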
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java
index 53ce7418f3ec..b91331b0f70f 100644
--- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/BaseSparkAction.java
@@ -139,6 +139,11 @@ protected Table newStaticTable(TableMetadata metadata, FileIO io) {
return new BaseTable(ops, metadata.metadataFileLocation());
}
+ protected Table newStaticTable(String metadataFileLocation, FileIO io) {
+ StaticTableOperations ops = new StaticTableOperations(metadataFileLocation, io);
+ return new BaseTable(ops, metadataFileLocation);
+ }
+
protected Dataset<FileInfo> contentFileDS(Table table) {
return contentFileDS(table, null);
}
diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java
new file mode 100644
index 000000000000..e3a64ad8a1a9
--- /dev/null
+++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java
@@ -0,0 +1,735 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.iceberg.spark.actions;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.UUID;
+import java.util.stream.Collectors;
+import org.apache.iceberg.FileFormat;
+import org.apache.iceberg.HasTableOperations;
+import org.apache.iceberg.ManifestFile;
+import org.apache.iceberg.ManifestLists;
+import org.apache.iceberg.PartitionSpec;
+import org.apache.iceberg.RewriteTablePathUtil;
+import org.apache.iceberg.Schema;
+import org.apache.iceberg.SerializableTable;
+import org.apache.iceberg.Snapshot;
+import org.apache.iceberg.StaticTableOperations;
+import org.apache.iceberg.StructLike;
+import org.apache.iceberg.Table;
+import org.apache.iceberg.TableMetadata;
+import org.apache.iceberg.TableMetadata.MetadataLogEntry;
+import org.apache.iceberg.TableMetadataParser;
+import org.apache.iceberg.TableMetadataUtil;
+import org.apache.iceberg.actions.ImmutableRewriteTablePath;
+import org.apache.iceberg.actions.RewriteTablePath;
+import org.apache.iceberg.avro.Avro;
+import org.apache.iceberg.data.Record;
+import org.apache.iceberg.data.avro.DataReader;
+import org.apache.iceberg.data.avro.DataWriter;
+import org.apache.iceberg.data.orc.GenericOrcReader;
+import org.apache.iceberg.data.orc.GenericOrcWriter;
+import org.apache.iceberg.data.parquet.GenericParquetReaders;
+import org.apache.iceberg.data.parquet.GenericParquetWriter;
+import org.apache.iceberg.deletes.PositionDeleteWriter;
+import org.apache.iceberg.exceptions.RuntimeIOException;
+import org.apache.iceberg.io.CloseableIterable;
+import org.apache.iceberg.io.DeleteSchemaUtil;
+import org.apache.iceberg.io.FileAppender;
+import org.apache.iceberg.io.FileIO;
+import org.apache.iceberg.io.InputFile;
+import org.apache.iceberg.io.OutputFile;
+import org.apache.iceberg.orc.ORC;
+import org.apache.iceberg.parquet.Parquet;
+import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
+import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.spark.JobGroupInfo;
+import org.apache.iceberg.util.Pair;
+import org.apache.spark.api.java.function.MapPartitionsFunction;
+import org.apache.spark.broadcast.Broadcast;
+import org.apache.spark.sql.Dataset;
+import org.apache.spark.sql.Encoder;
+import org.apache.spark.sql.Encoders;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.SaveMode;
+import org.apache.spark.sql.SparkSession;
+import org.apache.spark.sql.functions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import scala.Tuple2;
+
+public class RewriteTablePathSparkAction extends BaseSparkAction<RewriteTablePathSparkAction>
+ implements RewriteTablePath {
+
+ private static final Logger LOG = LoggerFactory.getLogger(RewriteTablePathSparkAction.class);
+ private static final String RESULT_LOCATION = "file-list";
+
+ private String sourcePrefix;
+ private String targetPrefix;
+ private String startVersionName;
+ private String endVersionName;
+ private String stagingDir;
+
+ private final Table table;
+
+ RewriteTablePathSparkAction(SparkSession spark, Table table) {
+ super(spark);
+ this.table = table;
+ }
+
+ @Override
+ protected RewriteTablePath self() {
+ return this;
+ }
+
+ @Override
+ public RewriteTablePath rewriteLocationPrefix(String sPrefix, String tPrefix) {
+ Preconditions.checkArgument(
+ sPrefix != null && !sPrefix.isEmpty(), "Source prefix('%s') cannot be empty.", sPrefix);
+ this.sourcePrefix = sPrefix;
+ this.targetPrefix = tPrefix;
+ return this;
+ }
+
+ @Override
+ public RewriteTablePath startVersion(String sVersion) {
+ Preconditions.checkArgument(
+ sVersion != null && !sVersion.trim().isEmpty(),
+ "Last copied version('%s') cannot be empty.",
+ sVersion);
+ this.startVersionName = sVersion;
+ return this;
+ }
+
+ @Override
+ public RewriteTablePath endVersion(String eVersion) {
+ Preconditions.checkArgument(
+ eVersion != null && !eVersion.trim().isEmpty(),
+ "End version('%s') cannot be empty.",
+ eVersion);
+ this.endVersionName = eVersion;
+ return this;
+ }
+
+ @Override
+ public RewriteTablePath stagingLocation(String stagingLocation) {
+ Preconditions.checkArgument(
+ stagingLocation != null && !stagingLocation.isEmpty(),
+ "Staging location('%s') cannot be empty.",
+ stagingLocation);
+ this.stagingDir = stagingLocation;
+ return this;
+ }
+
+ @Override
+ public Result execute() {
+ validateInputs();
+ JobGroupInfo info = newJobGroupInfo("COPY-TABLE", jobDesc());
+ return withJobGroupInfo(info, this::doExecute);
+ }
+
+ private Result doExecute() {
+ String resultLocation = rebuildMetadata();
+ return ImmutableRewriteTablePath.Result.builder()
+ .stagingLocation(stagingDir)
+ .fileListLocation(resultLocation)
+ .latestVersion(fileName(endVersionName))
+ .build();
+ }
+
+ private void validateInputs() {
+ Preconditions.checkArgument(
+ sourcePrefix != null && !sourcePrefix.isEmpty(),
+ "Source prefix('%s') cannot be empty.",
+ sourcePrefix);
+ Preconditions.checkArgument(
+ targetPrefix != null && !targetPrefix.isEmpty(),
+ "Target prefix('%s') cannot be empty.",
+ targetPrefix);
+ Preconditions.checkArgument(
+ !sourcePrefix.equals(targetPrefix),
+ "Source prefix cannot be the same as target prefix (%s)",
+ sourcePrefix);
+
+ validateAndSetEndVersion();
+ validateAndSetStartVersion();
+
+ if (stagingDir == null) {
+ stagingDir = getMetadataLocation(table) + "copy-table-staging-" + UUID.randomUUID() + "/";
+ } else if (!stagingDir.endsWith("/")) {
+ stagingDir = stagingDir + "/";
+ }
+ }
+
+ private void validateAndSetEndVersion() {
+ TableMetadata tableMetadata = ((HasTableOperations) table).operations().current();
+
+ if (endVersionName == null) {
+ LOG.info("No end version specified. Will stage all files to the latest table version.");
+ Preconditions.checkNotNull(tableMetadata.metadataFileLocation());
+ this.endVersionName = tableMetadata.metadataFileLocation();
+ } else {
+ this.endVersionName = validateVersion(tableMetadata, endVersionName);
+ }
+ }
+
+ private void validateAndSetStartVersion() {
+ TableMetadata tableMetadata = ((HasTableOperations) table).operations().current();
+
+ if (startVersionName != null) {
+ this.startVersionName = validateVersion(tableMetadata, startVersionName);
+ }
+ }
+
+ private String validateVersion(TableMetadata tableMetadata, String versionFileName) {
+ String versionFile = versionFile(tableMetadata, versionFileName);
+
+ Preconditions.checkNotNull(
+ versionFile, "Version file %s does not exist in metadata log.", versionFile);
+ Preconditions.checkArgument(
+ fileExist(versionFile), "Version file %s does not exist.", versionFile);
+ return versionFile;
+ }
+
+ private String versionFile(TableMetadata metadata, String versionFileName) {
+ if (versionInFilePath(metadata.metadataFileLocation(), versionFileName)) {
+ return metadata.metadataFileLocation();
+ }
+
+ for (MetadataLogEntry log : metadata.previousFiles()) {
+ if (versionInFilePath(log.file(), versionFileName)) {
+ return log.file();
+ }
+ }
+ return null;
+ }
+
+ private boolean versionInFilePath(String path, String version) {
+ return fileName(path).equals(version);
+ }
+
+ private String jobDesc() {
+ if (startVersionName == null) {
+ return String.format(
+ "Replacing path prefixes '%s' with '%s' in the metadata files of table %s,"
+ + "up to version '%s'.",
+ sourcePrefix, targetPrefix, table.name(), endVersionName);
+ } else {
+ return String.format(
+ "Replacing path prefixes '%s' with '%s' in the metadata files of table %s,"
+ + "from version '%s' to '%s'.",
+ sourcePrefix, targetPrefix, table.name(), startVersionName, endVersionName);
+ }
+ }
+
+ /**
+ *
+ *
+ *