From bb6dda9354aacc9bc48cf84d7104ed801d01491f Mon Sep 17 00:00:00 2001
From: sivabalan
Date: Wed, 13 Apr 2022 14:20:28 -0400
Subject: [PATCH] Fixing fetching partitions in GlueSyncClient

---
 .../aws/sync/AWSGlueCatalogSyncClient.java | 20 ++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
index 97e47deed817..81c05ed132a3 100644
--- a/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
+++ b/hudi-aws/src/main/java/org/apache/hudi/aws/sync/AWSGlueCatalogSyncClient.java
@@ -94,13 +94,19 @@ public AWSGlueCatalogSyncClient(HiveSyncConfig syncConfig, Configuration hadoopC
   @Override
   public List getAllPartitions(String tableName) {
     try {
-      GetPartitionsRequest request = new GetPartitionsRequest();
-      request.withDatabaseName(databaseName).withTableName(tableName);
-      GetPartitionsResult result = awsGlue.getPartitions(request);
-      return result.getPartitions()
-          .stream()
-          .map(p -> new Partition(p.getValues(), p.getStorageDescriptor().getLocation()))
-          .collect(Collectors.toList());
+      List partitions = new ArrayList<>();
+      String nextToken = null;
+      do {
+        GetPartitionsResult result = awsGlue.getPartitions(new GetPartitionsRequest()
+            .withDatabaseName(databaseName)
+            .withTableName(tableName)
+            .withNextToken(nextToken));
+        partitions.addAll(result.getPartitions().stream()
+            .map(p -> new Partition(p.getValues(), p.getStorageDescriptor().getLocation()))
+            .collect(Collectors.toList()));
+        nextToken = result.getNextToken();
+      } while (nextToken != null);
+      return partitions;
     } catch (Exception e) {
       throw new HoodieGlueSyncException("Failed to get all partitions for table " + tableId(databaseName, tableName), e);
     }