From de11dddfa456fa50f3adf1fd4da4a588a5308986 Mon Sep 17 00:00:00 2001 From: johnnyaug Date: Mon, 31 Jul 2023 14:54:31 +0300 Subject: [PATCH 1/3] Docs update: metaclient version --- docs/howto/export.md | 2 +- docs/howto/garbage-collection-committed.md | 15 ++++++++------- docs/howto/garbage-collection-uncommitted.md | 5 +++-- docs/howto/garbage-collection.md | 10 +++++----- docs/reference/spark-client.md | 8 ++++---- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/docs/howto/export.md b/docs/howto/export.md index 8e38eb7ac3d..1677111b0ac 100644 --- a/docs/howto/export.md +++ b/docs/howto/export.md @@ -56,7 +56,7 @@ The complete `spark-submit` command would look as follows: spark-submit --conf spark.hadoop.lakefs.api.url=https:///api/v1 \ --conf spark.hadoop.lakefs.api.access_key= \ --conf spark.hadoop.lakefs.api.secret_key= \ - --packages io.lakefs:lakefs-spark-client-301_2.12:0.9.0 \ + --packages io.lakefs:lakefs-spark-client-301_2.12:0.9.1 \ --class io.treeverse.clients.Main export-app example-repo s3://example-bucket/prefix \ --branch=example-branch ``` diff --git a/docs/howto/garbage-collection-committed.md b/docs/howto/garbage-collection-committed.md index ccb6fb1270d..3372094226d 100644 --- a/docs/howto/garbage-collection-committed.md +++ b/docs/howto/garbage-collection-committed.md @@ -11,8 +11,9 @@ has_children: false {: .warning-title } > Deprecation notice -> -> This page describes a deprecated feature. Please visit the new [garbage collection documentation](./garbage-collection.md). +> +> This feature will be supported up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. +> Please visit the new [garbage collection documentation](./garbage-collection.md). By default, lakeFS keeps all your objects forever. This allows you to travel back in time to previous versions of your data. However, sometimes you may want to hard-delete your objects - namely, delete them from the underlying storage. @@ -115,7 +116,7 @@ spark-submit --class io.treeverse.clients.GarbageCollector \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.s3a.access.key= \ -c spark.hadoop.fs.s3a.secret.key= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo us-east-1 ``` @@ -128,7 +129,7 @@ spark-submit --class io.treeverse.clients.GarbageCollector \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.s3a.access.key= \ -c spark.hadoop.fs.s3a.secret.key= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-301/0.9.0/lakefs-spark-client-301-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-301/0.9.1/lakefs-spark-client-301-assembly-0.9.1.jar \ example-repo us-east-1 ``` @@ -144,7 +145,7 @@ spark-submit --class io.treeverse.clients.GarbageCollector \ -c spark.hadoop.lakefs.api.access_key= \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.azure.account.key..dfs.core.windows.net= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` @@ -161,7 +162,7 @@ spark-submit --class io.treeverse.clients.GarbageCollector \ -c spark.hadoop.fs.azure.account.oauth2.client.id..dfs.core.windows.net= \ -c spark.hadoop.fs.azure.account.oauth2.client.secret..dfs.core.windows.net= \ -c spark.hadoop.fs.azure.account.oauth2.client.endpoint..dfs.core.windows.net=https://login.microsoftonline.com//oauth2/token \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` @@ -189,7 +190,7 @@ spark-submit --class io.treeverse.clients.GarbageCollector \ -c spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem \ -c spark.hadoop.fs.AbstractFileSystem.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS \ -c spark.hadoop.lakefs.gc.do_sweep=false \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` diff --git a/docs/howto/garbage-collection-uncommitted.md b/docs/howto/garbage-collection-uncommitted.md index 9957d86f6be..c1d21ab77f2 100644 --- a/docs/howto/garbage-collection-uncommitted.md +++ b/docs/howto/garbage-collection-uncommitted.md @@ -11,8 +11,9 @@ has_children: false {: .warning-title } > Deprecation notice -> -> This page describes a deprecated feature. Please visit the new [garbage collection documentation](./garbage-collection.md). +> +> This feature will be supported up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. +> Please visit the new [garbage collection documentation](./garbage-collection.md). Deletion of objects that were never committed was always a difficulty for lakeFS, see [#1933](https://github.com/treeverse/lakeFS/issues/1933) for more details. Examples for diff --git a/docs/howto/garbage-collection.md b/docs/howto/garbage-collection.md index 5ab2e07abce..044a72c17e6 100644 --- a/docs/howto/garbage-collection.md +++ b/docs/howto/garbage-collection.md @@ -116,7 +116,7 @@ spark-submit --class io.treeverse.gc.GarbageCollection \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.s3a.access.key= \ -c spark.hadoop.fs.s3a.secret.key= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo us-east-1 ``` @@ -129,7 +129,7 @@ spark-submit --class io.treeverse.gc.GarbageCollection \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.s3a.access.key= \ -c spark.hadoop.fs.s3a.secret.key= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-301/0.9.0/lakefs-spark-client-301-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-301/0.9.1/lakefs-spark-client-301-assembly-0.9.1.jar \ example-repo us-east-1 ``` @@ -145,7 +145,7 @@ spark-submit --class io.treeverse.gc.GarbageCollection \ -c spark.hadoop.lakefs.api.access_key= \ -c spark.hadoop.lakefs.api.secret_key= \ -c spark.hadoop.fs.azure.account.key..dfs.core.windows.net= \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` @@ -162,7 +162,7 @@ spark-submit --class io.treeverse.gc.GarbageCollection \ -c spark.hadoop.fs.azure.account.oauth2.client.id..dfs.core.windows.net= \ -c spark.hadoop.fs.azure.account.oauth2.client.secret..dfs.core.windows.net= \ -c spark.hadoop.fs.azure.account.oauth2.client.endpoint..dfs.core.windows.net=https://login.microsoftonline.com//oauth2/token \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` @@ -190,7 +190,7 @@ spark-submit --class io.treeverse.gc.GarbageCollection \ -c spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem \ -c spark.hadoop.fs.AbstractFileSystem.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS \ -c spark.hadoop.lakefs.gc.do_sweep=false \ - http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar \ + http://treeverse-clients-us-east.s3-website-us-east-1.amazonaws.com/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar \ example-repo ``` diff --git a/docs/reference/spark-client.md b/docs/reference/spark-client.md index fcbf879be87..082ac187457 100644 --- a/docs/reference/spark-client.md +++ b/docs/reference/spark-client.md @@ -33,11 +33,11 @@ Start Spark Shell / PySpark with the `--packages` flag: higher versions. ```bash - spark-shell --packages io.lakefs:lakefs-spark-client-301_2.12:0.9.0 + spark-shell --packages io.lakefs:lakefs-spark-client-301_2.12:0.9.1 ``` Alternatively an assembled jar is available on S3, at - `s3://treeverse-clients-us-east/lakefs-spark-client-301/0.9.0/lakefs-spark-client-301-assembly-0.9.0.jar` + `s3://treeverse-clients-us-east/lakefs-spark-client-301/0.9.1/lakefs-spark-client-301-assembly-0.9.1.jar`
@@ -45,11 +45,11 @@ Start Spark Shell / PySpark with the `--packages` flag: versions and higher Hadoop versions. ```bash - spark-shell --packages io.lakefs:lakefs-spark-client-312-hadoop3-assembly_2.12:0.9.0 + spark-shell --packages io.lakefs:lakefs-spark-client-312-hadoop3-assembly_2.12:0.9.1 ``` Alternatively an assembled jar is available on S3, at - `s3://treeverse-clients-us-east/lakefs-spark-client-312-hadoop3/0.9.0/lakefs-spark-client-312-hadoop3-assembly-0.9.0.jar` + `s3://treeverse-clients-us-east/lakefs-spark-client-312-hadoop3/0.9.1/lakefs-spark-client-312-hadoop3-assembly-0.9.1.jar`
## Configuration From d9ac58c6c37332b8f3af0064139f79d8450a2073 Mon Sep 17 00:00:00 2001 From: Yoni Date: Mon, 31 Jul 2023 15:02:29 +0300 Subject: [PATCH 2/3] Update garbage-collection-committed.md --- docs/howto/garbage-collection-committed.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/howto/garbage-collection-committed.md b/docs/howto/garbage-collection-committed.md index 3372094226d..c7f1a07009d 100644 --- a/docs/howto/garbage-collection-committed.md +++ b/docs/howto/garbage-collection-committed.md @@ -12,7 +12,7 @@ has_children: false {: .warning-title } > Deprecation notice > -> This feature will be supported up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. +> This feature will be available up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. > Please visit the new [garbage collection documentation](./garbage-collection.md). By default, lakeFS keeps all your objects forever. This allows you to travel back in time to previous versions of your data. From 3c4d2c1380da8ed5a523513fd5b79d236e48142a Mon Sep 17 00:00:00 2001 From: Yoni Date: Mon, 31 Jul 2023 15:02:54 +0300 Subject: [PATCH 3/3] Update garbage-collection-uncommitted.md --- docs/howto/garbage-collection-uncommitted.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/howto/garbage-collection-uncommitted.md b/docs/howto/garbage-collection-uncommitted.md index c1d21ab77f2..1aaff9ca7e7 100644 --- a/docs/howto/garbage-collection-uncommitted.md +++ b/docs/howto/garbage-collection-uncommitted.md @@ -12,7 +12,7 @@ has_children: false {: .warning-title } > Deprecation notice > -> This feature will be supported up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. +> This feature will be available up to version 0.9.1 of the lakeFS metadata client. It will be discontinued in subsequent versions. > Please visit the new [garbage collection documentation](./garbage-collection.md). Deletion of objects that were never committed was always a difficulty for lakeFS, see