Commit

Fix
wangyum committed Apr 12, 2021
1 parent ad2754b commit b39af10
Showing 2 changed files with 15 additions and 14 deletions.
16 changes: 8 additions & 8 deletions docs/sql-data-sources-load-save-functions.md
@@ -107,7 +107,7 @@ For example, you can control bloom filters and dictionary encodings for ORC data
 The following ORC example will create bloom filter and use dictionary encoding only for `favorite_color`.
 For Parquet, there exists `parquet.bloom.filter.enabled` and `parquet.enable.dictionary`, too.
 To find more detailed information about the extra ORC/Parquet options,
-visit the official Apache ORC/Parquet websites.
+visit the official Apache [ORC](https://orc.apache.org/docs/spark-config.html) / [Parquet](https://github.com/apache/parquet-mr/tree/master/parquet-hadoop) websites.

ORC data source:

@@ -172,15 +172,15 @@ Parquet data source:

 {% highlight sql %}
 CREATE TABLE users_with_options (
-    name STRING,
-    favorite_color STRING,
-    favorite_numbers array<integer>
+  name STRING,
+  favorite_color STRING,
+  favorite_numbers array<integer>
 ) USING parquet
 OPTIONS (
-    `parquet.bloom.filter.enabled#favorite_color` true,
-    `parquet.bloom.filter.expected.ndv#favorite_color` 1000000,
-    parquet.enable.dictionary true,
-    parquet.page.write-checksum.enabled true
+  `parquet.bloom.filter.enabled#favorite_color` true,
+  `parquet.bloom.filter.expected.ndv#favorite_color` 1000000,
+  parquet.enable.dictionary true,
+  parquet.page.write-checksum.enabled true
 )
 {% endhighlight %}
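The `OPTIONS` in the documentation hunk above can also be supplied programmatically. A minimal sketch, assuming an active `SparkSession` named `spark` and an illustrative output path (neither is part of this commit); the option keys mirror the SQL example:

```scala
// Sketch: write Parquet with the same per-column bloom filter options
// as the CREATE TABLE example above. The `#favorite_color`-style suffix
// scopes an option to one column; here we use the `id` column instead.
spark.range(100).selectExpr("id * 2 AS id")
  .write
  .option("parquet.bloom.filter.enabled#id", true)
  .option("parquet.bloom.filter.expected.ndv#id", 1000000L)
  .option("parquet.enable.dictionary", false)
  .option("parquet.page.write-checksum.enabled", true)
  .parquet("/tmp/users_with_options")  // illustrative path
```

`DataFrameWriter.option` has `Boolean` and `Long` overloads, so the values do not need to be passed as strings.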

@@ -1637,15 +1637,16 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared

   test("SPARK-34562: Bloom filter push down") {
     withTempPath { dir =>
-      val path = dir.toURI.toString
-      spark.range(100).selectExpr("id * 2 AS id").write
-        .option(ParquetOutputFormat.BLOOM_FILTER_ENABLED + "#id", "true")
+      val path = dir.getCanonicalPath
+      spark.range(100).selectExpr("id * 2 AS id")
+        .write
+        .option(ParquetOutputFormat.BLOOM_FILTER_ENABLED + "#id", true)
         // Disable dictionary because the distinct values less than 40000.
         .option(ParquetOutputFormat.ENABLE_DICTIONARY, false)
         .parquet(path)
 
       Seq(true, false).foreach { bloomFilterEnabled =>
-        withSQLConf(
-          ParquetInputFormat.DICTIONARY_FILTERING_ENABLED -> "false",
-          ParquetInputFormat.BLOOM_FILTERING_ENABLED -> bloomFilterEnabled.toString) {
+        withSQLConf(ParquetInputFormat.BLOOM_FILTERING_ENABLED -> bloomFilterEnabled.toString) {
           val accu = new NumRowGroupsAcc
           sparkContext.register(accu)
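For readers unfamiliar with why toggling `ParquetInputFormat.BLOOM_FILTERING_ENABLED` lets the reader skip row groups: a Bloom filter answers "definitely absent" or "maybe present" for a value, with no false negatives, so a row group whose filter reports "definitely absent" for the pushed-down predicate value can be skipped without reading it. A minimal illustrative sketch — not Parquet's actual implementation; `SimpleBloomFilter` and its hashing scheme are invented for this example:

```scala
import java.util.BitSet

// Toy Bloom filter over Long values (illustrative only).
class SimpleBloomFilter(numBits: Int, numHashes: Int) {
  private val bits = new BitSet(numBits)

  // Derive `numHashes` bit positions from one item via a cheap
  // multiplicative double-hashing scheme (real filters use stronger hashes).
  private def positions(item: Long): Seq[Int] =
    (1 to numHashes).map { i =>
      val h = item * 0x9E3779B97F4A7C15L + i * 0xC2B2AE3D27D4EB4FL
      math.floorMod(h, numBits.toLong).toInt
    }

  def put(item: Long): Unit = positions(item).foreach(i => bits.set(i))

  // false => definitely not present (a reader may skip this row group);
  // true  => maybe present (the row group must still be read).
  def mightContain(item: Long): Boolean = positions(item).forall(i => bits.get(i))
}

// Mirroring the test above, which writes only even ids (id * 2):
val bf = new SimpleBloomFilter(1 << 16, 3)
(0L until 200L by 2L).foreach(bf.put)
assert((0L until 200L by 2L).forall(bf.mightContain))  // no false negatives
```

Odd values will usually (but not provably always) return `false`, which is exactly the one-sided guarantee that makes skipping safe.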
