Skip to content

Commit

Permalink
[SPARK-2953] Allow using short names for io compression codecs
Browse files Browse the repository at this point in the history
Instead of requiring "org.apache.spark.io.LZ4CompressionCodec", it is easier for users if Spark just accepts "lz4", "lzf", "snappy".

Author: Reynold Xin <[email protected]>

Closes #1873 from rxin/compressionCodecShortForm and squashes the following commits:

9f50962 [Reynold Xin] Specify short-form compression codec names first.
63f78ee [Reynold Xin] Updated configuration documentation.
47b3848 [Reynold Xin] [SPARK-2953] Allow using short names for io compression codecs

(cherry picked from commit 676f982)
Signed-off-by: Reynold Xin <[email protected]>
  • Loading branch information
rxin committed Aug 13, 2014
1 parent cffd9bb commit 837bf60
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 5 deletions.
11 changes: 9 additions & 2 deletions core/src/main/scala/org/apache/spark/io/CompressionCodec.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,24 @@ trait CompressionCodec {


private[spark] object CompressionCodec {

private val shortCompressionCodecNames = Map(
"lz4" -> classOf[LZ4CompressionCodec].getName,
"lzf" -> classOf[LZFCompressionCodec].getName,
"snappy" -> classOf[SnappyCompressionCodec].getName)

def createCodec(conf: SparkConf): CompressionCodec = {
createCodec(conf, conf.get("spark.io.compression.codec", DEFAULT_COMPRESSION_CODEC))
}

def createCodec(conf: SparkConf, codecName: String): CompressionCodec = {
val ctor = Class.forName(codecName, true, Utils.getContextOrSparkClassLoader)
val codecClass = shortCompressionCodecNames.getOrElse(codecName.toLowerCase, codecName)
val ctor = Class.forName(codecClass, true, Utils.getContextOrSparkClassLoader)
.getConstructor(classOf[SparkConf])
ctor.newInstance(conf).asInstanceOf[CompressionCodec]
}

val DEFAULT_COMPRESSION_CODEC = classOf[SnappyCompressionCodec].getName
val DEFAULT_COMPRESSION_CODEC = "snappy"
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,33 @@ class CompressionCodecSuite extends FunSuite {
testCodec(codec)
}

test("lz4 compression codec short form") {
val codec = CompressionCodec.createCodec(conf, "lz4")
assert(codec.getClass === classOf[LZ4CompressionCodec])
testCodec(codec)
}

test("lzf compression codec") {
val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName)
assert(codec.getClass === classOf[LZFCompressionCodec])
testCodec(codec)
}

test("lzf compression codec short form") {
val codec = CompressionCodec.createCodec(conf, "lzf")
assert(codec.getClass === classOf[LZFCompressionCodec])
testCodec(codec)
}

test("snappy compression codec") {
val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName)
assert(codec.getClass === classOf[SnappyCompressionCodec])
testCodec(codec)
}

test("snappy compression codec short form") {
val codec = CompressionCodec.createCodec(conf, "snappy")
assert(codec.getClass === classOf[SnappyCompressionCodec])
testCodec(codec)
}
}
8 changes: 5 additions & 3 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,12 @@ Apart from these, the following properties are also available, and may be useful
</tr>
<tr>
<td><code>spark.io.compression.codec</code></td>
<td>org.apache.spark.io.<br />SnappyCompressionCodec</td>
<td>snappy</td>
<td>
The codec used to compress internal data such as RDD partitions and shuffle outputs.
By default, Spark provides three codecs: <code>org.apache.spark.io.LZ4CompressionCodec</code>,
The codec used to compress internal data such as RDD partitions and shuffle outputs. By default,
Spark provides three codecs: <code>lz4</code>, <code>lzf</code>, and <code>snappy</code>. You
can also use fully qualified class names to specify the codec, e.g.
<code>org.apache.spark.io.LZ4CompressionCodec</code>,
<code>org.apache.spark.io.LZFCompressionCodec</code>,
and <code>org.apache.spark.io.SnappyCompressionCodec</code>.
</td>
Expand Down

0 comments on commit 837bf60

Please sign in to comment.