[SPARK-2736] Update data sample
kanzhang committed Aug 13, 2014
1 parent 0be7761 commit 82cc505
Showing 3 changed files with 18 additions and 12 deletions.
14 changes: 7 additions & 7 deletions examples/src/main/python/avro_inputformat.py
@@ -20,17 +20,17 @@
 from pyspark import SparkContext
 
 """
-Read data file users.avro in local Spark dist:
+Read data file users.avro in local Spark distro:
 
 $ cd $SPARK_HOME
 $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \
-> ./examples/src/main/resources/users.avro
-{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'}
-{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'}
+> examples/src/main/resources/users.avro
+{u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]}
+{u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []}
 
-To read name and favorite_color fields only, specify the following reader schema file:
+To read name and favorite_color fields only, specify the following reader schema:
 
-$ cat ./examples/src/main/resources/user.avsc
+$ cat examples/src/main/resources/user.avsc
 {"namespace": "example.avro",
  "type": "record",
  "name": "User",
@@ -41,7 +41,7 @@
 }
 $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \
-> ./examples/src/main/resources/users.avro ./examples/src/main/resources/user.avsc
+> examples/src/main/resources/users.avro examples/src/main/resources/user.avsc
 {u'favorite_color': None, u'name': u'Alyssa'}
 {u'favorite_color': u'red', u'name': u'Ben'}
 """
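For context, the docstring above corresponds to the read path inside avro_inputformat.py, which is not shown in this hunk. A minimal sketch of that path, assuming the converter lives in the pythonconverters example package and that the reader schema is passed via the "avro.schema.input.key" Hadoop conf key (both are assumptions based on this example, not shown in the diff):

from pyspark import SparkContext

sc = SparkContext(appName="AvroInputFormatSketch")

# Optional reader schema (the user.avsc shown above); drop conf entirely to use
# the writer schema embedded in users.avro. The conf key is an assumption here.
reader_schema = open("examples/src/main/resources/user.avsc").read()
conf = {"avro.schema.input.key": reader_schema}

avro_rdd = sc.newAPIHadoopFile(
    "examples/src/main/resources/users.avro",
    "org.apache.avro.mapreduce.AvroKeyInputFormat",
    "org.apache.avro.mapred.AvroKey",
    "org.apache.hadoop.io.NullWritable",
    keyConverter="org.apache.spark.examples.pythonconverters.AvroWrapperToJavaConverter",
    conf=conf)

# Each element is a (record-as-dict, None) pair; keep only the records.
for record in avro_rdd.map(lambda kv: kv[0]).collect():
    print(record)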
Binary file modified examples/src/main/resources/users.avro
Binary file not shown.
@@ -33,13 +33,19 @@ import org.apache.spark.SparkException
 /**
  * Implementation of [[org.apache.spark.api.python.Converter]] that converts
  * an Avro Record wrapped in an AvroKey (or AvroValue) to a Java Map. It tries
- * to work with all 3 Avro data models.
+ * to work with all 3 Avro data mappings (Generic, Specific and Reflect).
  */
 class AvroWrapperToJavaConverter extends Converter[Any, Any] {
-  override def convert(obj: Any): Any = obj.asInstanceOf[AvroWrapper[_]].datum() match {
-    case record: IndexedRecord => unpackRecord(record)
-    case other => throw new SparkException(
-      s"Unsupported top-level Avro data type ${other.getClass.getName}")
+  override def convert(obj: Any): Any = {
+    if (obj == null) {
+      return null
+    }
+    obj.asInstanceOf[AvroWrapper[_]].datum() match {
+      case null => null
+      case record: IndexedRecord => unpackRecord(record)
+      case other => throw new SparkException(
+        s"Unsupported top-level Avro data type ${other.getClass.getName}")
+    }
   }
 
   def unpackRecord(obj: Any): JMap[String, Any] = {
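Seen from the Python side, the new null guard means a null AvroKey (or a null datum inside it) now comes back as None instead of failing inside the converter. A hypothetical PySpark illustration of that contract; the dicts mirror the sample records above and the None element simulates a null datum, so this is a sketch of the downstream handling rather than the converter itself:

from pyspark import SparkContext

sc = SparkContext(appName="AvroConverterNullHandling")

# Simulated converter output: each Avro record arrives as a plain Python dict,
# and (after this change) a null wrapper or datum arrives as None.
records = sc.parallelize([
    {u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]},
    {u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []},
    None,  # stands in for a record whose datum was null
])

# Downstream code should filter out None records before indexing fields.
names = records.filter(lambda rec: rec is not None).map(lambda rec: rec[u'name'])
print(names.collect())  # [u'Alyssa', u'Ben']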
