diff --git a/examples/src/main/python/avro_inputformat.py b/examples/src/main/python/avro_inputformat.py index 496690201b02c..e902ae29753c0 100644 --- a/examples/src/main/python/avro_inputformat.py +++ b/examples/src/main/python/avro_inputformat.py @@ -20,17 +20,17 @@ from pyspark import SparkContext """ -Read data file users.avro in local Spark dist: +Read data file users.avro in local Spark distro: $ cd $SPARK_HOME $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \ -> ./examples/src/main/resources/users.avro -{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} -{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +> examples/src/main/resources/users.avro +{u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]} +{u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []} -To read name and favorite_color fields only, specify the following reader schema file: +To read name and favorite_color fields only, specify the following reader schema: -$ cat ./examples/src/main/resources/user.avsc +$ cat examples/src/main/resources/user.avsc {"namespace": "example.avro", "type": "record", "name": "User", @@ -41,7 +41,7 @@ } $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \ -> ./examples/src/main/resources/users.avro ./examples/src/main/resources/user.avsc +> examples/src/main/resources/users.avro examples/src/main/resources/user.avsc {u'favorite_color': None, u'name': u'Alyssa'} {u'favorite_color': u'red', u'name': u'Ben'} """ diff --git a/examples/src/main/resources/users.avro b/examples/src/main/resources/users.avro index 29fba6fd57f01..27c526ab114b2 100644 Binary files a/examples/src/main/resources/users.avro and b/examples/src/main/resources/users.avro differ diff --git a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala index def8bdc8e318d..1b25983a38453 100644 --- a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala +++ b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala @@ -33,13 +33,19 @@ import org.apache.spark.SparkException /** * Implementation of [[org.apache.spark.api.python.Converter]] that converts * an Avro Record wrapped in an AvroKey (or AvroValue) to a Java Map. It tries - * to work with all 3 Avro data models. + * to work with all 3 Avro data mappings (Generic, Specific and Reflect). */ class AvroWrapperToJavaConverter extends Converter[Any, Any] { - override def convert(obj: Any): Any = obj.asInstanceOf[AvroWrapper[_]].datum() match { - case record: IndexedRecord => unpackRecord(record) - case other => throw new SparkException( - s"Unsupported top-level Avro data type ${other.getClass.getName}") + override def convert(obj: Any): Any = { + if (obj == null) { + return null + } + obj.asInstanceOf[AvroWrapper[_]].datum() match { + case null => null + case record: IndexedRecord => unpackRecord(record) + case other => throw new SparkException( + s"Unsupported top-level Avro data type ${other.getClass.getName}") + } } def unpackRecord(obj: Any): JMap[String, Any] = {