From 82cc505a7ebc6b5082207ac9c560c9793a470c5c Mon Sep 17 00:00:00 2001 From: Kan Zhang Date: Tue, 12 Aug 2014 19:03:31 -0700 Subject: [PATCH] [SPARK-2736] Update data sample --- examples/src/main/python/avro_inputformat.py | 14 +++++++------- examples/src/main/resources/users.avro | Bin 313 -> 334 bytes .../pythonconverters/AvroConverters.scala | 16 +++++++++++----- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/examples/src/main/python/avro_inputformat.py b/examples/src/main/python/avro_inputformat.py index 496690201b02c..e902ae29753c0 100644 --- a/examples/src/main/python/avro_inputformat.py +++ b/examples/src/main/python/avro_inputformat.py @@ -20,17 +20,17 @@ from pyspark import SparkContext """ -Read data file users.avro in local Spark dist: +Read data file users.avro in local Spark distro: $ cd $SPARK_HOME $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \ -> ./examples/src/main/resources/users.avro -{u'favorite_color': None, u'favorite_number': 256, u'name': u'Alyssa'} -{u'favorite_color': u'red', u'favorite_number': 7, u'name': u'Ben'} +> examples/src/main/resources/users.avro +{u'favorite_color': None, u'name': u'Alyssa', u'favorite_numbers': [3, 9, 15, 20]} +{u'favorite_color': u'red', u'name': u'Ben', u'favorite_numbers': []} -To read name and favorite_color fields only, specify the following reader schema file: +To read name and favorite_color fields only, specify the following reader schema: -$ cat ./examples/src/main/resources/user.avsc +$ cat examples/src/main/resources/user.avsc {"namespace": "example.avro", "type": "record", "name": "User", @@ -41,7 +41,7 @@ } $ ./bin/spark-submit --driver-class-path /path/to/example/jar ./examples/src/main/python/avro_inputformat.py \ -> ./examples/src/main/resources/users.avro ./examples/src/main/resources/user.avsc +> examples/src/main/resources/users.avro examples/src/main/resources/user.avsc {u'favorite_color': None, u'name': u'Alyssa'} {u'favorite_color': u'red', u'name': u'Ben'} """ diff --git a/examples/src/main/resources/users.avro b/examples/src/main/resources/users.avro index 29fba6fd57f01d42baee7b8a24bce4ec70909405..27c526ab114b2f42f6d4e13325c373706ba0f880 100644 GIT binary patch delta 158 zcmdnVbdE{LKPiimMJ%zbC||EQIU_YU@xer)>WPQ7b#xS}l}aiLQkATLjLeeM++rY0 zDKoD`Nk;)Dp_Euulvt@$J8_m&K(SJ7Y^?~`pyd3N)MSpl(wrQIyvcR<|2kP$KYG~t kR=@kgZWaR`$DGRI;zTA6HX%6;1~#YEJO;L+)D#Sr02OpPpa1{> delta 130 zcmX@dw3A85KPiimMJ%zbC||EQIU_YUao0qlYW2*#5+xl4rM%Ld9Hm$gBQZBs$x1;f zEwL=WD6=FrKCd)4DYa-~oz-MUMq!I{-^EhdD)z9+I5K`a?z4Ltiw=)tPGxa%B0~cU S6Pr_N9s?f(TTyBXx;g+KqAZ30 diff --git a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala index def8bdc8e318d..1b25983a38453 100644 --- a/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala +++ b/examples/src/main/scala/org/apache/spark/examples/pythonconverters/AvroConverters.scala @@ -33,13 +33,19 @@ import org.apache.spark.SparkException /** * Implementation of [[org.apache.spark.api.python.Converter]] that converts * an Avro Record wrapped in an AvroKey (or AvroValue) to a Java Map. It tries - * to work with all 3 Avro data models. + * to work with all 3 Avro data mappings (Generic, Specific and Reflect). */ class AvroWrapperToJavaConverter extends Converter[Any, Any] { - override def convert(obj: Any): Any = obj.asInstanceOf[AvroWrapper[_]].datum() match { - case record: IndexedRecord => unpackRecord(record) - case other => throw new SparkException( - s"Unsupported top-level Avro data type ${other.getClass.getName}") + override def convert(obj: Any): Any = { + if (obj == null) { + return null + } + obj.asInstanceOf[AvroWrapper[_]].datum() match { + case null => null + case record: IndexedRecord => unpackRecord(record) + case other => throw new SparkException( + s"Unsupported top-level Avro data type ${other.getClass.getName}") + } } def unpackRecord(obj: Any): JMap[String, Any] = {