Skip to content

Commit

Permalink
[SPARK-49342][SQL] Make TO_AVRO SQL function 'jsonFormatSchema' argum…
Browse files Browse the repository at this point in the history
…ent optional

### What changes were proposed in this pull request?

This PR makes the `jsonFormatSchema` argument of the `TO_AVRO` SQL function optional.

For example, now it is possible to just call it with a single input argument:

```
create table t as
  select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s
  from values (1, null), (null,  'a') tab(member0, member1);

select length(to_avro(s)) > 0 from t;

> true
  true
```

### Why are the changes needed?

This makes the `TO_AVRO` SQL function easier to use.

### Does this PR introduce _any_ user-facing change?

Yes, see above.

### How was this patch tested?

This PR adds unit test coverage.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes apache#47834 from dtenedor/to-avro-schema-optional.

Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
  • Loading branch information
dtenedor authored and MaxGekk committed Aug 23, 2024
1 parent 97e0a88 commit a18ba89
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,10 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession {
spark.sql(s"select from_avro(result, '$jsonFormatSchema', map()).u from ($toAvroSql)"),
Seq(Row(Row(1, null)),
Row(Row(null, "a"))))
// The 'jsonFormatSchema' argument of the 'to_avro' function is optional.
checkAnswer(
spark.sql(s"select length(to_avro(s)) > 0 from t"),
Seq(Row(true), Row(true)))

// Negative tests.
checkError(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,12 +131,14 @@ case class FromAvro(child: Expression, jsonFormatSchema: Expression, options: Ex
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """
_FUNC_(child, jsonFormatSchema) - Converts a Catalyst binary input value into its corresponding
Avro format result.
_FUNC_(child[, jsonFormatSchema]) - Converts a Catalyst binary input value into its
corresponding Avro format result.
""",
examples = """
Examples:
> SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }]}', MAP()) IS NULL FROM (SELECT NULL AS s);
> SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }]}') IS NULL FROM (SELECT NULL AS s);
[true]
> SELECT _FUNC_(s) IS NULL FROM (SELECT NULL AS s);
[true]
""",
group = "misc_funcs",
Expand All @@ -145,6 +147,9 @@ case class FromAvro(child: Expression, jsonFormatSchema: Expression, options: Ex
// scalastyle:on line.size.limit
case class ToAvro(child: Expression, jsonFormatSchema: Expression)
extends BinaryExpression with RuntimeReplaceable {

def this(child: Expression) = this(child, Literal(null))

override def left: Expression = child

override def right: Expression = jsonFormatSchema
Expand All @@ -157,6 +162,9 @@ case class ToAvro(child: Expression, jsonFormatSchema: Expression)
jsonFormatSchema.dataType match {
case _: StringType if jsonFormatSchema.foldable =>
TypeCheckResult.TypeCheckSuccess
case _: NullType =>
// The 'jsonFormatSchema' argument is optional.
TypeCheckResult.TypeCheckSuccess
case _ =>
TypeCheckResult.TypeCheckFailure(
"The second argument of the TO_AVRO SQL function must be a constant string " +
Expand Down

0 comments on commit a18ba89

Please sign in to comment.