Skip to content

Commit

Permalink
[SPARK-45225][SQL][FOLLOW-UP] XML: Fix nested XSD file path resolution
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
This PR fixes path resolution for nested XSD files (XSDs referenced from another XSD) when the top-level XSD is supplied through the `rowValidationXSDPath` option or the `XSDToSchema` API.

### Why are the changes needed?
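Nested XSD files were not resolved correctly: relative references inside the top-level XSD were not resolved against that file's own location.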

### Does this PR introduce _any_ user-facing change?
Yes

### How was this patch tested?
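Extended the existing `test XSD validation` case in `XmlSuite` so that it also runs against a nested XSD (`include-example/first.xsd`).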

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #46235 from sandip-db/xml_nested_xsd.

Authored-by: Sandip Agarwala <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
  • Loading branch information
sandip-db authored and HyukjinKwon committed Apr 26, 2024
1 parent 3451e66 commit e04ac56
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ object ValidatorUtil extends Logging {
val in = openSchemaFile(new Path(key))
try {
val schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI)
- schemaFactory.newSchema(new StreamSource(in))
+ schemaFactory.newSchema(new StreamSource(in, key))
} finally {
in.close()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ object XSDToSchema extends Logging{
def read(xsdPath: Path): StructType = {
val in = ValidatorUtil.openSchemaFile(xsdPath)
val xmlSchemaCollection = new XmlSchemaCollection()
- xmlSchemaCollection.setBaseUri(xsdPath.getParent.toString)
+ xmlSchemaCollection.setBaseUri(xsdPath.toString)
val xmlSchema = xmlSchemaCollection.read(new InputStreamReader(in))
getStructType(xmlSchema)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1206,14 +1206,16 @@ class XmlSuite
}

test("test XSD validation") {
- val basketDF = spark.read
- .option("rowTag", "basket")
- .option("inferSchema", true)
- .option("rowValidationXSDPath", getTestResourcePath(resDir + "basket.xsd")
- .replace("file:/", "/"))
- .xml(getTestResourcePath(resDir + "basket.xml"))
- // Mostly checking it doesn't fail
- assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027)
+ Seq("basket.xsd", "include-example/first.xsd").foreach { xsdFile =>
+ val basketDF = spark.read
+ .option("rowTag", "basket")
+ .option("inferSchema", true)
+ .option("rowValidationXSDPath", getTestResourcePath(resDir + xsdFile)
+ .replace("file:/", "/"))
+ .xml(getTestResourcePath(resDir + "basket.xml"))
+ // Mostly checking it doesn't fail
+ assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027)
+ }
}

test("test XSD validation with validation error") {
Expand Down

0 comments on commit e04ac56

Please sign in to comment.