Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix OOM on malformed input #99

Merged
merged 2 commits into from
Jun 12, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions src/main/scala/scala/xml/parsing/MarkupParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
}
if (1 != elemCount) {
reportSyntaxError("document must contain exactly one element")
Console.println(children.toList)
//Console.println(children.toList)
}

doc.children = children
Expand Down Expand Up @@ -389,7 +389,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
def xComment: NodeSeq = {
val sb: StringBuilder = new StringBuilder()
xToken("--")
while (true) {
while (!eof) {
if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) {
sb.length = sb.length - 1
nextch()
Expand All @@ -398,7 +398,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
} else sb.append(ch)
nextch()
}
throw FatalError("this cannot happen")
throw truncatedError("broken comment")
}

/* todo: move this into the NodeBuilder class */
Expand Down Expand Up @@ -678,10 +678,10 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {

def markupDecl1() = {
def doInclude() = {
xToken('['); while (']' != ch) markupDecl(); nextch() // ']'
xToken('['); while (']' != ch && !eof) markupDecl(); nextch() // ']'
}
def doIgnore() = {
xToken('['); while (']' != ch) nextch(); nextch() // ']'
xToken('['); while (']' != ch && !eof) nextch(); nextch() // ']'
}
if ('?' == ch) {
nextch()
Expand Down Expand Up @@ -747,7 +747,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {

case _ =>
curInput.reportError(pos, "unexpected character '" + ch + "', expected some markupdecl")
while (ch != '>')
while (ch != '>' && !eof)
nextch()
}
}
Expand Down Expand Up @@ -780,7 +780,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
def intSubset() {
//Console.println("(DEBUG) intSubset()")
xSpace()
while (']' != ch)
while (']' != ch && !eof)
markupDecl()
}

Expand All @@ -792,7 +792,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
xSpace()
val n = xName
xSpace()
while ('>' != ch) {
while ('>' != ch && !eof) {
//Console.println("["+ch+"]")
putChar(ch)
nextch()
Expand All @@ -817,7 +817,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
var attList: List[AttrDecl] = Nil

// later: find the elemDecl for n
while ('>' != ch) {
while ('>' != ch && !eof) {
val aname = xName
xSpace()
// could be enumeration (foo,bar) parse this later :-/
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/scala/xml/parsing/MarkupParserCommon.scala
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ private[scala] trait MarkupParserCommon extends TokenTests {
while (true) {
if (ch == head && peek(rest))
return handler(positioner(), sb.toString)
else if (ch == SU)
else if (ch == SU || eof)
truncatedError("") // throws TruncatedXMLControl in compiler

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't seem like adding the eof here triggers an OOM for any of your tests. I couldn't find an example that triggered the OOM for an XML processing instruction (PI) or a CDATA section -- they're the only ones that use xTakeUntil . I guess it's ok to cargo-cult an eof check everywhere, but it seems you were trying to avoid that.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ashawley Thank you for the review and comment! It is significant to add this eof. This is necessary in test of CDATA section <broken><![CDATA[A. Without it, the following OOM occur:

Exception in thread "XMLEventReader" java.lang.OutOfMemoryError: Java heap space
        at java.util.Arrays.copyOf(Arrays.java:3332)
        at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:137)
        at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:121)
        at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:622)
        at java.lang.StringBuilder.append(StringBuilder.java:202)
        at scala.collection.mutable.StringBuilder.append(StringBuilder.scala:267)
        at scala.xml.parsing.MarkupParserCommon$class.xTakeUntil(MarkupParserCommon.scala:253)
        at scala.xml.pull.XMLEventReader$Parser.xTakeUntil(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.xCharData(MarkupParser.scala:379)
        at scala.xml.pull.XMLEventReader$Parser.xCharData(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.content1(MarkupParser.scala:424)
        at scala.xml.pull.XMLEventReader$Parser.content1(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.content(MarkupParser.scala:459)
        at scala.xml.pull.XMLEventReader$Parser.content(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.element1(MarkupParser.scala:588)
        at scala.xml.pull.XMLEventReader$Parser.element1(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.content1(MarkupParser.scala:433)
        at scala.xml.pull.XMLEventReader$Parser.content1(XMLEventReader.scala:60)
        at scala.xml.parsing.MarkupParser$class.document(MarkupParser.scala:247)
        at scala.xml.pull.XMLEventReader$Parser.document(XMLEventReader.scala:60)
        at scala.xml.pull.XMLEventReader$Parser$$anonfun$run$1.apply(XMLEventReader.scala:96)
        at scala.xml.pull.XMLEventReader$Parser$$anonfun$run$1.apply(XMLEventReader.scala:96)
        at scala.xml.pull.ProducerConsumerIterator$class.interruptibly(XMLEventReader.scala:125)
        at scala.xml.pull.XMLEventReader.interruptibly(XMLEventReader.scala:27)
        at scala.xml.pull.XMLEventReader$Parser.run(XMLEventReader.scala:96)
        at java.lang.Thread.run(Thread.java:745)

It turned out that error checking is rudimentary, however, you are quite right. I fix some tests.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@shimamoto Ok, cool! I thought that was odd. Thanks for fixing up the unit tests!

sb append ch
Expand Down
134 changes: 116 additions & 18 deletions src/test/scala/scala/xml/pull/XMLEventReaderTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@ package scala.xml
package pull

import org.junit.Test
import org.junit.Ignore
import org.junit.runner.RunWith
import org.junit.runners.JUnit4
import org.junit.Assert.assertTrue
import org.junit.Assert.assertFalse
import org.junit.Assert.assertEquals
import org.junit.Assert.{assertFalse, assertTrue}

import scala.io.Source
import scala.xml.parsing.FatalError

class XMLEventReaderTest {

val src = Source.fromString("<hello><world/>!</hello>")

private def toSource(s: String) = new Source {
val iter = s.iterator
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
}

@Test
def pull: Unit = {
val er = new XMLEventReader(src)
Expand Down Expand Up @@ -44,17 +45,114 @@ class XMLEventReaderTest {
@Test
def issue35: Unit = {
val broken = "<broken attribute='is truncated"
val x = new Source {
val iter = broken.iterator
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
}
val r = new XMLEventReader(x)
val r = new XMLEventReader(toSource(broken))

assertTrue(r.next.isInstanceOf[EvElemStart])
}

@Test(expected = classOf[FatalError])
def malformedCDATA: Unit = {
val data = "<broken><![CDATA[A"
val r = new XMLEventReader(toSource(data))

assertTrue(r.next.isInstanceOf[EvElemStart])
// error when returning EvText of CDATA
r.next
}

@Test(expected = classOf[FatalError])
def malformedComment1: Unit = {
val data = "<!"
val r = new XMLEventReader(toSource(data))

// error when returning EvComment
r.next
}

@Test(expected = classOf[FatalError])
def malformedComment2: Unit = {
val data = "<!-- comment "
val r = new XMLEventReader(toSource(data))

// error when returning EvComment
r.next
}

@Test
def malformedDTD1: Unit = {
// broken ELEMENT
val data =
"""<?xml version="1.0" encoding="utf-8"?>
|<!DOCTYPE broken [
| <!ELE
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test
def malformedDTD2: Unit = {
val data =
"""<!DOCTYPE broken [
| <!ELEMENT data (#PCDATA)>
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test
def malformedDTD3: Unit = {
// broken ATTLIST
val data =
"""<!DOCTYPE broken [
| <!ATTL
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test
def malformedDTD4: Unit = {
// unexpected declaration
val data =
"""<!DOCTYPE broken [
| <!UNEXPECTED
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test(expected = classOf[Exception])
def missingTagTest: Unit = {
val data=
@Test
def malformedDTD5: Unit = {
val data =
"""<!DOCTYPE broken [
| <!ENTITY % foo 'INCLUDE'>
| <![%foo;[
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test
def malformedDTD6: Unit = {
val data =
"""<!DOCTYPE broken [
| <!ENTITY % foo 'IGNORE'>
| <![%foo;[
""".stripMargin
val r = new XMLEventReader(toSource(data))

assertFalse(r.hasNext)
}

@Test(expected = classOf[Exception])
def missingTagTest: Unit = {
val data=
"""<?xml version="1.0" ?>
|<verbosegc xmlns="http://www.ibm.com/j9/verbosegc">
|
Expand All @@ -66,8 +164,8 @@ class XMLEventReaderTest {
|</exclusive-start>
|""".stripMargin

val er = new XMLEventReader(Source.fromString(data))
while(er.hasNext) er.next()
er.stop()
}
val er = new XMLEventReader(toSource(data))
while(er.hasNext) er.next()
er.stop()
}
}