From 8741abc5a96217fabd0cbf63d60b6ccb173cc213 Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Sun, 9 Jun 2024 17:23:45 +0500
Subject: [PATCH 1/4] Move bang type checks outside of loop

(Review in whitespace changes ignored mode)
---
 src/reader/mod.rs | 55 ++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/reader/mod.rs b/src/reader/mod.rs
index 63a42574..759ba6aa 100644
--- a/src/reader/mod.rs
+++ b/src/reader/mod.rs
@@ -974,32 +974,35 @@ impl BangType {
     /// - `chunk`: data read on current iteration and not yet consumed from reader
     #[inline(always)]
     fn parse<'b>(&self, buf: &[u8], chunk: &'b [u8]) -> Option<(&'b [u8], usize)> {
-        for i in memchr::memchr_iter(b'>', chunk) {
-            match self {
-                // Need to read at least 6 symbols (`!---->`) for properly finished comment
-                // <!----> - XML comment
-                //  012345 - i
-                Self::Comment if buf.len() + i > 4 => {
-                    if chunk[..i].ends_with(b"--") {
-                        // We cannot strip last `--` from the buffer because we need it in case of
-                        // check_comments enabled option. XML standard requires that comment
-                        // will not end with `--->` sequence because this is a special case of
-                        // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
-                        return Some((&chunk[..i], i + 1)); // +1 for `>`
-                    }
-                    // End sequence `-|->` was splitted at |
-                    //        buf --/   \-- chunk
-                    if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
-                        return Some((&chunk[..i], i + 1)); // +1 for `>`
-                    }
-                    // End sequence `--|>` was splitted at |
-                    //         buf --/   \-- chunk
-                    if i == 0 && buf.ends_with(b"--") {
-                        return Some((&[], i + 1)); // +1 for `>`
+        match self {
+            Self::Comment => {
+                for i in memchr::memchr_iter(b'>', chunk) {
+                    // Need to read at least 6 symbols (`!---->`) for properly finished comment
+                    // <!----> - XML comment
+                    //  012345 - i
+                    if buf.len() + i > 4 {
+                        if chunk[..i].ends_with(b"--") {
+                            // We cannot strip last `--` from the buffer because we need it in case of
+                            // check_comments enabled option. XML standard requires that comment
+                            // will not end with `--->` sequence because this is a special case of
+                            // `--` in the comment (https://www.w3.org/TR/xml11/#sec-comments)
+                            return Some((&chunk[..i], i + 1)); // +1 for `>`
+                        }
+                        // End sequence `-|->` was splitted at |
+                        //        buf --/   \-- chunk
+                        if i == 1 && buf.ends_with(b"-") && chunk[0] == b'-' {
+                            return Some((&chunk[..i], i + 1)); // +1 for `>`
+                        }
+                        // End sequence `--|>` was splitted at |
+                        //         buf --/   \-- chunk
+                        if i == 0 && buf.ends_with(b"--") {
+                            return Some((&[], i + 1)); // +1 for `>`
+                        }
                     }
                 }
-                Self::Comment => {}
-                Self::CData => {
+            }
+            Self::CData => {
+                for i in memchr::memchr_iter(b'>', chunk) {
                     if chunk[..i].ends_with(b"]]") {
                         return Some((&chunk[..i], i + 1)); // +1 for `>`
                     }
@@ -1014,7 +1017,9 @@ impl BangType {
                         return Some((&[], i + 1)); // +1 for `>`
                     }
                 }
-                Self::DocType => {
+            }
+            Self::DocType => {
+                for i in memchr::memchr_iter(b'>', chunk) {
                     let content = &chunk[..i];
                     let balance = memchr::memchr2_iter(b'<', b'>', content)
                         .map(|p| if content[p] == b'<' { 1i32 } else { -1 })

From 89e5360f5a00f4cda36080e03e0dc46f94273ec0 Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Thu, 4 Jul 2024 22:01:39 +0500
Subject: [PATCH 2/4] Add test for lowercase <![cdata[]]>

failures (6):
    syntax::cdata::lowercase::ns_reader::async_tokio
    syntax::cdata::lowercase::ns_reader::borrowed
    syntax::cdata::lowercase::ns_reader::buffered
    syntax::cdata::lowercase::reader::async_tokio
    syntax::cdata::lowercase::reader::borrowed
    syntax::cdata::lowercase::reader::buffered
---
 tests/reader-errors.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/reader-errors.rs b/tests/reader-errors.rs
index 18c08a23..34880c15 100644
--- a/tests/reader-errors.rs
+++ b/tests/reader-errors.rs
@@ -343,6 +343,8 @@ mod syntax {
         err!(unclosed24("<![CDATA[]h") => SyntaxError::UnclosedCData);
         err!(unclosed25("<![CDATA[]>") => SyntaxError::UnclosedCData);
 
+        err!(lowercase("<![cdata[]]>") => SyntaxError::UnclosedCData);
+
         ok!(normal1("<![CDATA[]]>")     => 12: Event::CData(BytesCData::new("")));
         ok!(normal2("<![CDATA[]]>rest") => 12: Event::CData(BytesCData::new("")));
     }

From 6dec2b22c2cd47f814854e5a06f173e4a21269d7 Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Thu, 4 Jul 2024 00:40:40 +0500
Subject: [PATCH 3/4] Only uppercase `<![CDATA[` starts CDATA section

---
 Changelog.md        |  5 +++++
 src/reader/state.rs | 10 +++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/Changelog.md b/Changelog.md
index 7232379b..1c364f60 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -17,8 +17,13 @@
 
 ### Bug Fixes
 
+- [#781]: Fix conditions to start CDATA section. Only uppercase `<![CDATA[` can start it.
+  Previously any case was allowed.
+
 ### Misc Changes
 
+[#781]: https://github.com/tafia/quick-xml/pull/781
+
 
 ## 0.35.0 -- 2024-06-29
 
diff --git a/src/reader/state.rs b/src/reader/state.rs
index 2f3e8512..0119022d 100644
--- a/src/reader/state.rs
+++ b/src/reader/state.rs
@@ -128,7 +128,11 @@ impl ReaderState {
                     self.decoder(),
                 )))
             }
-            BangType::CData if uncased_starts_with(buf, b"![CDATA[") => {
+            // XML requires uppercase only:
+            // https://www.w3.org/TR/xml11/#sec-cdata-sect
+            // Even HTML5 required uppercase only:
+            // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
+            BangType::CData if buf.starts_with(b"![CDATA[") => {
                 debug_assert!(buf.ends_with(b"]]"));
                 Ok(Event::CData(BytesCData::wrap(
                     // Cut of `![CDATA[` and `]]` from start and end
@@ -136,6 +140,10 @@ impl ReaderState {
                     self.decoder(),
                 )))
             }
+            // XML requires uppercase only, but we will check that on validation stage:
+            // https://www.w3.org/TR/xml11/#sec-prolog-dtd
+            // HTML5 allows mixed case for doctype declarations:
+            // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
             BangType::DocType if uncased_starts_with(buf, b"!DOCTYPE") => {
                 match buf[8..].iter().position(|&b| !is_whitespace(b)) {
                     Some(start) => Ok(Event::DocType(BytesText::wrap(

From b71cf7c92b40303d883c2d3411b6d9f07288240c Mon Sep 17 00:00:00 2001
From: Mingun <alexander_sergey@mail.ru>
Date: Thu, 4 Jul 2024 00:45:06 +0500
Subject: [PATCH 4/4] Use only uppercase DOCTYPE in tests where possible as
 required by the specification

See https://www.w3.org/TR/xml11/#NT-doctypedecl
---
 tests/fuzzing.rs       | 2 +-
 tests/reader-config.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/fuzzing.rs b/tests/fuzzing.rs
index deca382a..2740763c 100644
--- a/tests/fuzzing.rs
+++ b/tests/fuzzing.rs
@@ -51,7 +51,7 @@ fn fuzz_101() {
 
 #[test]
 fn fuzz_empty_doctype() {
-    let data: &[u8] = b"<!doctype  \n    >";
+    let data: &[u8] = b"<!DOCTYPE  \n    >";
     let mut reader = Reader::from_reader(data);
     let mut buf = Vec::new();
     assert!(matches!(
diff --git a/tests/reader-config.rs b/tests/reader-config.rs
index bd2465e3..781bca8c 100644
--- a/tests/reader-config.rs
+++ b/tests/reader-config.rs
@@ -471,7 +471,7 @@ mod trim_markup_names_in_closing_tags {
 }
 
 const XML: &str = " \t\r\n\
-<!doctype root \t\r\n> \t\r\n\
+<!DOCTYPE root \t\r\n> \t\r\n\
 <root \t\r\n> \t\r\n\
     <empty \t\r\n/> \t\r\n\
     text \t\r\n\