Skip to content

Commit

Permalink
web-xml: Added doctype parsing.
Browse files Browse the repository at this point in the history
  • Loading branch information
sleepy-monax committed Apr 16, 2024
1 parent bf16ef4 commit 0ed35a0
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/web/web-dom/document-type.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ struct DocumentType : public Node {
String publicId;
String systemId;

DocumentType() = default;

DocumentType(String name, String publicId, String systemId)
: name(name), publicId(publicId), systemId(systemId) {
}
Expand Down
62 changes: 61 additions & 1 deletion src/web/web-xml/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,12 +249,44 @@ Res<> Parser::_parseProlog(Io::SScan &s, Dom::Node &parent) {
while (_parseMisc(s, parent) and not s.ended())
;

// TODO: Parse doctype declaration
if (auto doctype = _parseDoctype(s)) {
parent.appendChild(doctype.unwrap());
while (_parseMisc(s, parent) and not s.ended())
;
}

rollback.disarm();
return Ok();
}

// Literal token that opens a document type declaration; matched by _parseDoctype.
static constexpr auto RE_DOCTYPE_START = "<!DOCTYPE"_re;

// Parses a document type declaration and returns the DocumentType node built from it.
//
// Returns an invalid-data error when the input does not start with '<!DOCTYPE'
// or when the closing '>' is missing; errors from _parseS / _parseName are
// propagated as-is. The internal subset ('[' intSubset ']') part of the
// production is not handled here.
Res<Strong<Dom::DocumentType>> Parser::_parseDoctype(Io::SScan &s) {
    // doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>'
    auto restore = s.rollbackPoint();

    logDebug("Parsing doctype declaration");

    if (not s.skip(RE_DOCTYPE_START))
        return Error::invalidData("expected '<!DOCTYPE'");

    auto node = makeStrong<Dom::DocumentType>();

    try$(_parseS(s));
    node->name = try$(_parseName(s));
    try$(_parseS(s));

    // The external ID is optional, so its result is deliberately discarded.
    (void)_parseExternalId(s, *node);
    try$(_parseS(s));

    if (s.skip('>')) {
        // Whole declaration matched: keep the consumed input.
        restore.disarm();
        return Ok(node);
    }

    return Error::invalidData("expected '>'");
}

// 2.9 MARK: Standalone Document Declaration
// https://www.w3.org/TR/xml/#sec-rmd

Expand Down Expand Up @@ -530,4 +562,32 @@ Res<Rune> Parser::_parseReference(Io::SScan &s) {
return Error::invalidData("expected reference");
}

// 4.2 MARK: Entity Declarations
// https://www.w3.org/TR/xml/#sec-entity-decl

// Parses an external identifier and stores its literals into `docType`.
//
// On success, `docType.systemId` (and `docType.publicId` for the PUBLIC form)
// are updated. On failure an error is returned and `docType` is left untouched:
// the literals are parsed into locals first and only committed once the whole
// production has matched, so a half-parsed PUBLIC id never leaks into the node.
//
// NOTE(review): the rollback point presumably restores the scanner position
// when it goes out of scope without being disarmed -- confirm against Io::SScan.
Res<> Parser::_parseExternalId(Io::SScan &s, Dom::DocumentType &docType) {
    // ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
    logDebug("Parsing external ID");

    auto rollback = s.rollbackPoint();

    if (s.skip("SYSTEM"_re)) {
        try$(_parseS(s));
        // NOSPEC: We are parsing the system literal as att value
        auto systemId = try$(_parseAttValue(s));

        docType.systemId = systemId;
        rollback.disarm();
        return Ok();
    }

    if (s.skip("PUBLIC"_re)) {
        // NOSPEC: We are parsing the public and system literals as att values
        try$(_parseS(s));
        auto publicId = try$(_parseAttValue(s));
        try$(_parseS(s));
        auto systemId = try$(_parseAttValue(s));

        // Commit both fields together, only after every part parsed successfully.
        docType.publicId = publicId;
        docType.systemId = systemId;
        rollback.disarm();
        return Ok();
    }

    return Error::invalidData("expected 'SYSTEM' or 'PUBLIC'");
}

} // namespace Web::Xml
5 changes: 5 additions & 0 deletions src/web/web-xml/parser.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <web-dom/comment.h>
#include <web-dom/document-type.h>
#include <web-dom/document.h>
#include <web-dom/element.h>
#include <web-dom/text.h>
Expand Down Expand Up @@ -44,6 +45,8 @@ struct Parser {

Res<> _parseProlog(Io::SScan &s, Dom::Node &parent);

Res<Strong<Dom::DocumentType>> _parseDoctype(Io::SScan &s);

Res<Strong<Dom::Element>> _parseElement(Io::SScan &s, Ns ns);

Res<Strong<Dom::Element>> _parseStartTag(Io::SScan &s, Ns ns);
Expand All @@ -65,6 +68,8 @@ struct Parser {
Res<Rune> _parseEntityRef(Io::SScan &s);

Res<Rune> _parseReference(Io::SScan &s);

Res<> _parseExternalId(Io::SScan &s, Dom::DocumentType &docType);
};

} // namespace Web::Xml
10 changes: 10 additions & 0 deletions src/web/web-xml/tests/test-parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,14 @@ test$("parse-comment") {
return Ok();
}

// Checks that a leading doctype declaration becomes the document's first child
// and that its name is captured.
test$("parse-doctype") {
    auto scan = Io::SScan("<!DOCTYPE html><html></html>");
    auto parser = Parser();
    auto document = try$(parser.parse(scan, Web::HTML));

    // The doctype is expected to come before the root element.
    auto firstNode = document->firstChild();
    auto doctypeNode = try$(firstNode.cast<Dom::DocumentType>());

    expect$(doctypeNode->name == "html");
    return Ok();
}

} // namespace Web::Xml::Tests

0 comments on commit 0ed35a0

Please sign in to comment.