Skip to content

Commit

Permalink
Improve DTD scanner to be more tolerant + fix existing doctype scanner
Browse files Browse the repository at this point in the history
test (see #231).
  • Loading branch information
angelozerr committed Nov 26, 2018
1 parent d9295ea commit 81832b5
Show file tree
Hide file tree
Showing 14 changed files with 386 additions and 360 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.eclipse.lsp4xml.commons.TextDocument;
import org.eclipse.lsp4xml.dom.parser.Constants;
import org.eclipse.lsp4xml.uriresolver.URIResolverExtensionManager;
import org.eclipse.lsp4xml.utils.DOMUtils;
import org.eclipse.lsp4xml.utils.StringUtils;
import org.w3c.dom.CDATASection;
import org.w3c.dom.DOMConfiguration;
Expand Down Expand Up @@ -743,4 +744,16 @@ public URIResolverExtensionManager getResolverExtensionManager() {
return resolverExtensionManager;
}

/**
 * Tells whether this document is a DTD, judged solely from its document URI.
 *
 * @return true when {@code DOMUtils.isDTD} accepts the document URI, false
 *         otherwise.
 */
public boolean isDTD() {
    // The URI check already yields the boolean result, so return it directly.
    return DOMUtils.isDTD(getDocumentURI());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ public enum DocumentTypeKind {
int publicIdEnd = -1;
int systemIdStart = -1;
int systemIdEnd = -1;
int startInternalDTD = -1;
int endInternalDTD = -1;
Integer startInternalSubset;
Integer endInternalSubset;

private String name;
private String kind;
Expand Down Expand Up @@ -129,15 +129,28 @@ public NamedNodeMap getEntities() {
*/
@Override
public String getInternalSubset() {
if (internalSubset == null && startInternalDTD != -1 && endInternalDTD != -1) {
internalSubset = getSubstring(startInternalDTD, endInternalDTD);
if (internalSubset == null && startInternalSubset != null && endInternalSubset != null) {
internalSubset = getSubstring(startInternalSubset + 1, endInternalSubset);
}
return internalSubset;
}

void setInternalSubset(int start, int end) {
startInternalDTD = start;
endInternalDTD = end;
/**
 * Returns the offset recorded as the start of the internal subset.
 *
 * @return the start offset of the internal subset, or null when no start
 *         offset has been recorded.
 */
public Integer getStartInternalSubset() {
    return this.startInternalSubset;
}

/**
 * Returns the offset recorded as the end of the internal subset.
 *
 * @return the end offset of the internal subset, or null when no end offset
 *         has been recorded.
 */
public Integer getEndInternalSubset() {
    return this.endInternalSubset;
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import org.w3c.dom.Entity;

/**
* DOM Entity.
* DOM Entity declaration <!ENTITY
*
*/
public class DOMEntity extends DOMNode implements Entity {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.eclipse.lsp4xml.commons.BadLocationException;
import org.eclipse.lsp4xml.commons.TextDocument;
import org.eclipse.lsp4xml.dom.parser.Scanner;
import org.eclipse.lsp4xml.dom.parser.ScannerState;
import org.eclipse.lsp4xml.dom.parser.TokenType;
import org.eclipse.lsp4xml.dom.parser.XMLScanner;
import org.eclipse.lsp4xml.uriresolver.URIResolverExtensionManager;
Expand Down Expand Up @@ -47,8 +46,7 @@ public DOMDocument parse(String text, String uri, URIResolverExtensionManager re
public DOMDocument parse(TextDocument document, URIResolverExtensionManager resolverExtensionManager) {
boolean isDTD = DOMUtils.isDTD(document.getUri());
String text = document.getText();
Scanner scanner = isDTD ? XMLScanner.createScanner(text, 0, ScannerState.WithinInternalDTD)
: XMLScanner.createScanner(text);
Scanner scanner = XMLScanner.createScanner(text, 0, isDTD);
DOMDocument xmlDocument = new DOMDocument(document, resolverExtensionManager);

DOMNode curr = isDTD ? new DOMDocumentType(0, text.length(), xmlDocument) : xmlDocument;
Expand Down Expand Up @@ -266,99 +264,97 @@ public DOMDocument parse(TextDocument document, URIResolverExtensionManager reso
break;
}

/**
_____ ____ _____ _________ _______ ______ _______ _______ _____
| __ \ / __ \ / ____|__ __\ \ / / __ \| ____| / / __ \__ __| __ \
| | | | | | | | | | \ \_/ /| |__) | |__ / /| | | | | | | | | |
| | | | | | | | | | \ / | ___/| __| / / | | | | | | | | | |
| |__| | |__| | |____ | | | | | | | |____ / / | |__| | | | | |__| |
|_____/ \____/ \_____| |_| |_| |_| |______/_/ |_____/ |_| |_____/
*/
// DTD

case StartDoctypeTag: {
case DTDStartDoctypeTag: {
DOMDocumentType doctype = xmlDocument.createDocumentType(scanner.getTokenOffset(), text.length());
curr.addChild(doctype);
doctype.parent = curr;
curr = doctype;
break;
}

case DoctypeName: {
case DTDDoctypeName: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setName(scanner.getTokenOffset(), scanner.getTokenEnd());
break;
}

case DocTypeKindPUBLIC: {
case DTDDocTypeKindPUBLIC: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd());
break;
}

case DocTypeKindSYSTEM: {
case DTDDocTypeKindSYSTEM: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setKind(scanner.getTokenOffset(), scanner.getTokenEnd());
break;
}

case DoctypePublicId: {
case DTDDoctypePublicId: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setPublicId(scanner.getTokenOffset(), scanner.getTokenEnd());
break;
}

case DoctypeSystemId: {
case DTDDoctypeSystemId: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setSystemId(scanner.getTokenOffset(), scanner.getTokenEnd());
break;
}

case InternalDTDContent: {
case DTDStartInternalSubset: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.setInternalSubset(scanner.getTokenOffset(), scanner.getTokenEnd());
doctype.startInternalSubset = scanner.getTokenOffset();
break;
}

case StartElementDTD: {
case DTDEndInternalSubset: {
DOMDocumentType doctype = (DOMDocumentType) curr;
doctype.endInternalSubset = scanner.getTokenOffset();
break;
}

case DTDStartElementDecl: {
DTDElementDecl child = new DTDElementDecl(scanner.getTokenOffset(), text.length(),
(DOMDocumentType) curr);
curr.addChild(child);
curr = child;
break;
}

case ElementDTDName: {
case DTDElementDeclName: {
DTDElementDecl element = (DTDElementDecl) curr;
element.name = scanner.getTokenText();
break;
}

case StartAttlistDTD: {
DTDAttList child = new DTDAttList(scanner.getTokenOffset(), text.length(),
case DTDStartAttlistDecl: {
DTDAttlistDecl child = new DTDAttlistDecl(scanner.getTokenOffset(), text.length(),
(DOMDocumentType) curr);
curr.addChild(child);
curr = child;
break;
}

case StartEntityDTD: {
DOMEntity child = new DOMEntity(scanner.getTokenOffset(), text.length(),
(DOMDocumentType) curr);

case DTDStartEntity: {
DOMEntity child = new DOMEntity(scanner.getTokenOffset(), text.length(), (DOMDocumentType) curr);
curr.addChild(child);
curr = child;
break;
}
case EndDTDTag: {

case DTDEndTag: {
if ((curr.isDTDElementDecl() || curr.isDTDAttList() || curr.isEntity()) && curr.parent != null) {
curr.end = scanner.getTokenEnd();
lastClosed = curr;
curr = curr.parent;
}
break;
break;
}

case EndDoctypeTag: {
case DTDEndDoctypeTag: {
((DOMDocumentType) curr).setEnd(scanner.getTokenEnd());
curr.closed = true;
curr = curr.parent;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,18 @@
package org.eclipse.lsp4xml.dom;

/**
* DTD Attribute List declaration <!ATTRIBUTES
* DTD Attribute List declaration <!ATTLIST
*
* @see <a href="https://www.w3.org/TR/REC-xml/#attdecls">XML 1.0 - Attribute-List Declarations</a>
*
*/
public class DTDAttList extends DOMNode {
public class DTDAttlistDecl extends DOMNode {

private final DOMDocumentType ownerDTDDocument;

String name;

public DTDAttList(int start, int end, DOMDocumentType ownerDTDDocument) {
public DTDAttlistDecl(int start, int end, DOMDocumentType ownerDTDDocument) {
super(start, end, ownerDTDDocument.getOwnerDocument());
this.ownerDTDDocument = ownerDTDDocument;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

/**
* DTD Element Declaration <!ELEMENT
*
* @see <a href="https://www.w3.org/TR/REC-xml/#dt-eldecl">XML 1.0 - Element Type Declarations</a>
*
*/
public class DTDElementDecl extends DOMNode {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,16 @@
*
*/
public enum ScannerState {
WithinContent, AfterOpeningStartTag, AfterOpeningEndTag, WithinProlog, WithinDoctype, WithinTag, WithinEndTag,
WithinComment, AfterAttributeName, BeforeAttributeValue, WithinCDATA, AfterClosingCDATATag, StartCDATATag, AfterPrologOpen, PrologOrPI,
WithinPI, AfterDoctypeName, AfterDoctypePUBLIC, AfterDoctypeSYSTEM, AfterDoctypePublicId, AfterDoctypeSystemId, AfterInternalDTDStartBracket, WithinInternalDTD,
WithinElementDTD, WithinAttlistDTD, WithinDTDEntity, AfterElementDTDName, WithinElementDTDContent, AfterAttlistName, AfterAttlistDeclName, AfterAttlistDTDElementName, AfterAttlistDTDAttributeName, AfterAttlistDTDAttributeType, AfterDTDEntityName, AfterDTDEntityKind, WithinDTDTag, IncorrectDTDTagFormat
WithinContent, AfterOpeningStartTag, AfterOpeningEndTag, WithinProlog, WithinTag, WithinEndTag,
WithinComment, AfterAttributeName, BeforeAttributeValue, WithinCDATA, AfterClosingCDATATag, StartCDATATag,
AfterPrologOpen, PrologOrPI, WithinPI,

// DTD
DTDWithinDoctype, DTDAfterDoctypeName, DTDAfterDoctypePUBLIC, DTDAfterDoctypeSYSTEM,
DTDAfterDoctypePublicId, DTDAfterDoctypeSystemId, DTDAfterInternalStartBracket,

DTDWithinContent, DTDWithinElement, DTDWithinAttlist, DTDWithinEntity, DTDAfterElementName, DTDWithinElementContent,
DTDAfterAttlistName, DTDAfterAttlistDeclName, DTDAfterAttlistElementName, DTDAfterAttlistAttributeName,
DTDAfterAttlistAttributeType, DTDAfterEntityName, DTDAfterEntityKind, DTDWithinTag, DTDIncorrectTagFormat

}
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,41 @@ public enum TokenType {
Whitespace,
Unknown,
EOS,
StartDoctypeTag,
DoctypeName,
DocTypeKindPUBLIC,
DocTypeKindSYSTEM,
DoctypePublicId,
DoctypeSystemId,
InternalDTDStart,
EndDoctypeTag,
EndInternalDTD,
InternalDTDContent,
StartElementDTD,
StartAttlistDTD,
StartEntityDTD,
ElementDTDName,
StartElementDTDContent,
ElementDTDCategory,
ElementDTDContent,
ElementDTDContentComma,
EndElementDTDContent,
AttlistDeclName,
AttlistDTDAttributeValue,
AttlistDTDType,
AttlistDTDElementName,
AttlistDTDAttributeName,
// DTD
DTDStartDoctypeTag,
DTDDoctypeName,
DTDDocTypeKindPUBLIC,
DTDDocTypeKindSYSTEM,
DTDDoctypePublicId,
DTDDoctypeSystemId,
DTDEndDoctypeTag,
DTDStartInternalSubset,
DTDEndInternalSubset,
// DTD Element declaration
DTDStartElementDecl,
DTDElementDeclName,
DTDStartElementContent,
DTDElementCategory,
DTDElementContent,
DTDElementContentComma,
DTDEndElementContent,

// DTD Attlist declaration
DTDStartAttlistDecl,
DTDAttlistElementName,
DTDAttlistDeclName,
DTDAttlistAttributeValue,
DTDAttlistType,
DTDAttlistAttributeName,

// DTD Entity
DTDStartEntity,
DTDEntityName,
DTDEntityValue,
DTDEntityKind,
DTDEntityURL,

DTDUndefineTag,
EndDTDTag, DTDTagExcessContent
DTDEndTag,
DTDTagExcessContent;
}
Loading

0 comments on commit 81832b5

Please sign in to comment.