Skip to content

Commit

Permalink
XML entities declared in a DTD are marked undeclared after XML file
Browse files Browse the repository at this point in the history
change

Fixes redhat-developer/vscode-xml#234

Signed-off-by: azerr <[email protected]>
  • Loading branch information
angelozerr committed Apr 21, 2020
1 parent 7dd53e2 commit 47fe693
Show file tree
Hide file tree
Showing 6 changed files with 334 additions and 172 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,11 @@ protected Range toLSPRange(XMLLocator location, String key, Object[] arguments,
}
return null;
}

/**
 * Tells the error reporter which fatal errors must not stop the validation.
 *
 * @param key the Xerces error key.
 * @return true if the given key is the name of
 *         {@link DTDErrorCode#EntityNotDeclared} and false otherwise.
 */
@Override
protected boolean isIgnoreFatalError(String key) {
	// The validation must go on when an entity is not declared.
	final String entityNotDeclaredKey = DTDErrorCode.EntityNotDeclared.name();
	return entityNotDeclaredKey.equals(key);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
/*******************************************************************************
* Copyright (c) 2020 Red Hat Inc. and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v20.html
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Red Hat Inc. - initial API and implementation
*******************************************************************************/
package org.eclipse.lemminx.extensions.contentmodel.participants.diagnostics;

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.MessageFormat;

import org.apache.xerces.impl.Constants;
import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.impl.dtd.DTDGrammar;
import org.apache.xerces.impl.dtd.XMLDTDDescription;
import org.apache.xerces.impl.dtd.XMLEntityDecl;
import org.apache.xerces.impl.validation.ValidationManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.Augmentations;
import org.apache.xerces.xni.NamespaceContext;
import org.apache.xerces.xni.XMLLocator;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.eclipse.lemminx.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lsp4j.DiagnosticSeverity;
import org.eclipse.lsp4j.Range;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;

/**
 * Extension of the Xerces SAX parser which works around two Xerces issues:
 *
 * <ul>
 * <li>[BUG 1]: when the DTD file path declared in the DOCTYPE is wrong, Xerces
 * breaks the whole validation (syntax validation included).</li>
 * <li>[BUG 2]: when a Xerces XML grammar pool is used, the second validation
 * ignores the existence of the entities declared in the DTD. See
 * https://github.com/redhat-developer/vscode-xml/issues/234</li>
 * </ul>
 *
 * @author Angelo ZERR
 *
 */
public class LSPSAXParser extends SAXParser {

	private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

	protected static final String VALIDATION_MANAGER = Constants.XERCES_PROPERTY_PREFIX
			+ Constants.VALIDATION_MANAGER_PROPERTY;

	protected static final String ENTITY_MANAGER = Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_MANAGER_PROPERTY;

	private final DOMDocument document;

	private final LSPErrorReporterForXML reporter;

	private final XMLGrammarPool grammarPool;

	/** Locator received in {@link #startDocument}, used to resolve the DTD system id. */
	private XMLLocator locator;

	/**
	 * Creates a SAX parser which reports its errors as LSP diagnostics.
	 *
	 * @param document    the DOM document to validate.
	 * @param reporter    the reporter which collects the LSP diagnostics.
	 * @param config      the Xerces parser configuration.
	 * @param grammarPool the grammar pool used as DTD grammar cache, or null when
	 *                    no cache is used.
	 */
	public LSPSAXParser(DOMDocument document, LSPErrorReporterForXML reporter, XMLParserConfiguration config,
			XMLGrammarPool grammarPool) {
		super(config);
		this.document = document;
		this.reporter = reporter;
		this.grammarPool = grammarPool;
		init(reporter);
	}

	/**
	 * Registers the error reporter and sets the parser features.
	 *
	 * @param reporter the reporter to register as Xerces error reporter.
	 */
	private void init(LSPErrorReporterForXML reporter) {
		try {
			// Add LSP error reporter to fill LSP diagnostics from Xerces errors
			super.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
			super.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
			super.setFeature("http://xml.org/sax/features/namespace-prefixes", true); //$NON-NLS-1$
			super.setFeature("http://xml.org/sax/features/namespaces", true); //$NON-NLS-1$
			super.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);
		} catch (SAXNotRecognizedException | SAXNotSupportedException e) {
			// Should never occur.
		}
	}

	@Override
	public void startDocument(XMLLocator locator, String encoding, NamespaceContext namespaceContext,
			Augmentations augs) throws XNIException {
		// Keep the locator: doctypeDecl needs the base system id to expand the DTD
		// system id.
		this.locator = locator;
		super.startDocument(locator, encoding, namespaceContext, augs);
	}

	@Override
	public void doctypeDecl(String rootElement, String publicId, String systemId, Augmentations augs)
			throws XNIException {
		if (systemId != null) {
			// There is a declared DTD in the DOCTYPE
			// <!DOCTYPE root-element SYSTEM "./extended.dtd" []>
			String eid = null;
			try {
				eid = XMLEntityManager.expandSystemId(systemId, locator.getExpandedSystemId(), false);
			} catch (java.io.IOException e) {
				// The system id cannot be expanded: eid stays null, which isDTDExists
				// treats as an existing DTD, so no "DTD not found" error is reported here.
			}
			if (!isDTDExists(eid)) {
				// The declared DTD doesn't exist
				// <!DOCTYPE root-element SYSTEM "./dtd-doesnt-exist.dtd" []>
				reportDTDNotFound(eid);

				// FIX [BUG 1]
				// To avoid breaking the validation (ex : syntax validation) we mark
				// the cache DTD as true to avoid having an IOException error which breaks the
				// validation.
				// boolean readExternalSubset must be false in
				// Xerces
				// https://github.com/apache/xerces2-j/blob/e5a239b96fd2cff6566a29e7a4a3a4a2bbf9b0d4/src/org/apache/xerces/impl/XMLDocumentScannerImpl.java#L950
				ValidationManager validationManager = (ValidationManager) fConfiguration
						.getProperty(VALIDATION_MANAGER);
				if (validationManager != null) {
					validationManager.setCachedDTD(true);
				}
			} else if (grammarPool != null) {
				// FIX [BUG 2]
				fillEntitiesFromCachedGrammar(rootElement, publicId, systemId, eid);
			}
		}
		super.doctypeDecl(rootElement, publicId, systemId, augs);
	}

	/**
	 * Reports the "DTD not found" diagnostic on the system id of the DOCTYPE.
	 *
	 * @param expandedSystemId the expanded system id of the DTD which was not
	 *                         found.
	 */
	private void reportDTDNotFound(String expandedSystemId) {
		DOMDocumentType docType = document.getDoctype();
		if (docType == null || docType.getSystemIdNode() == null) {
			// The DOM document has no system id node to attach the diagnostic to: skip
			// the report instead of failing with a NullPointerException which would
			// break the validation.
			return;
		}
		try {
			Range range = new Range(document.positionAt(docType.getSystemIdNode().getStart()),
					document.positionAt(docType.getSystemIdNode().getEnd()));
			reporter.addDiagnostic(range, MessageFormat.format(DTD_NOT_FOUND, expandedSystemId),
					DiagnosticSeverity.Error, DTDErrorCode.dtd_not_found.getCode());
		} catch (BadLocationException e) {
			// Do nothing
		}
	}

	/**
	 * Fills the XML entity manager of the parser configuration with the entities
	 * of the DTD grammar stored in the grammar pool, if any.
	 *
	 * @param rootElement      the root element name of the DOCTYPE.
	 * @param publicId         the public id of the DTD (may be null).
	 * @param systemId         the system id of the DTD.
	 * @param expandedSystemId the expanded system id of the DTD (may be null).
	 */
	private void fillEntitiesFromCachedGrammar(String rootElement, String publicId, String systemId,
			String expandedSystemId) {
		XMLEntityManager entityManager = (XMLEntityManager) fConfiguration.getProperty(ENTITY_MANAGER);
		if (entityManager == null) {
			return;
		}
		// DTD exists, get the DTD grammar from the cache
		XMLDTDDescription grammarDesc = new XMLDTDDescription(publicId, systemId, locator.getExpandedSystemId(),
				expandedSystemId, rootElement);
		Object grammar = grammarPool.retrieveGrammar(grammarDesc);
		if (grammar instanceof DTDGrammar) {
			// The DTD grammar is in cache, we need to fill XML entity manager with the
			// entities declared in the cached DTD grammar
			fillEntities((DTDGrammar) grammar, entityManager);
		}
	}

	/**
	 * Returns true if the DTD identified by the given expanded system id exists
	 * and false otherwise.
	 *
	 * <p>
	 * A null or empty system id is considered as existing. HTTP URLs are not
	 * opened and are considered as existing too.
	 * </p>
	 *
	 * @param expandedSystemId the expanded system id of the DTD.
	 * @return true if the DTD is considered as existing and false otherwise.
	 */
	private static boolean isDTDExists(String expandedSystemId) {
		if (expandedSystemId == null || expandedSystemId.isEmpty()) {
			return true;
		}
		try {
			URLConnection connection = new URL(expandedSystemId).openConnection();
			if (!(connection instanceof HttpURLConnection)) {
				try (InputStream stream = connection.getInputStream()) {
					// Opening the stream is enough to know that the resource exists.
				}
			}
		} catch (Exception e) {
			// Malformed URL or resource which cannot be opened.
			return false;
		}
		return true;
	}

	/**
	 * Fill entities from the given DTD grammar to the given entity manager.
	 *
	 * @param grammar       the DTD grammar
	 * @param entityManager the entity manager to update with entities of the DTD
	 *                      grammar.
	 */
	private static void fillEntities(DTDGrammar grammar, XMLEntityManager entityManager) {
		// DTDGrammar#getEntityDecl fills the given declaration through setValues: the
		// override below forwards each entity to the entity manager.
		XMLEntityDecl entityDecl = new XMLEntityDecl() {

			@Override
			public void setValues(String name, String publicId, String systemId, String baseSystemId, String notation,
					String value, boolean isPE, boolean inExternal) {
				if (inExternal) {
					// Only entities declared in the cached DTD grammar must be added in the XML
					// entity manager.
					entityManager.addInternalEntity(name, value);
				}
			}
		};
		// Visit each entity declaration until the grammar reports that there are no
		// more.
		int index = 0;
		while (grammar.getEntityDecl(index, entityDecl)) {
			index++;
		}
	}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2018 Angelo ZERR
* Copyright (c) 2018-2020 Angelo ZERR
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v2.0
* which accompanies this distribution, and is available at
Expand All @@ -14,25 +14,18 @@

import java.io.IOException;
import java.io.StringReader;
import java.text.MessageFormat;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.xerces.impl.XMLEntityManager;
import org.apache.xerces.parsers.SAXParser;
import org.apache.xerces.xni.XNIException;
import org.apache.xerces.xni.grammars.XMLGrammarPool;
import org.apache.xerces.xni.parser.XMLEntityResolver;
import org.apache.xerces.xni.parser.XMLInputSource;
import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.eclipse.lemminx.commons.BadLocationException;
import org.eclipse.lemminx.dom.DOMDocument;
import org.eclipse.lemminx.dom.DOMDocumentType;
import org.eclipse.lemminx.dom.DOMElement;
import org.eclipse.lemminx.extensions.contentmodel.participants.DTDErrorCode;
import org.eclipse.lemminx.extensions.contentmodel.settings.ContentModelSettings;
import org.eclipse.lemminx.extensions.contentmodel.settings.XMLValidationSettings;
import org.eclipse.lemminx.services.extensions.diagnostics.LSPContentHandler;
Expand All @@ -57,8 +50,6 @@ public class XMLValidator {

private static final Logger LOGGER = Logger.getLogger(XMLValidator.class.getName());

private static final String DTD_NOT_FOUND = "Cannot find DTD ''{0}''.\nCreate the DTD file or configure an XML catalog for this DTD.";

public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityResolver,
List<Diagnostic> diagnostics, ContentModelSettings contentModelSettings, XMLGrammarPool grammarPool,
CancelChecker monitor) {
Expand All @@ -74,14 +65,9 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
}

final LSPErrorReporterForXML reporter = new LSPErrorReporterForXML(document, diagnostics);
boolean externalDTDValid = checkExternalDTD(document, reporter, configuration);
SAXParser parser = new SAXParser(configuration);
// Add LSP error reporter to fill LSP diagnostics from Xerces errors
parser.setProperty("http://apache.org/xml/properties/internal/error-reporter", reporter);
parser.setFeature("http://apache.org/xml/features/continue-after-fatal-error", false); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true /* document.hasNamespaces() */); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/namespaces", true /* document.hasNamespaces() */); //$NON-NLS-1$

SAXParser parser = new LSPSAXParser(document, reporter, configuration, grammarPool);

// Add LSP content handler to stop XML parsing if monitor is canceled.
parser.setContentHandler(new LSPContentHandler(monitor));

Expand All @@ -99,9 +85,7 @@ public static void doDiagnostics(DOMDocument document, XMLEntityResolver entityR
} else {
hasGrammar = false; // validation for Schema was disabled
}

parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", externalDTDValid);
parser.setFeature("http://xml.org/sax/features/validation", hasGrammar && externalDTDValid); //$NON-NLS-1$
parser.setFeature("http://xml.org/sax/features/validation", hasGrammar); //$NON-NLS-1$

// Parse XML
String content = document.getText();
Expand Down Expand Up @@ -148,81 +132,6 @@ private static boolean isDisableOnlyDTDValidation(DOMDocument document) {
return !docType.getChildren().stream().anyMatch(node -> node.isDTDElementDecl() || node.isDTDAttListDecl());
}

/**
* Returns true if the given document has a valid DTD (or doesn't define a DTD)
* and false otherwise.
*
* <p>
* False is returned only when parsing the DOCTYPE throws an IOException; a
* SAXException or a CancellationException raised by that parsing is ignored.
* </p>
*
* @param document the DOM document
* @param reporter the reporter which receives the "DTD not found" diagnostic
* @param configuration the Xerces parser configuration
* @return true if the given document has a valid DTD (or doesn't define a DTD)
* and false otherwise.
*/
private static boolean checkExternalDTD(DOMDocument document, LSPErrorReporterForXML reporter,
XMLParserConfiguration configuration) {
if (!document.hasDTD()) {
return true;
}
DOMDocumentType docType = document.getDoctype();
if (docType.getKindNode() == null) {
return true;
}

// When XML is bound with a DTD path which doesn't exist, Xerces throws an
// IOException which breaks the validation of XML syntax instead of reporting it
// (like XML Schema). Here we parse only the
// DOCTYPE to catch this error. If there is an error
// the next validation will be disabled by using
// http://xml.org/sax/features/validation &
// http://apache.org/xml/features/nonvalidating/load-external-dtd (disable uses
// of DTD for validation)

// Parse only the DOCTYPE of the DOM document: the text up to the end of the
// DOCTYPE followed by an empty root element.

int end = document.getDoctype().getEnd();
String xml = document.getText().substring(0, end);
xml += "<root/>";
try {

// Customize the entity manager to collect the error when DTD doesn't exist.
XMLEntityManager entityManager = new XMLEntityManager() {
@Override
public String setupCurrentEntity(String name, XMLInputSource xmlInputSource, boolean literal,
boolean isExternal) throws IOException, XNIException {
// Catch the setupCurrentEntity method which throws an IOException when DTD is
// not found
try {
return super.setupCurrentEntity(name, xmlInputSource, literal, isExternal);
} catch (IOException e) {
// Report the DTD invalid error on the system id of the DOCTYPE
try {
Range range = new Range(document.positionAt(docType.getSystemIdNode().getStart()),
document.positionAt(docType.getSystemIdNode().getEnd()));
reporter.addDiagnostic(range,
MessageFormat.format(DTD_NOT_FOUND, xmlInputSource.getSystemId()),
DiagnosticSeverity.Error, DTDErrorCode.dtd_not_found.getCode());
} catch (BadLocationException e1) {
// Do nothing
}
// Rethrow so that the outer catch of IOException returns false
throw e;
}
}
};
entityManager.reset(configuration);

SAXParser parser = new SAXParser(configuration);
parser.setProperty("http://apache.org/xml/properties/internal/entity-manager", entityManager);
parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", true);

parseXML(xml, document.getDocumentURI(), parser);
} catch (SAXException | CancellationException exception) {
// ignore error
} catch (IOException e) {
// The DTD cannot be loaded
return false;
}
return true;
}

/**
* Warn if XML document is not bound to a grammar according the settings
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,18 @@ public String reportError(XMLLocator location, String domain, String key, Object
return null;
}

if (severity == SEVERITY_FATAL_ERROR && !fContinueAfterFatalError) {
if (severity == SEVERITY_FATAL_ERROR && !fContinueAfterFatalError && !isIgnoreFatalError(key)) {
XMLParseException parseException = (exception != null) ? new XMLParseException(location, message, exception)
: new XMLParseException(location, message);
throw parseException;
}
return message;
}

/**
 * Returns true if the fatal error identified by the given key must not stop
 * the parsing and false otherwise.
 *
 * <p>
 * This default implementation never ignores a fatal error. Subclasses can
 * override it: when it returns true, reportError does not throw the
 * XMLParseException for that fatal error.
 * </p>
 *
 * @param key the error key.
 * @return true if the fatal error must be ignored and false otherwise.
 */
protected boolean isIgnoreFatalError(String key) {
return false;
}

public boolean addDiagnostic(Range adjustedRange, String message, DiagnosticSeverity severity, String key) {
Diagnostic d = new Diagnostic(adjustedRange, message, severity, source, key);
if (diagnostics.contains(d)) {
Expand Down
Loading

0 comments on commit 47fe693

Please sign in to comment.