Improve performance and memory by caching XML Schema / DTD

Fixes #534 Signed-off-by: azerr <[email protected]>
eclipse · Oct 10, 2019 · 8924e2e · 8924e2e
1 parent 802ce50
commit 8924e2e
Show file tree

Hide file tree

Showing 7 changed files with 364 additions and 57 deletions.
diff --git a/...g/eclipse/lsp4xml/extensions/contentmodel/participants/diagnostics/LSPXMLGrammarPool.java b/...g/eclipse/lsp4xml/extensions/contentmodel/participants/diagnostics/LSPXMLGrammarPool.java
@@ -0,0 +1,256 @@
+/*******************************************************************************
+* Copyright (c) 2019 Red Hat Inc. and others.
+* All rights reserved. This program and the accompanying materials
+* are made available under the terms of the Eclipse Public License v2.0
+* which accompanies this distribution, and is available at
+* http://www.eclipse.org/legal/epl-v20.html
+*
+* Contributors:
+*     Red Hat Inc. - initial API and implementation
+*******************************************************************************/
+package org.eclipse.lsp4xml.extensions.contentmodel.participants.diagnostics;
+
+import org.apache.xerces.impl.dtd.DTDGrammar;
+import org.apache.xerces.impl.xs.SchemaGrammar;
+import org.apache.xerces.xni.grammars.Grammar;
+import org.apache.xerces.xni.grammars.XMLGrammarDescription;
+import org.apache.xerces.xni.grammars.XMLGrammarPool;
+import org.eclipse.lsp4xml.extensions.contentmodel.model.FilesChangedTracker;
+import org.eclipse.lsp4xml.extensions.dtd.utils.DTDUtils;
+import org.eclipse.lsp4xml.extensions.xsd.utils.XSDUtils;
+
+import com.google.common.base.Objects;
+
+/**
+ * LSP XML grammar pool.
+ * 
+ * @author Angelo ZERR
+ *
+ */
+public class LSPXMLGrammarPool implements XMLGrammarPool {
+
+	private static final int TABLE_SIZE = 11;
+
+	/** Grammars. */
+	private final Entry[] fGrammars;
+
+	// the number of grammars in the pool
+	private int fGrammarCount = 0;
+
+	public LSPXMLGrammarPool() {
+		this(TABLE_SIZE);
+	}
+
+	public LSPXMLGrammarPool(int initialCapacity) {
+		fGrammars = new Entry[initialCapacity];
+	}
+
+	@Override
+	public Grammar[] retrieveInitialGrammarSet(String grammarType) {
+		// To avoid having trouble with xsi:noNamespaceSchemaLocation, we return nothing
+		// because in the case of xsi:noNamespaceSchemaLocation
+		// it's the first XML Schema which was registered as
+		// xs:noNamespaceSchemaLocation which is used.
+		return null;
+	}
+
+	@Override
+	public void cacheGrammars(String grammarType, Grammar[] grammars) {
+		for (int i = 0; i < grammars.length; i++) {
+			putGrammar(grammars[i]);
+		}
+	}
+
+	@Override
+	public Grammar retrieveGrammar(XMLGrammarDescription desc) {
+		return getGrammar(desc);
+	}
+
+	private void putGrammar(Grammar grammar) {
+		synchronized (fGrammars) {
+			XMLGrammarDescription desc = grammar.getGrammarDescription();
+			int hash = hashCode(desc);
+			int index = (hash & 0x7FFFFFFF) % fGrammars.length;
+			for (Entry entry = fGrammars[index]; entry != null; entry = entry.next) {
+				if (entry.hash == hash && equals(entry.desc, desc)) {
+					entry.grammar = grammar;
+					return;
+				}
+			}
+			// create a new entry
+			Entry entry = new Entry(hash, desc, grammar, fGrammars[index]);
+			fGrammars[index] = entry;
+			fGrammarCount++;
+		}
+	}
+
+	/**
+	 * Returns the grammar associated to the specified grammar description.
+	 * Currently, the root element name is used as the key for DTD grammars and the
+	 * target namespace is used as the key for Schema grammars.
+	 *
+	 * @param desc The Grammar Description.
+	 */
+	private Grammar getGrammar(XMLGrammarDescription desc) {
+		synchronized (fGrammars) {
+			int hash = hashCode(desc);
+			int index = (hash & 0x7FFFFFFF) % fGrammars.length;
+			for (Entry entry = fGrammars[index]; entry != null; entry = entry.next) {
+				if ((entry.hash == hash) && equals(entry.desc, desc)) {
+					if (entry.isDirty()) {
+						removeGrammar(entry.desc);
+						return null;
+					}
+					return entry.grammar;
+				}
+			}
+			return null;
+		}
+	}
+
+	/**
+	 * Removes the grammar associated to the specified grammar description from the
+	 * grammar pool and returns the removed grammar. Currently, the root element
+	 * name is used as the key for DTD grammars and the target namespace is used as
+	 * the key for Schema grammars.
+	 *
+	 * @param desc The Grammar Description.
+	 * @return The removed grammar.
+	 */
+	public Grammar removeGrammar(XMLGrammarDescription desc) {
+		synchronized (fGrammars) {
+			int hash = hashCode(desc);
+			int index = (hash & 0x7FFFFFFF) % fGrammars.length;
+			for (Entry entry = fGrammars[index], prev = null; entry != null; prev = entry, entry = entry.next) {
+				if ((entry.hash == hash) && equals(entry.desc, desc)) {
+					if (prev != null) {
+						prev.next = entry.next;
+					} else {
+						fGrammars[index] = entry.next;
+					}
+					Grammar tempGrammar = entry.grammar;
+					entry.grammar = null;
+					fGrammarCount--;
+					return tempGrammar;
+				}
+			}
+			return null;
+		}
+	}
+
+	/**
+	 * Returns true if the grammar pool contains a grammar associated to the
+	 * specified grammar description. Currently, the root element name is used as
+	 * the key for DTD grammars and the target namespace is used as the key for
+	 * Schema grammars.
+	 *
+	 * @param desc The Grammar Description.
+	 */
+	public boolean containsGrammar(XMLGrammarDescription desc) {
+		synchronized (fGrammars) {
+			int hash = hashCode(desc);
+			int index = (hash & 0x7FFFFFFF) % fGrammars.length;
+			for (Entry entry = fGrammars[index]; entry != null; entry = entry.next) {
+				if ((entry.hash == hash) && equals(entry.desc, desc)) {
+					if (entry.isDirty()) {
+						removeGrammar(entry.desc);
+						return false;
+					}
+					return true;
+				}
+			}
+			return false;
+		}
+	}
+
+	@Override
+	public void lockPool() {
+	}
+
+	@Override
+	public void unlockPool() {
+	}
+
+	@Override
+	public void clear() {
+		for (int i = 0; i < fGrammars.length; i++) {
+			if (fGrammars[i] != null) {
+				fGrammars[i].clear();
+				fGrammars[i] = null;
+			}
+		}
+		fGrammarCount = 0;
+	}
+
+	/**
+	 * This method checks whether two grammars are the same. Currently, we compare
+	 * the root element names for DTD grammars and the target namespaces for Schema
+	 * grammars. The application can override this behaviour and add its own logic.
+	 *
+	 * @param desc1 The grammar description
+	 * @param desc2 The grammar description of the grammar to be compared to
+	 * @return True if the grammars are equal, otherwise false
+	 */
+	public boolean equals(XMLGrammarDescription desc1, XMLGrammarDescription desc2) {
+		String systemId1 = desc1.getExpandedSystemId();
+		String systemId2 = desc2.getExpandedSystemId();
+		if (systemId1 != null && systemId2 != null) {
+			return Objects.equal(systemId1, systemId2);
+		}
+		return false; // desc1.equals(desc2);
+	}
+
+	/**
+	 * Returns the hash code value for the given grammar description.
+	 *
+	 * @param desc The grammar description
+	 * @return The hash code value
+	 */
+	public int hashCode(XMLGrammarDescription desc) {
+		return desc.hashCode();
+	}
+
+	/**
+	 * This class is a grammar pool entry. Each entry acts as a node in a linked
+	 * list.
+	 */
+	protected static final class Entry {
+		public int hash;
+		public XMLGrammarDescription desc;
+		public Grammar grammar;
+		public Entry next;
+		private final FilesChangedTracker tracker;
+
+		protected Entry(int hash, XMLGrammarDescription desc, Grammar grammar, Entry next) {
+			this.hash = hash;
+			this.desc = desc;
+			this.grammar = grammar;
+			this.next = next;
+			this.tracker = create(grammar);
+		}
+
+		private static FilesChangedTracker create(Grammar grammar) {
+			if (grammar instanceof SchemaGrammar) {
+				return XSDUtils.createFilesChangedTracker((SchemaGrammar) grammar);
+			}
+			if (grammar instanceof DTDGrammar) {
+				return DTDUtils.createFilesChangedTracker((DTDGrammar) grammar);
+			}
+			return null;
+		}
+
+		public boolean isDirty() {
+			return tracker != null ? tracker.isDirty() : true;
+		}
+
+		// clear this entry; useful to promote garbage collection
+		// since reduces reference count of objects to be destroyed
+		protected void clear() {
+			desc = null;
+			grammar = null;
+			if (next != null) {
+				next.clear();
+				next = null;
+			}
+		}
+	}
+}
diff --git a/...e/lsp4xml/extensions/contentmodel/participants/diagnostics/LSPXMLParserConfiguration.java b/...e/lsp4xml/extensions/contentmodel/participants/diagnostics/LSPXMLParserConfiguration.java
@@ -12,6 +12,7 @@
 import org.apache.xerces.impl.dtd.XMLDTDValidator;
 import org.apache.xerces.parsers.XIncludeAwareParserConfiguration;
 import org.apache.xerces.xni.XNIException;
+import org.apache.xerces.xni.grammars.XMLGrammarPool;
 import org.apache.xerces.xni.parser.XMLComponentManager;
 import org.apache.xerces.xni.parser.XMLConfigurationException;
 import org.eclipse.lsp4xml.extensions.contentmodel.settings.XMLValidationSettings;
@@ -21,14 +22,22 @@
  * 
  * <ul>
  * <li>disable only DTD validation if required</li>
+ * <li>disable doctype declaration according to the validation settings</li>
+ * <li>disable external entities according to the validation settings</li>
+ * <li>manage a custom grammar pool to retrieve compiled XML Schema/DTD from a
+ * given XML file path</li>
  * </ul>
  *
  */
 class LSPXMLParserConfiguration extends XIncludeAwareParserConfiguration {
 
+	// the grammar pool shared by all instances of this parser configuration
+	private static final XMLGrammarPool fStaticGrammarPool = new LSPXMLGrammarPool();
+
 	private final boolean disableDTDValidation;
 
 	public LSPXMLParserConfiguration(boolean disableDTDValidation, XMLValidationSettings validationSettings) {
+		super(null, fStaticGrammarPool);
 		this.disableDTDValidation = disableDTDValidation;
 		// Disable DOCTYPE declaration if settings is set to true.
 		boolean disallowDocTypeDecl = validationSettings != null ? validationSettings.isDisallowDocTypeDecl() : false;

diff --git a/....lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/dtd/contentmodel/CMDTDDocument.java b/....lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/dtd/contentmodel/CMDTDDocument.java
@@ -34,6 +34,7 @@
 import org.eclipse.lsp4xml.extensions.contentmodel.model.CMDocument;
 import org.eclipse.lsp4xml.extensions.contentmodel.model.CMElementDeclaration;
 import org.eclipse.lsp4xml.extensions.contentmodel.model.FilesChangedTracker;
+import org.eclipse.lsp4xml.extensions.dtd.utils.DTDUtils;
 import org.eclipse.lsp4xml.utils.URIUtils;
 
 /**
@@ -145,37 +146,10 @@ public void endContentModel(Augmentations augs) throws XNIException {
 	@Override
 	public Grammar loadGrammar(XMLInputSource source) throws IOException, XNIException {
 		grammar = (DTDGrammar) super.loadGrammar(source);
-		this.tracker = new FilesChangedTracker();
-		updateFilesChangedTracker();
+		this.tracker = DTDUtils.createFilesChangedTracker(grammar);
 		return grammar;
 	}
 
-	/**
-	 * Update files tracker by adding DTD
-	 */
-	private void updateFilesChangedTracker() {
-		Set<DTDGrammar> trackedGrammars = new HashSet<>();
-		updateTracker(grammar, trackedGrammars, tracker);
-	}
-
-	private static void updateTracker(DTDGrammar grammar, Set<DTDGrammar> trackedGrammars,
-			FilesChangedTracker tracker) {
-		if (grammar == null || trackedGrammars.contains(grammar)) {
-			return;
-		}
-		trackedGrammars.add(grammar);
-		// Track the grammar
-		String dtdURI = getDTDURI(grammar);
-		if (dtdURI != null && URIUtils.isFileResource(dtdURI)) {
-			// The DTD is a file, track when file changed
-			tracker.addFileURI(dtdURI);
-		}
-	}
-
-	private static String getDTDURI(DTDGrammar grammar) {
-		return grammar.getGrammarDescription().getExpandedSystemId();
-	}
-
 	public void loadInternalDTD(String internalSubset, String baseSystemId, String systemId)
 			throws XNIException, IOException {
 		// Load empty DTD grammar

diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/dtd/utils/DTDUtils.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/dtd/utils/DTDUtils.java
@@ -10,17 +10,22 @@
 package org.eclipse.lsp4xml.extensions.dtd.utils;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
 
+import org.apache.xerces.impl.dtd.DTDGrammar;
 import org.eclipse.lsp4j.jsonrpc.CancelChecker;
 import org.eclipse.lsp4xml.dom.DOMDocumentType;
 import org.eclipse.lsp4xml.dom.DOMNode;
 import org.eclipse.lsp4xml.dom.DTDAttlistDecl;
 import org.eclipse.lsp4xml.dom.DTDDeclNode;
 import org.eclipse.lsp4xml.dom.DTDDeclParameter;
 import org.eclipse.lsp4xml.dom.DTDElementDecl;
+import org.eclipse.lsp4xml.extensions.contentmodel.model.FilesChangedTracker;
+import org.eclipse.lsp4xml.utils.URIUtils;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
@@ -181,4 +186,28 @@ private static boolean isValid(DTDElementDecl elementDecl) {
 		return elementDecl.getNameParameter() != null;
 	}
 
+	/**
+	 * Creates a files changed tracker and registers the given DTD grammar in it.
+	 *
+	 * @param grammar the DTD grammar to track (may be null, nothing is tracked in
+	 *                this case).
+	 * @return the new tracker.
+	 */
+	public static FilesChangedTracker createFilesChangedTracker(DTDGrammar grammar) {
+		final FilesChangedTracker filesTracker = new FilesChangedTracker();
+		final Set<DTDGrammar> visited = new HashSet<>();
+		updateTracker(grammar, visited, filesTracker);
+		return filesTracker;
+	}
+
+	private static void updateTracker(DTDGrammar grammar, Set<DTDGrammar> trackedGrammars,
+			FilesChangedTracker tracker) {
+		if (grammar == null || trackedGrammars.contains(grammar)) {
+			return;
+		}
+		trackedGrammars.add(grammar);
+		// Track the grammar
+		String dtdURI = getDTDURI(grammar);
+		if (dtdURI != null && URIUtils.isFileResource(dtdURI)) {
+			// The DTD is a file, track when file changed
+			tracker.addFileURI(dtdURI);
+		}
+	}
+
+	/**
+	 * Returns the expanded system id of the description of the given DTD grammar.
+	 *
+	 * @param grammar the DTD grammar (must not be null).
+	 * @return the expanded system id as returned by the grammar description.
+	 */
+	private static String getDTDURI(DTDGrammar grammar) {
+		return grammar.getGrammarDescription().getExpandedSystemId();
+	}
 }