From 9071ea41d92e59fb9fdf73f38769f5370b497cd5 Mon Sep 17 00:00:00 2001 From: Nikolas Komonen Date: Mon, 4 Nov 2019 15:49:12 -0500 Subject: [PATCH] Fuzzy Element name codeaction Handles regular element names and ones with prefixes Fixes #589 Refer to #589 for code to test Signed-off-by: Nikolas Komonen --- .../lsp4xml/commons/CodeActionFactory.java | 23 + .../participants/DTDErrorCode.java | 4 +- .../participants/XMLSchemaErrorCode.java | 7 +- .../participants/XMLSyntaxErrorCode.java | 4 +- .../cvc_complex_type_2_4_aCodeAction.java | 152 +++++++ .../diagnostics/XMLValidator.java | 2 +- .../xsd/participants/XSDErrorCode.java | 4 +- .../lsp4xml/services/XMLDefinition.java | 4 +- .../lsp4xml/services/XMLLanguageService.java | 2 +- .../lsp4xml/utils/LevenshteinDistance.java | 400 ++++++++++++++++++ .../lsp4xml/utils/XMLPositionUtility.java | 107 ++++- .../java/org/eclipse/lsp4xml/XMLAssert.java | 4 +- .../XMLSchemaDiagnosticsTest.java | 78 +++- .../xsd/fuzzyCodeAction/FuzzySchemaA.xsd | 7 + .../xsd/fuzzyCodeAction/FuzzySchemaB.xsd | 12 + 15 files changed, 775 insertions(+), 35 deletions(-) create mode 100644 org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/codeactions/cvc_complex_type_2_4_aCodeAction.java create mode 100644 org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/LevenshteinDistance.java create mode 100644 org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaA.xsd create mode 100644 org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaB.xsd diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/commons/CodeActionFactory.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/commons/CodeActionFactory.java index ec13db93a..08fb3b3cf 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/commons/CodeActionFactory.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/commons/CodeActionFactory.java @@ -10,8 +10,11 @@ */ package 
org.eclipse.lsp4xml.commons; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; +import java.util.List; import org.eclipse.lsp4j.CodeAction; import org.eclipse.lsp4j.CodeActionKind; @@ -85,4 +88,24 @@ public static CodeAction replace(String title, Range range, String replaceText, insertContentAction.setEdit(workspaceEdit); return insertContentAction; } + + public static CodeAction replaceAt(String title, String replaceText, TextDocumentItem document, + Diagnostic diagnostic, Collection ranges) { + CodeAction insertContentAction = new CodeAction(title); + insertContentAction.setKind(CodeActionKind.QuickFix); + insertContentAction.setDiagnostics(Arrays.asList(diagnostic)); + + VersionedTextDocumentIdentifier versionedTextDocumentIdentifier = new VersionedTextDocumentIdentifier( + document.getUri(), document.getVersion()); + ArrayList edits = new ArrayList(); + for (Range range : ranges) { + TextEdit edit = new TextEdit(range, replaceText); + edits.add(edit); + } + TextDocumentEdit textDocumentEdit = new TextDocumentEdit(versionedTextDocumentIdentifier, edits); + WorkspaceEdit workspaceEdit = new WorkspaceEdit(Collections.singletonList(Either.forLeft(textDocumentEdit))); + + insertContentAction.setEdit(workspaceEdit); + return insertContentAction; + } } diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/DTDErrorCode.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/DTDErrorCode.java index 90434d88c..78f31221c 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/DTDErrorCode.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/DTDErrorCode.java @@ -117,7 +117,7 @@ public static Range toLSPRange(XMLLocator location, DTDErrorCode code, Object[] case MSG_REQUIRED_ATTRIBUTE_NOT_SPECIFIED: case 
MSG_ELEMENT_NOT_DECLARED: case MSG_CONTENT_INVALID: { - return XMLPositionUtility.selectStartTag(offset, document); + return XMLPositionUtility.selectStartTagName(offset, document); } case MSG_ATTRIBUTE_NOT_DECLARED: { return XMLPositionUtility.selectAttributeValueAt(getString(arguments[1]), offset, document); @@ -134,7 +134,7 @@ public static Range toLSPRange(XMLLocator location, DTDErrorCode code, Object[] case MSG_ELEMENT_WITH_ID_REQUIRED: { DOMElement element = document.getDocumentElement(); if (element != null) { - return XMLPositionUtility.selectStartTag(element); + return XMLPositionUtility.selectStartTagName(element); } } case IDREFSInvalid: diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSchemaErrorCode.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSchemaErrorCode.java index f230497f4..2c34d58e4 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSchemaErrorCode.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSchemaErrorCode.java @@ -27,6 +27,7 @@ import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_attribute_3CodeAction; import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_complex_type_2_1CodeAction; import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_complex_type_2_3CodeAction; +import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_complex_type_2_4_aCodeAction; import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_complex_type_3_2_2CodeAction; import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_complex_type_4CodeAction; import org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions.cvc_enumeration_validCodeAction; @@ -134,7 +135,7 @@ public static Range 
toLSPRange(XMLLocator location, XMLSchemaErrorCode code, Obj case cvc_complex_type_4: case src_element_3: case TargetNamespace_2: - return XMLPositionUtility.selectStartTag(offset, document); + return XMLPositionUtility.selectStartTagName(offset, document); case cvc_complex_type_3_2_2: { String attrName = getString(arguments[1]); return XMLPositionUtility.selectAttributeNameFromGivenNameAt(attrName, offset, document); @@ -237,13 +238,15 @@ public static Range toLSPRange(XMLLocator location, XMLSchemaErrorCode code, Obj } } case cvc_type_3_1_2: - return XMLPositionUtility.selectStartTag(offset, document); + return XMLPositionUtility.selectStartTagName(offset, document); default: } return null; } public static void registerCodeActionParticipants(Map codeActions) { + codeActions.put(cvc_complex_type_2_4_a.getCode(), new cvc_complex_type_2_4_aCodeAction()); + codeActions.put(cvc_complex_type_2_4_c.getCode(), new cvc_complex_type_2_4_aCodeAction()); codeActions.put(cvc_complex_type_2_3.getCode(), new cvc_complex_type_2_3CodeAction()); codeActions.put(cvc_complex_type_4.getCode(), new cvc_complex_type_4CodeAction()); codeActions.put(cvc_type_3_1_1.getCode(), new cvc_type_3_1_1CodeAction()); diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSyntaxErrorCode.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSyntaxErrorCode.java index c6b8d97a9..434aabfdd 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSyntaxErrorCode.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/XMLSyntaxErrorCode.java @@ -104,7 +104,7 @@ public static Range toLSPRange(XMLLocator location, XMLSyntaxErrorCode code, Obj case ElementPrefixUnbound: case ElementUnterminated: case RootElementTypeMustMatchDoctypedecl: - return XMLPositionUtility.selectStartTag(offset, document); + return 
XMLPositionUtility.selectStartTagName(offset, document); case EqRequiredInAttribute: { String attrName = getString(arguments[1]); return XMLPositionUtility.selectAttributeNameFromGivenNameAt(attrName, offset, document); @@ -157,7 +157,7 @@ public static Range toLSPRange(XMLLocator location, XMLSyntaxErrorCode code, Obj */ return XMLPositionUtility.selectPreviousNodesEndTag(offset, document); case CustomETag: - return XMLPositionUtility.selectEndTag(offset, document); + return XMLPositionUtility.selectEndTagName(offset, document); case ETagRequired: { String tag = getString(arguments[0]); return XMLPositionUtility.selectChildEndTag(tag, offset, document); diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/codeactions/cvc_complex_type_2_4_aCodeAction.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/codeactions/cvc_complex_type_2_4_aCodeAction.java new file mode 100644 index 000000000..062f4c7b3 --- /dev/null +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/contentmodel/participants/codeactions/cvc_complex_type_2_4_aCodeAction.java @@ -0,0 +1,152 @@ +/******************************************************************************* +* Copyright (c) 2019 Red Hat Inc. and others. +* All rights reserved. This program and the accompanying materials +* which accompanies this distribution, and is available at +* http://www.eclipse.org/legal/l-v20.html +* +* Contributors: +* Red Hat Inc. 
- initial API and implementation +*******************************************************************************/ +package org.eclipse.lsp4xml.extensions.contentmodel.participants.codeactions; + +import java.text.Collator; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.TreeSet; + +import org.eclipse.lsp4j.CodeAction; +import org.eclipse.lsp4j.Diagnostic; +import org.eclipse.lsp4j.Range; +import org.eclipse.lsp4xml.commons.CodeActionFactory; +import org.eclipse.lsp4xml.dom.DOMDocument; +import org.eclipse.lsp4xml.dom.DOMElement; +import org.eclipse.lsp4xml.dom.DOMNode; +import org.eclipse.lsp4xml.extensions.contentmodel.model.CMDocument; +import org.eclipse.lsp4xml.extensions.contentmodel.model.CMElementDeclaration; +import org.eclipse.lsp4xml.extensions.contentmodel.model.ContentModelManager; +import org.eclipse.lsp4xml.services.extensions.ICodeActionParticipant; +import org.eclipse.lsp4xml.services.extensions.IComponentProvider; +import org.eclipse.lsp4xml.settings.XMLFormattingOptions; +import org.eclipse.lsp4xml.utils.LevenshteinDistance; +import org.eclipse.lsp4xml.utils.XMLPositionUtility; + +/** + * cvc_complex_type_2_4_a + */ +public class cvc_complex_type_2_4_aCodeAction implements ICodeActionParticipant { + + private static final float MAX_DISTANCE_DIFF_RATIO = 0.4f; + + @Override + public void doCodeAction(Diagnostic diagnostic, Range range, DOMDocument document, List codeActions, + XMLFormattingOptions formattingSettings, IComponentProvider componentProvider) { + try { + int offset = document.offsetAt(diagnostic.getRange().getStart()); + DOMNode node = document.findNodeAt(offset); + if (node != null && node.isElement()) { + // Get element from the diagnostic + DOMElement element = (DOMElement) node; + String localName = element.getLocalName(); + + Collection possibleElements = getPossibleElements(element, componentProvider); + if (possibleElements != null) { + + // When added to these collections, 
the names will be ordered alphabetically + Collection otherElementNames = new TreeSet(Collator.getInstance()); + Collection similarElementNames = new TreeSet(Collator.getInstance()); + + // Try to collect similar names coming from tag name + for (CMElementDeclaration possibleElement : possibleElements) { + String possibleElementName = possibleElement.getName(); + if (isSimilar(possibleElementName, localName)) { + similarElementNames.add(possibleElementName); + } else { + otherElementNames.add(possibleElementName); + } + } + + // Create ranges for the replace. + boolean selectLocalNameOnly = element.getPrefix() != null; + List ranges = new ArrayList<>(); + Range startRange, endRange; + if(selectLocalNameOnly) { + startRange = XMLPositionUtility.selectStartTagLocalName(element); + endRange = XMLPositionUtility.selectEndTagLocalName(element); + } + else { + startRange = XMLPositionUtility.selectStartTagName(element); + endRange = XMLPositionUtility.selectEndTagName(element); + } + ranges.add(startRange); + + if (endRange != null) { + ranges.add(endRange); + } + + if (!similarElementNames.isEmpty()) { + // // Add code actions for each similar elements + for (String elementName : similarElementNames) { + CodeAction similarCodeAction = CodeActionFactory.replaceAt( + "Did you mean '" + elementName + "'?", elementName, document.getTextDocument(), + diagnostic, ranges); + codeActions.add(similarCodeAction); + } + } else { + // Add code actions for each possible elements + for (String elementName : otherElementNames) { + CodeAction otherCodeAction = CodeActionFactory.replaceAt( + "Replace with '" + elementName + "'", elementName, document.getTextDocument(), + diagnostic, ranges); + codeActions.add(otherCodeAction); + } + } + } + } + + } catch (Exception e) { + // Do nothing + } + } + + /** + * Returns the possible elements for the given DOM element. 
+ * + * @param element the DOM element + * @param componentProvider the component provider + * @return the possible elements for the given DOM element. + * @throws Exception + */ + private static Collection getPossibleElements(DOMElement element, + IComponentProvider componentProvider) throws Exception { + ContentModelManager contentModelManager = componentProvider.getComponent(ContentModelManager.class); + + String prefix = element.getPrefix(); + DOMElement parentElement = element.getParentElement(); + String parentPrefix = parentElement.getPrefix(); + // check if prefix is the same than the parent profix + if (prefix != null && !prefix.equals(parentPrefix)) { + // We are in the case + // diagnos Range range = null; DOMElement documentElement = document.getDocumentElement(); if (documentElement != null) { - range = XMLPositionUtility.selectStartTag(documentElement); + range = XMLPositionUtility.selectStartTagName(documentElement); } if (range == null) { range = new Range(new Position(0, 0), new Position(0, 0)); diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/xsd/participants/XSDErrorCode.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/xsd/participants/XSDErrorCode.java index e96c05a4f..76080fb0e 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/xsd/participants/XSDErrorCode.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/extensions/xsd/participants/XSDErrorCode.java @@ -104,7 +104,7 @@ public static Range toLSPRange(XMLLocator location, XSDErrorCode code, Object[] List children = parent.getChildrenWithAttributeValue("name", nameValue); if (children.isEmpty()) { - return XMLPositionUtility.selectStartTag(offset, document); + return XMLPositionUtility.selectStartTagName(offset, document); } offset = children.get(0).getStart() + 1; @@ -128,7 +128,7 @@ public static Range toLSPRange(XMLLocator location, XSDErrorCode code, Object[] case src_element_2_1: case 
src_element_3: case src_import_1_2: - return XMLPositionUtility.selectStartTag(offset, document); + return XMLPositionUtility.selectStartTagName(offset, document); case s4s_att_not_allowed: { String attrName = getString(arguments[1]); return XMLPositionUtility.selectAttributeNameFromGivenNameAt(attrName, offset, document); diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/services/XMLDefinition.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/services/XMLDefinition.java index 9c1ef0aeb..b3d38ecc2 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/services/XMLDefinition.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/services/XMLDefinition.java @@ -76,8 +76,8 @@ private static void findStartEndTagDefinition(IDefinitionRequest request, List publishDiagnostics) { String uri = document.getDocumentURI(); DOMElement documentElement = document.getDocumentElement(); - Range range = XMLPositionUtility.selectStartTag(documentElement); + Range range = XMLPositionUtility.selectStartTagName(documentElement); List diagnostics = new ArrayList<>(); diagnostics.add(new Diagnostic(range, message, severity, "XML")); publishDiagnostics.accept(new PublishDiagnosticsParams(uri, diagnostics)); diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/LevenshteinDistance.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/LevenshteinDistance.java new file mode 100644 index 000000000..6ba835f1e --- /dev/null +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/LevenshteinDistance.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +// package org.apache.commons.text.similarity; +package org.eclipse.lsp4xml.utils; + +import java.util.Arrays; + +/** + * An algorithm for measuring the difference between two character sequences. + * + *

+ * This class is a copy/paste of https://github.com/apache/commons-text/blob/8e31edfe60027e374129ae9c92050dfecd5321e4/src/main/java/org/apache/commons/text/similarity/LevenshteinDistance.java + *

+ * + *

+ * This is the number of changes needed to change one sequence into another, + * where each change is a single character modification (deletion, insertion + * or substitution). + *

+ * + *

+ * This code has been adapted from Apache Commons Lang 3.3. + *

+ * + * @since 1.0 + */ +public class LevenshteinDistance /*implements EditDistance*/ { + + /** + * Default instance. + */ + private static final LevenshteinDistance DEFAULT_INSTANCE = new LevenshteinDistance(); + + /** + * Threshold. + */ + private final Integer threshold; + + /** + *

+ * Creates an instance that uses a version + * of the algorithm that does not use a threshold parameter.

+ * + * @see LevenshteinDistance#getDefaultInstance() + */ + public LevenshteinDistance() { + this(null); + } + + /** + *

+ * If the threshold is not null, distance calculations will be limited to a maximum length. + * If the threshold is null, the unlimited version of the algorithm will be used. + *

+ * + * @param threshold + * If this is null then distances calculations will not be limited. + * This may not be negative. + */ + public LevenshteinDistance(final Integer threshold) { + if (threshold != null && threshold < 0) { + throw new IllegalArgumentException("Threshold must not be negative"); + } + this.threshold = threshold; + } + + /** + *

Find the Levenshtein distance between two Strings.

+ * + *

A higher score indicates a greater distance.

+ * + *

The previous implementation of the Levenshtein distance algorithm + * was from http://www.merriampark.com/ld.htm

+ * + *

Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError + * which can occur when my Java implementation is used with very large strings.
+ * This implementation of the Levenshtein distance algorithm + * is from http://www.merriampark.com/ldjava.htm

+ * + *
+     * distance.apply(null, *)             = IllegalArgumentException
+     * distance.apply(*, null)             = IllegalArgumentException
+     * distance.apply("","")               = 0
+     * distance.apply("","a")              = 1
+     * distance.apply("aaapppp", "")       = 7
+     * distance.apply("frog", "fog")       = 1
+     * distance.apply("fly", "ant")        = 3
+     * distance.apply("elephant", "hippo") = 7
+     * distance.apply("hippo", "elephant") = 7
+     * distance.apply("hippo", "zzzzzzzz") = 8
+     * distance.apply("hello", "hallo")    = 1
+     * 
+ * + * @param left the first string, must not be null + * @param right the second string, must not be null + * @return result distance, or -1 + * @throws IllegalArgumentException if either String input {@code null} + */ + // @Override + public Integer apply(final CharSequence left, final CharSequence right) { + if (threshold != null) { + return limitedCompare(left, right, threshold); + } + return unlimitedCompare(left, right); + } + + /** + * Gets the default instance. + * + * @return The default instance + */ + public static LevenshteinDistance getDefaultInstance() { + return DEFAULT_INSTANCE; + } + + /** + * Gets the distance threshold. + * + * @return The distance threshold + */ + public Integer getThreshold() { + return threshold; + } + + /** + * Find the Levenshtein distance between two CharSequences if it's less than or + * equal to a given threshold. + * + *

+ * This implementation follows from Algorithms on Strings, Trees and + * Sequences by Dan Gusfield and Chas Emerick's implementation of the + * Levenshtein distance algorithm from http://www.merriampark.com/ld.htm + *

+ * + *
+     * limitedCompare(null, *, *)             = IllegalArgumentException
+     * limitedCompare(*, null, *)             = IllegalArgumentException
+     * limitedCompare(*, *, -1)               = IllegalArgumentException
+     * limitedCompare("","", 0)               = 0
+     * limitedCompare("aaapppp", "", 8)       = 7
+     * limitedCompare("aaapppp", "", 7)       = 7
+     * limitedCompare("aaapppp", "", 6)       = -1
+     * limitedCompare("elephant", "hippo", 7) = 7
+     * limitedCompare("elephant", "hippo", 6) = -1
+     * limitedCompare("hippo", "elephant", 7) = 7
+     * limitedCompare("hippo", "elephant", 6) = -1
+     * 
+ * + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null + * @param threshold the target threshold, must not be negative + * @return result distance, or -1 + */ + private static int limitedCompare(CharSequence left, CharSequence right, final int threshold) { // NOPMD + if (left == null || right == null) { + throw new IllegalArgumentException("CharSequences must not be null"); + } + if (threshold < 0) { + throw new IllegalArgumentException("Threshold must not be negative"); + } + + /* + * This implementation only computes the distance if it's less than or + * equal to the threshold value, returning -1 if it's greater. The + * advantage is performance: unbounded distance is O(nm), but a bound of + * k allows us to reduce it to O(km) time by only computing a diagonal + * stripe of width 2k + 1 of the cost table. It is also possible to use + * this to compute the unbounded Levenshtein distance by starting the + * threshold at 1 and doubling each time until the distance is found; + * this is O(dm), where d is the distance. + * + * One subtlety comes from needing to ignore entries on the border of + * our stripe eg. p[] = |#|#|#|* d[] = *|#|#|#| We must ignore the entry + * to the left of the leftmost member We must ignore the entry above the + * rightmost member + * + * Another subtlety comes from our stripe running off the matrix if the + * strings aren't of the same size. Since string s is always swapped to + * be the shorter of the two, the stripe will always run off to the + * upper right instead of the lower left of the matrix. + * + * As a concrete example, suppose s is of length 5, t is of length 7, + * and our threshold is 1. In this case we're going to walk a stripe of + * length 3. The matrix would look like so: + * + *
+         *    1 2 3 4 5
+         * 1 |#|#| | | |
+         * 2 |#|#|#| | |
+         * 3 | |#|#|#| |
+         * 4 | | |#|#|#|
+         * 5 | | | |#|#|
+         * 6 | | | | |#|
+         * 7 | | | | | |
+         * 
+ * + * Note how the stripe leads off the table as there is no possible way + * to turn a string of length 5 into one of length 7 in edit distance of + * 1. + * + * Additionally, this implementation decreases memory usage by using two + * single-dimensional arrays and swapping them back and forth instead of + * allocating an entire n by m matrix. This requires a few minor + * changes, such as immediately returning when it's detected that the + * stripe has run off the matrix and initially filling the arrays with + * large values so that entries we don't compute are ignored. + * + * See Algorithms on Strings, Trees and Sequences by Dan Gusfield for + * some discussion. + */ + + int n = left.length(); // length of left + int m = right.length(); // length of right + + // if one string is empty, the edit distance is necessarily the length + // of the other + if (n == 0) { + return m <= threshold ? m : -1; + } else if (m == 0) { + return n <= threshold ? n : -1; + } + + if (n > m) { + // swap the two strings to consume less memory + final CharSequence tmp = left; + left = right; + right = tmp; + n = m; + m = right.length(); + } + + // the edit distance cannot be less than the length difference + if (m - n > threshold) { + return -1; + } + + int[] p = new int[n + 1]; // 'previous' cost array, horizontally + int[] d = new int[n + 1]; // cost array, horizontally + int[] tempD; // placeholder to assist in swapping p and d + + // fill in starting table values + final int boundary = Math.min(n, threshold) + 1; + for (int i = 0; i < boundary; i++) { + p[i] = i; + } + // these fills ensure that the value above the rightmost entry of our + // stripe will be ignored in following loop iterations + Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE); + Arrays.fill(d, Integer.MAX_VALUE); + + // iterates through t + for (int j = 1; j <= m; j++) { + final char rightJ = right.charAt(j - 1); // jth character of right + d[0] = j; + + // compute stripe indices, constrain to array size + 
final int min = Math.max(1, j - threshold); + final int max = j > Integer.MAX_VALUE - threshold ? n : Math.min( + n, j + threshold); + + // ignore entry left of leftmost + if (min > 1) { + d[min - 1] = Integer.MAX_VALUE; + } + + // iterates through [min, max] in s + for (int i = min; i <= max; i++) { + if (left.charAt(i - 1) == rightJ) { + // diagonally left and up + d[i] = p[i - 1]; + } else { + // 1 + minimum of cell to the left, to the top, diagonally + // left and up + d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]); + } + } + + // copy current distance counts to 'previous row' distance counts + tempD = p; + p = d; + d = tempD; + } + + // if p[n] is greater than the threshold, there's no guarantee on it + // being the correct + // distance + if (p[n] <= threshold) { + return p[n]; + } + return -1; + } + + /** + *

Find the Levenshtein distance between two Strings.

+ * + *

A higher score indicates a greater distance.

+ * + *

The previous implementation of the Levenshtein distance algorithm + * was from + * https://web.archive.org/web/20120526085419/http://www.merriampark.com/ldjava.htm

+ * + *

This implementation only needs one single-dimensional array of length s.length() + 1 (s being the shorter of the two inputs)

+ * + *
+     * unlimitedCompare(null, *)             = IllegalArgumentException
+     * unlimitedCompare(*, null)             = IllegalArgumentException
+     * unlimitedCompare("","")               = 0
+     * unlimitedCompare("","a")              = 1
+     * unlimitedCompare("aaapppp", "")       = 7
+     * unlimitedCompare("frog", "fog")       = 1
+     * unlimitedCompare("fly", "ant")        = 3
+     * unlimitedCompare("elephant", "hippo") = 7
+     * unlimitedCompare("hippo", "elephant") = 7
+     * unlimitedCompare("hippo", "zzzzzzzz") = 8
+     * unlimitedCompare("hello", "hallo")    = 1
+     * 
+ * + * @param left the first CharSequence, must not be null + * @param right the second CharSequence, must not be null + * @return result distance, or -1 + * @throws IllegalArgumentException if either CharSequence input is {@code null} + */ + private static int unlimitedCompare(CharSequence left, CharSequence right) { + if (left == null || right == null) { + throw new IllegalArgumentException("CharSequences must not be null"); + } + + /* + This implementation use two variable to record the previous cost counts, + So this implementation use less memory than previous impl. + */ + + int n = left.length(); // length of left + int m = right.length(); // length of right + + if (n == 0) { + return m; + } else if (m == 0) { + return n; + } + + if (n > m) { + // swap the input strings to consume less memory + final CharSequence tmp = left; + left = right; + right = tmp; + n = m; + m = right.length(); + } + + final int[] p = new int[n + 1]; + + // indexes into strings left and right + int i; // iterates through left + int j; // iterates through right + int upperLeft; + int upper; + + char rightJ; // jth character of right + int cost; // cost + + for (i = 0; i <= n; i++) { + p[i] = i; + } + + for (j = 1; j <= m; j++) { + upperLeft = p[0]; + rightJ = right.charAt(j - 1); + p[0] = j; + + for (i = 1; i <= n; i++) { + upper = p[i]; + cost = left.charAt(i - 1) == rightJ ? 
0 : 1; + // minimum of cell to the left+1, to the top+1, diagonally left and up +cost + p[i] = Math.min(Math.min(p[i - 1] + 1, p[i] + 1), upperLeft + cost); + upperLeft = upper; + } + } + + return p[n]; + } + +} \ No newline at end of file diff --git a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/XMLPositionUtility.java b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/XMLPositionUtility.java index 9bf409ab9..d9773f499 100644 --- a/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/XMLPositionUtility.java +++ b/org.eclipse.lsp4xml/src/main/java/org/eclipse/lsp4xml/utils/XMLPositionUtility.java @@ -247,28 +247,65 @@ static DOMNode findUnclosedChildNode(String childTag, List children) { public static Range selectRootStartTag(DOMDocument document) { DOMNode root = document.getDocumentElement(); - return selectStartTag(root); + return selectStartTagName(root); } - public static Range selectStartTag(int offset, DOMDocument document) { + public static Range selectStartTagName(int offset, DOMDocument document) { DOMNode element = document.findNodeAt(offset); if (element != null) { - return selectStartTag(element); + return selectStartTagName(element); } return null; } /** - * Returns the range of the start tag of the given element and null + * Returns the range of the start tag name (excludes the '<') of the given element and null * otherwise. * * @param element the DOM element * @return the range of the start tag of the given element and null * otherwise. */ - public static Range selectStartTag(DOMNode element) { - int startOffset = element.getStart() + 1; // < - int endOffset = startOffset + getStartTagLength(element); + public static Range selectStartTagName(DOMNode element) { + return selectStartTagName(element, false); + } + + /** + * Returns the range of a tag's local name. If the tag does not have a prefix, implying + * it doesn't have a local name, it will return null. 
+ * @param element + * @return + */ + public static Range selectStartTagLocalName(DOMNode element) { + return selectStartTagName(element, true); + } + + /** + * Returns the range of the start tag name (excludes the '<') of the given element and null + * otherwise. + * + * If suffixOnly is true then it will try to return the range of the localName/suffix. Else + * it will return null. + * + * @param element the DOM element + * @param suffixOnly select the suffix portion, only when a prefix exists + * @return the range of the start tag of the given element and null + * otherwise. + */ + private static Range selectStartTagName(DOMNode element, boolean localNameOnly) { + int initialStartOffset = element.getStart() + 1; // < + int finalStartOffset = initialStartOffset; + if(localNameOnly) { + String prefix = element.getPrefix(); + if(prefix != null) { + finalStartOffset += prefix.length() + 1; // skips prefix name and ':' + } + else { + return null; + } + } + + int endOffset = initialStartOffset + getStartTagLength(element); if (element.isProcessingInstruction() || element.isProlog()) { // in the case of prolog or processing instruction, tag is equals to "xml" // without '?' -> element name and null + * otherwise. + * + * @param element the DOM element + * @return the range of the end tag of the given element and null + * otherwise. + */ + public static Range selectEndTagName(DOMElement element) { + return selectEndTagName(element, false); + } + + /** + * Returns the range of the end tag of the given LOCAL element name and null + * otherwise. + * + * @param element the DOM element + * @return the range of the end tag of the given element and null + * otherwise. + */ + public static Range selectEndTagLocalName(DOMElement element) { + return selectEndTagName(element, true); + } + /** * Returns the range of the end tag of the given element and null * otherwise. 
@@ -316,11 +377,21 @@ public static Range selectEndTag(int offset, DOMDocument document) { * @return the range of the end tag of the given element and null * otherwise. */ - public static Range selectEndTag(DOMElement element) { + public static Range selectEndTagName(DOMElement element, boolean localNameOnly) { if (element.hasEndTag()) { - int startOffset = element.getEndTagOpenOffset() + 2; // <\ - int endOffset = startOffset + getStartTagLength(element); - return createRange(startOffset, endOffset, element.getOwnerDocument()); + int initialStartOffset = element.getEndTagOpenOffset() + 2; // skips the 2-character end tag opener (presumably "</" - TODO confirm) + int finalStartOffset = initialStartOffset; + if(localNameOnly) { + String prefix = element.getPrefix(); + if(prefix != null) { + finalStartOffset += prefix.length() + 1; // skips prefix and ':' + } + else { + return null; + } + } + int endOffset = initialStartOffset + getStartTagLength(element); + return createRange(finalStartOffset, endOffset, element.getOwnerDocument()); } return null; } @@ -419,7 +490,7 @@ public static Range selectPreviousNodesEndTag(int offset, DOMDocument document) if (node != null) { DOMElement element = (DOMElement) node; if (element.isClosed() && !element.isEndTagClosed()) { - return selectEndTag(element.getEnd(), document); + return selectEndTagName(element.getEnd(), document); } } @@ -428,7 +499,7 @@ public static Range selectPreviousNodesEndTag(int offset, DOMDocument document) char c = document.getText().charAt(i); while (i >= 0) { if (c == '>') { - return selectEndTag(i, document); + return selectEndTagName(i, document); } i--; c = document.getText().charAt(i); @@ -466,7 +537,7 @@ public static Range createRange(int startOffset, int endOffset, DOMDocument docu public static LocationLink createLocationLink(DOMRange origin, DOMRange target) { Range originSelectionRange = null; if (origin instanceof DOMElement) { - originSelectionRange = selectStartTag((DOMElement) origin); + originSelectionRange = selectStartTagName((DOMElement) origin); } 
else { originSelectionRange = XMLPositionUtility.createRange(origin); } @@ -535,7 +606,7 @@ public static Range selectContent(int offset, DOMDocument document) { return createRange(element.getStartTagCloseOffset() + 1, element.getEndTagOpenOffset(), document); } // node has NO content (ex: , select the start tag - return selectStartTag(node); + return selectStartTagName(node); } else if (node.isText()) { DOMText text = (DOMText) node; return createRange(text.getStartContent(), text.getEndContent(), document); diff --git a/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/XMLAssert.java b/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/XMLAssert.java index a282d413b..c7ddf2755 100644 --- a/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/XMLAssert.java +++ b/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/XMLAssert.java @@ -524,7 +524,7 @@ public static void assertCodeActions(List actual, CodeAction... expe Assert.assertArrayEquals(expected, actual.toArray()); } - public static CodeAction ca(Diagnostic d, TextEdit te) { + public static CodeAction ca(Diagnostic d, TextEdit... 
te) { CodeAction codeAction = new CodeAction(); codeAction.setTitle(""); codeAction.setDiagnostics(Arrays.asList(d)); @@ -533,7 +533,7 @@ public static CodeAction ca(Diagnostic d, TextEdit te) { 0); TextDocumentEdit textDocumentEdit = new TextDocumentEdit(versionedTextDocumentIdentifier, - Collections.singletonList(te)); + Arrays.asList(te)); WorkspaceEdit workspaceEdit = new WorkspaceEdit(Collections.singletonList(Either.forLeft(textDocumentEdit))); codeAction.setEdit(workspaceEdit); return codeAction; diff --git a/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/extensions/contentmodel/XMLSchemaDiagnosticsTest.java b/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/extensions/contentmodel/XMLSchemaDiagnosticsTest.java index b07088d96..8a221b70d 100644 --- a/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/extensions/contentmodel/XMLSchemaDiagnosticsTest.java +++ b/org.eclipse.lsp4xml/src/test/java/org/eclipse/lsp4xml/extensions/contentmodel/XMLSchemaDiagnosticsTest.java @@ -16,6 +16,7 @@ import static org.eclipse.lsp4xml.XMLAssert.testCodeActionsFor; import org.eclipse.lsp4j.Diagnostic; +import org.eclipse.lsp4j.TextEdit; import org.eclipse.lsp4xml.XMLAssert; import org.eclipse.lsp4xml.extensions.contentmodel.participants.XMLSchemaErrorCode; import org.eclipse.lsp4xml.extensions.contentmodel.settings.ContentModelSettings; @@ -90,10 +91,10 @@ public void cvc_type_4_Multiple_attributes() throws Exception { @Test public void cvc_complex_type_2_4_a() throws Exception { - String xml = "\r\n" - + // + " xsi:schemaLocation=\"http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd\">\r\n"+ // " \r\n" + // <- error ""; @@ -481,6 +482,75 @@ public void schema_reference_4_withSchemaLocation() { d(0, 1, 0, 9, XMLSchemaErrorCode.cvc_elt_1_a)); } + @Test + public void fuzzyElementNameCodeActionTest() throws Exception { + String xml = + "\r\n" + + " \r\n" + + " \r\n" + // should be 'module' + " \r\n" + + ""; + Diagnostic diagnostic = d(4, 7, 
4, 13, XMLSchemaErrorCode.cvc_complex_type_2_4_a, + "Invalid element name:\n - bodule\n\nOne of the following is expected:\n - module\n\nError indicated by:\n {http://maven.apache.org/POM/4.0.0}\nwith code:"); + testDiagnosticsFor(xml, diagnostic); + + testCodeActionsFor(xml, diagnostic, ca(diagnostic, te(4, 7, 4, 13, "module"), te(4, 16, 4, 22, "module"))); + } + + @Test + public void fuzzyElementNamesWithOtherOptionsCodeActionTest() throws Exception { + String xml = + "\r\n" + + " \r\n" + + " \r\n" + // does not fuzzy match any, so provide code action for all possible + " \r\n" + + ""; + Diagnostic diagnostic = d(4, 7, 4, 16, XMLSchemaErrorCode.cvc_complex_type_2_4_a, + "Invalid element name:\n - XXXXXXXXX\n\nOne of the following is expected:\n - system\n - url\n - notifiers\n\nError indicated by:\n {http://maven.apache.org/POM/4.0.0}\nwith code:"); + testDiagnosticsFor(xml, diagnostic); + + testCodeActionsFor(xml, diagnostic, ca(diagnostic, te(4, 7, 4, 16, "notifiers"), te(4, 19, 4, 28, "notifiers")), ca(diagnostic, te(4, 7, 4, 16, "system"), te(4, 19, 4, 28, "system")), ca(diagnostic, te(4, 7, 4, 16, "url"), te(4, 19, 4, 28, "url"))); + } + + @Test + public void fuzzyElementNamesWithPrefix() throws Exception { + String xml = + " \n" + + " \n" + + ""; + Diagnostic diagnostic = d(6, 5, 6, 16, XMLSchemaErrorCode.cvc_complex_type_2_4_c, + "cvc-complex-type.2.4.c: The matching wildcard is strict, but no declaration can be found for element 'camel:beani'."); + testDiagnosticsFor(xml, diagnostic); + + testCodeActionsFor(xml, diagnostic, ca(diagnostic, te(6, 11, 6, 16, "bean"), te(6, 25, 6, 30, "bean")), + ca(diagnostic, te(6, 11, 6, 16, "beanio"), te(6, 25, 6, 30, "beanio"))); + } + + @Test + public void fuzzyElementNamesWithPrefixAndNoMatch() throws Exception { + String xml = + "\n" + + " \n" + + ""; + Diagnostic diagnostic = d(5, 4, 5, 17, XMLSchemaErrorCode.cvc_complex_type_2_4_c, "cvc-complex-type.2.4.c: The matching wildcard is strict, but no declaration can be 
found for element 'schemaA:XXXXX'."); + testDiagnosticsFor(xml, diagnostic); + testCodeActionsFor(xml, diagnostic, ca(diagnostic, te(5, 12, 5, 17, "AElement1"), te(5, 28, 5, 33, "AElement1")), + ca(diagnostic, te(5, 12, 5, 17, "AElement2"), te(5, 28, 5, 33, "AElement2"))); + } + private static void testDiagnosticsFor(String xml, Diagnostic... expected) { XMLAssert.testDiagnosticsFor(xml, "src/test/resources/catalogs/catalog.xml", expected); } @@ -491,3 +561,5 @@ private static void testDiagnosticsDisabledValidation(String xml) { } } + + \ No newline at end of file diff --git a/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaA.xsd b/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaA.xsd new file mode 100644 index 000000000..885e7a35b --- /dev/null +++ b/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaA.xsd @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaB.xsd b/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaB.xsd new file mode 100644 index 000000000..f5f037bf4 --- /dev/null +++ b/org.eclipse.lsp4xml/src/test/resources/xsd/fuzzyCodeAction/FuzzySchemaB.xsd @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file