From ba87bcc577bc28a61cda509aef45d24b4e1bf338 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 19 Dec 2012 09:27:30 -0500 Subject: [PATCH 01/14] add an initial QuickOpen test. --- src/search/QuickOpen.js | 1 + test/UnitTestSuite.js | 1 + test/spec/QuickOpen-test.js | 62 +++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) create mode 100644 test/spec/QuickOpen-test.js diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index 1c05a166e09..f8d36fd41b4 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -23,6 +23,7 @@ /*jslint vars: true, plusplus: true, devel: true, nomen: true, indent: 4, maxerr: 50 */ /*global define, $, window, setTimeout */ +/*unittests: QuickOpen */ /* * Displays an auto suggest pop-up list of files to allow the user to quickly navigate to a file and lines diff --git a/test/UnitTestSuite.js b/test/UnitTestSuite.js index 7a0c57e7e07..d9838080b5f 100644 --- a/test/UnitTestSuite.js +++ b/test/UnitTestSuite.js @@ -47,6 +47,7 @@ define(function (require, exports, module) { require("spec/NativeFileSystem-test"); require("spec/PreferencesManager-test"); require("spec/ProjectManager-test"); + require("spec/QuickOpen-test"); require("spec/UpdateNotification-test"); require("spec/ViewUtils-test"); require("spec/WorkingSetView-test"); diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js new file mode 100644 index 00000000000..d8cb870118a --- /dev/null +++ b/test/spec/QuickOpen-test.js @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 Adobe Systems Incorporated. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + */ + + +/*jslint vars: true, plusplus: true, devel: true, browser: true, nomen: true, indent: 4, maxerr: 50 */ +/*global define: false, require: false, describe: false, it: false, expect: false, beforeEach: false, afterEach: false, waitsFor: false, runs: false */ +/*unittests: QuickOpen */ + +define(function (require, exports, module) { + 'use strict'; + + var QuickOpen = require("search/QuickOpen"); + + describe("QuickOpen", function () { + describe("stringMatch", function () { + var stringMatch = QuickOpen.stringMatch; + + it("should return appropriate matching ranges", function () { + var result; + expect(stringMatch("foo/bar/baz.js", "bingo")).toBeUndefined(); + result = stringMatch("foo/bar/baz.js", "fbb.js"); + expect(result).not.toBeUndefined(); + expect(result.matchGoodness).toBeLessThan(-100); + var ranges = result.stringRanges; + expect(ranges.length).toBe(7); + + // verify the important bit of the ranges + var range = ranges.shift(); + expect(range.text).toBe("f"); + expect(range.matched).toBe(true); + + range = ranges.shift(); + expect(range.text).toBe("oo/"); + expect(range.matched).toBe(false); + + range = ranges.shift(); + expect(range.text).toBe("b"); + expect(range.matched).toBe(true); + }); + }); + }); +}); From 4f66e1b674b9628d8f7ea0d941bc7783c8e32763 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 19 Dec 2012 21:54:09 -0500 Subject: [PATCH 02/14] added longestCommonSubstring to quickopen file. this is just a checkpoint. my current plan is to remove that function because it slows things down too much --- src/search/QuickOpen.js | 58 ++++++++++++++++++++++++++++------ test/spec/QuickOpen-test.js | 62 +++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index f8d36fd41b4..c8399574825 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -22,7 +22,7 @@ */ /*jslint vars: true, plusplus: true, devel: true, nomen: true, indent: 4, maxerr: 50 */ -/*global define, $, window, setTimeout */ +/*global define, $, window, setTimeout, ArrayBuffer, Int8Array */ /*unittests: QuickOpen */ /* @@ -468,6 +468,45 @@ define(function (require, exports, module) { function _boostForUpperCase(c) { return c.toUpperCase() === c ? 50 : 0; } + + // based on the dynamic programming algorithm here: + // http://en.wikipedia.org/wiki/Longest_common_substring_problem + function _longestCommonSubstring(str1, str2) { + var lengths = new Int8Array(new ArrayBuffer(str1.length * str2.length)); + var maxlength = 0; + var substring = ""; + var jlength = str2.length; + var i, j, index, pos1, pos2; + for (i = 0; i < str1.length; i++) { + for (j = 0; j < str2.length; j++) { + if (str1[i] === str2[j]) { + index = i * jlength + j; + if (i === 0 || j === 0) { + lengths[index] = 1; + } else { + lengths[index] = lengths[(i - 1) * jlength + j - 1] + 1; + } + + if (lengths[index] > maxlength) { + maxlength = lengths[index]; + substring += str1[i]; + pos1 = i - maxlength + 1; + pos2 = j - maxlength + 1; + } + } + } + } + + if (maxlength === 0) { + return undefined; + } else { + return { + length: maxlength, + position1: pos1, + position2: pos2 + }; + } + } /** * Performs matching of a string based on a query, and scores @@ -512,7 +551,7 @@ define(function (require, exports, module) { // Keeps track of the most specific segment that the current stringRange // is associated with. var rangeSegment = 0; - + // addToStringRanges is used when we transition between matched and unmatched // parts of the string. function addToStringRanges(numberOfCharacters, matched) { @@ -1020,11 +1059,12 @@ define(function (require, exports, module) { CommandManager.register(Strings.CMD_GOTO_DEFINITION, Commands.NAVIGATE_GOTO_DEFINITION, doDefinitionSearch); CommandManager.register(Strings.CMD_GOTO_LINE, Commands.NAVIGATE_GOTO_LINE, doGotoLine); - exports.beginSearch = beginSearch; - exports.addQuickOpenPlugin = addQuickOpenPlugin; - exports.SearchResult = SearchResult; - exports.stringMatch = stringMatch; - exports.basicMatchSort = basicMatchSort; - exports.multiFieldSort = multiFieldSort; - exports.highlightMatch = highlightMatch; + exports.beginSearch = beginSearch; + exports.addQuickOpenPlugin = addQuickOpenPlugin; + exports.SearchResult = SearchResult; + exports.stringMatch = stringMatch; + exports.basicMatchSort = basicMatchSort; + exports.multiFieldSort = multiFieldSort; + exports.highlightMatch = highlightMatch; + exports._longestCommonSubstring = _longestCommonSubstring; }); diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js index d8cb870118a..7547039b783 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/QuickOpen-test.js @@ -32,6 +32,38 @@ define(function (require, exports, module) { var QuickOpen = require("search/QuickOpen"); describe("QuickOpen", function () { + describe("longestCommonSubstring", function () { + var lcs = QuickOpen._longestCommonSubstring; + + it("should find the right substrings", function () { + expect(lcs("Sub", "longestCommonSubstring")).toEqual({ + length: 3, + position1: 0, + position2: 13 + }); + }); + + it("should work the other way around", function () { + expect(lcs("longestCommonSubstring", "Sub")).toEqual({ + length: 3, + position1: 13, + position2: 0 + }); + }); + + it("should handle substrings properly", function () { + expect(lcs("longestCommonSubstring", "randomjunkSubmorejunk")).toEqual({ + length: 3, + position1: 13, + position2: 10 + }); + }); + + it("should handle no matches at all", function () { + expect(lcs("longestCommonSubstring", "zzz")).toBeUndefined(); + }); + }); + describe("stringMatch", function () { var stringMatch = QuickOpen.stringMatch; @@ -57,6 +89,36 @@ define(function (require, exports, module) { expect(range.text).toBe("b"); expect(range.matched).toBe(true); }); + + var goodRelativeOrdering = function (query, testStrings) { + var lastScore = -20000; + var goodOrdering = true; + testStrings.forEach(function (str) { + var result = stringMatch(str, query); + + // note that matchGoodness is expressed in negative numbers + if (result.matchGoodness < lastScore) { + goodOrdering = false; + } + lastScore = result.matchGoodness; + }); + return goodOrdering; + }; + + it("should place QuickOpen well relative to other quicks", function () { + expect(goodRelativeOrdering("quick", [ + "src/search/QuickOpen.js", + "test/spec/QuickOpen-test.js", + "samples/root/Getting Started/screenshots/brackets-quick-edit.png", + "src/extensions/default/QuickOpenCSS/main.js" + ])).toBe(true); + }); + it("should find the right spec/live", function () { + expect(goodRelativeOrdering("spec/live", [ + "test/spec/LiveDevelopment-test.js", + "test/spec/LiveDevelopment-chrome-user-data/Default/VisitedLinks" + ])).toBe(true); + }); }); }); }); From e654d09bf7a5ac4084e1235919263c95b93b757e Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Thu, 20 Dec 2012 14:59:40 -0500 Subject: [PATCH 03/14] fix 2068: rework quickopen logic to produce better results Created a new QuickOpen matching algorithm that searches for matches among "special characters" (path markers, camelCase changes) and does so left-to-right rather than right-to-left as in the old algorithm. The new algorithm does still give an added bonus to matches that occur within the filename, which is checked first. Also, there is now a collection of tests to try out the QuickOpen logic and ensure that it is working sanely. The scores look pretty sane now. In this commit, I added some code to help debug scores and was able to get the scores to be pretty sane and the results look quite nice. I also removed the dead code. fix a display bug when there is no query added comments for new QuickOpen algorithm. Note that in the process I spotted a bug and added a failing test, which I have not had a chance to fix yet. partial fix for a bug in the new quickopen logic fixed the bug with strings that are longer than the final segment fix an off-by-one problem that left an initial character match out of the last segment --- src/search/QuickOpen.js | 675 +++++++++++++++++++++++++----------- test/spec/QuickOpen-test.js | 280 +++++++++++++-- 2 files changed, 730 insertions(+), 225 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index c8399574825..e5ac3a788d6 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -427,245 +427,505 @@ define(function (require, exports, module) { }; /** - * Helper functions for stringMatch score calculation. + * Helper functions for stringMatch at the heart of the QuickOpen + * matching. */ - /** - * The current scoring gives a boost for matches in the "most specific" (generally farthest right) - * segment of the string being tested against the query. + /* + * Identifies the "special" characters in the given string. + * Special characters for matching purposes are: + * + * * the first character + * * "/" and the character following the "/" + * * "." and "-" + * * an uppercase character that follows a lowercase one (think camelCase) + * + * The returned object contains an array called "specials". This array is + * a list of indexes into the original string where all of the special + * characters are. It also has a property "lastSegmentSpecialsIndex" which + * is an index into the specials array that denotes which location is the + * beginning of the last path segment. (This is used to allow scanning of + * the last segment's specials separately.) + * + * @param {string} input string to break apart (e.g. filename that is being searched) + * @return {Object} */ - function _adjustScoreForSegment(segmentCounter, score) { - if (segmentCounter === 0) { - // Multiplier used for matches within the most-specific part of the name (filename, for example) - return score * 3; - } else { - return score; + function findSpecialCharacters(str) { + var i, c; + + // the beginning of the string is always special + var specials = [0]; + + // lastSegmentSpecialsIndex starts off with the assumption that + // there are no segments + var lastSegmentSpecialsIndex = 0; + + // used to track down the camelCase changeovers + var lastWasLowerCase = false; + + for (i = 0; i < str.length; i++) { + c = str[i]; + if (c === "/") { + // new segment means this character and the next are special + specials.push(i++); + specials.push(i); + lastSegmentSpecialsIndex = specials.length - 1; + lastWasLowerCase = false; + } else { + // . and - are separators so they are + // special and so is the next character + if (c === "." || c === "-") { + specials.push(i++); + specials.push(i); + lastWasLowerCase = false; + } else if (c.toUpperCase() === c) { + // this is the check for camelCase changeovers + if (lastWasLowerCase) { + specials.push(i); + } + lastWasLowerCase = false; + } else { + lastWasLowerCase = true; + } + } } + return { + specials: specials, + lastSegmentSpecialsIndex: lastSegmentSpecialsIndex + }; } - /** - * Additional points are added when multiple characters in the string - * being tested match against query characters. - */ - function _boostForMatches(sequentialMatches) { - // Multiplier for the number of sequential matched characters - return sequentialMatches * sequentialMatches * 5; - } + // states used during the scanning of the string + var SPECIALS_COMPARE = 0; + var CONTIGUOUS_COMPARE = 1; + var SPECIALS_EXHAUSTED = 2; - /** - * The score is boosted for matches that occur at the beginning - * of a segment of string that is being tested against the query. - */ - function _boostForPathSegmentStart(sequentialMatches) { - // Multiplier for sequential matched characters at the beginning - // of a delimited section (after a '/' in a path, for example) - return sequentialMatches * sequentialMatches * 5; + // Scores can be hard to make sense of. The DEBUG_SCORES flag + // provides a way to peek into the parts that made up a score. + // This flag is also used in the unit tests. + var DEBUG_SCORES = false; + function _setDebugScores(ds) { + DEBUG_SCORES = ds; } - /** - * Upper case characters are boosted to help match MixedCase strings better. - */ - function _boostForUpperCase(c) { - return c.toUpperCase() === c ? 50 : 0; - } + // Constants for scoring + var SPECIAL_POINTS = 25; + var MATCH_POINTS = 10; + var LAST_SEGMENT_BOOST = 1; + var BEGINNING_OF_NAME_POINTS = 25; + var DEDUCTION_FOR_LENGTH = 0.2; + var CONSECUTIVE_MATCHES_POINTS = 25; - // based on the dynamic programming algorithm here: - // http://en.wikipedia.org/wiki/Longest_common_substring_problem - function _longestCommonSubstring(str1, str2) { - var lengths = new Int8Array(new ArrayBuffer(str1.length * str2.length)); - var maxlength = 0; - var substring = ""; - var jlength = str2.length; - var i, j, index, pos1, pos2; - for (i = 0; i < str1.length; i++) { - for (j = 0; j < str2.length; j++) { - if (str1[i] === str2[j]) { - index = i * jlength + j; - if (i === 0 || j === 0) { - lengths[index] = 1; - } else { - lengths[index] = lengths[(i - 1) * jlength + j - 1] + 1; - } - - if (lengths[index] > maxlength) { - maxlength = lengths[index]; - substring += str1[i]; - pos1 = i - maxlength + 1; - pos2 = j - maxlength + 1; - } - } - } - } + /* + * Finds the best matches between the query and the string. The query is + * compared with compareStr (generally a lower case string with a lower case + * query), but the results are returned based on str. + * + * Generally speaking, this function tries to find a "special" character + * (see findSpecialCharacters above) for the first character of the query. + * When it finds one, it then tries to look for consecutive characters that + * match. Once the characters stop matching, it tries to find a special + * character for the next query character. If a special character isn't found, + * it starts scanning sequentially. + * + * The returned object contains the following fields: + * * {Array} ranges: the scanned regions of the string, in order. For each range: + * * {string} text: the text for the string range + * * {boolean} matched: was this range part of the match? + * * {boolean} lastSegment: is this range part of the last segment of str + * * {int} matchGoodness: the score computed for this match + * * (optional) {Object} scoreDebug: if DEBUG_SCORES is true, an object with the score broken down + * + * @param {string} query the search string (generally lower cased) + * @param {string} str the original string to search + * @param {string} compareStr the string to compare with (generally lower cased) + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} startingSpecial optional index into specials array to start scanning with + * @param {boolean} lastSegmentStart optional which character does the last segment start at + * @return {Object} matched ranges and score + */ + function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { + var ranges = []; - if (maxlength === 0) { - return undefined; - } else { - return { - length: maxlength, - position1: pos1, - position2: pos2 + var score = 0; + var scoreDebug; + if (DEBUG_SCORES) { + scoreDebug = { + special: 0, + match: 0, + lastSegment: 0, + beginning: 0, + lengthDeduction: 0, + consecutive: 0 }; } - } - - /** - * Performs matching of a string based on a query, and scores - * the result based on specificity (assuming that the rightmost - * side of the input is the most specific) and how clustered the - * query characters are in the input string. The matching is - * case-insensitive, but case is taken into account in the scoring. - * - * If the query characters cannot be found in order (but not necessarily all together), - * undefined is returned. - * - * The returned SearchResult has a matchGoodness score that can be used - * for sorting. It also has a stringRanges array, each entry with - * "text", "matched" and "segment". If you string the "text" properties together, you will - * get the original str. Using the matched property, you can highlight - * the string matches. The segment property tells you the most specific (rightmost) - * segment covered by the range, though there may be more than one segment covered. - * Segments are currently determined by "/"s. - * - * Use basicMatchSort() to sort the filtered results taking this ranking into account. - * The label of the SearchResult is set to 'str'. - * @param {!string} str - * @param {!string} query - * @return {?SearchResult} - */ - function stringMatch(str, query) { - var result; - // start at the end and work backward, because we give preference - // to matches in the name (last) segment - var strCounter = str.length - 1; + // normalize the optional parameters + if (!lastSegmentStart) { + lastSegmentStart = 0; + } - // stringRanges are used to keep track of which parts of - // the input str matched the query - var stringRanges = []; + if (startingSpecial === undefined) { + startingSpecial = 0; + } - // segmentCounter tracks which "segment" (delimited section) of the - // str we are in so that we can treat certain (generally most-specific) segments - // specially. - var segmentCounter = 0; + var specialsCounter = startingSpecial; - // Keeps track of the most specific segment that the current stringRange - // is associated with. - var rangeSegment = 0; - - // addToStringRanges is used when we transition between matched and unmatched - // parts of the string. - function addToStringRanges(numberOfCharacters, matched) { - var segment = rangeSegment; - rangeSegment = segmentCounter; - stringRanges.unshift({ - text: str.substr(strCounter + 1, numberOfCharacters), - matched: matched, - segment: segment - }); - } - - // No query? Short circuit the normal work done and just - // return a single range that covers the whole string. - if (!query) { - result = new SearchResult(str); - result.matchGoodness = 0; - strCounter = -1; - addToStringRanges(str.length, false); - result.stringRanges = stringRanges; - return result; - } + // strCounter and queryCounter are the indexes used for pulling characters + // off of the str/compareStr and query. + var strCounter = specials[startingSpecial]; + var queryCounter = 0; - var queryChars = query.toLowerCase().split(""); + // initial state is to scan through the special characters + var state = SPECIALS_COMPARE; - // start at the end of the query - var queryCounter = queryChars.length - 1; - - var score = 0; + // currentRange keeps track of the range we are adding characters to now + var currentRange = null; + var lastSegmentScore = 0; - // sequentialMatches is positive when we are stepping through matched - // characters and negative when stepping through unmatched characters - var sequentialMatches = 0; + // the character index (against str) that was last in a matched range. this is used for + // adding unmatched ranges in between + var lastRangeCharacter = strCounter - 1; - while (strCounter >= 0 && queryCounter >= 0) { - var curChar = str.charAt(strCounter); + // variable to award bonus points for consecutive matching characters. We keep track of the + // last matched character. + var lastMatchedIndex = -2; + + // Records the current range and adds any additional ranges required to + // get to character index c. This function is called before starting a new range + // or returning from the function. + function closeRangeGap(c) { + // close the current range + if (currentRange) { + currentRange.lastSegment = lastRangeCharacter >= lastSegmentStart; + if (currentRange.matched && currentRange.lastSegment) { + if (DEBUG_SCORES) { + scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; + } + score += lastSegmentScore * LAST_SEGMENT_BOOST; + } + ranges.push(currentRange); + } - // Ideally, this function will work with different delimiters used in - // different contexts. For now, this is used for paths delimited by '/'. - if (curChar === '/') { - // Beginning of segment, apply boost for a matching - // string of characters, if there is one - if (sequentialMatches > 0) { - score += _boostForPathSegmentStart(sequentialMatches); + // if there was space between the new range and the last, + // add a new unmatched range before the new range can be added. + if (lastRangeCharacter + 1 < c) { + ranges.push({ + text: str.substring(lastRangeCharacter + 1, c), + matched: false, + lastSegment: c > lastSegmentStart + }); + } + currentRange = null; + lastSegmentScore = 0; + } + + // records that the character at index c (of str) matches, adding + // that character to the current range or starting a new one. + function addMatch(c) { + var newPoints = 0; + + // A match means that we need to do some scoring bookkeeping. + if (DEBUG_SCORES) { + scoreDebug.match += MATCH_POINTS; + } + newPoints += MATCH_POINTS; + + // a bonus is given for characters that match at the beginning + // of the filename + if (c === 0 && (strCounter > lastSegmentStart)) { + if (DEBUG_SCORES) { + scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; } - - score = _adjustScoreForSegment(segmentCounter, score); - segmentCounter++; + newPoints += BEGINNING_OF_NAME_POINTS; } - if (queryChars[queryCounter] === curChar.toLowerCase()) { - - score += _boostForUpperCase(curChar); - - // are we ending a string of unmatched characters? - if (sequentialMatches < 0) { - addToStringRanges(-sequentialMatches, false); - sequentialMatches = 0; + // If the new character immediately follows the last matched character, + // we award the consecutive matches bonus. + if (lastMatchedIndex + 1 === c) { + if (DEBUG_SCORES) { + scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; } - - // matched character, chalk up another match - // and move both counters back a character - sequentialMatches++; - queryCounter--; - strCounter--; + newPoints += CONSECUTIVE_MATCHES_POINTS; + } + + score += newPoints; + if (c > lastSegmentStart) { + lastSegmentScore += newPoints; + } + lastMatchedIndex = c; + + // if the last range wasn't a match or there's a gap, we need to close off + // the range to start a new one. + if ((currentRange && !currentRange.matched) || c > lastRangeCharacter + 1) { + closeRangeGap(c); + } + + // set up a new match range or add to the current one + if (!currentRange) { + currentRange = { + text: str[c], + matched: true + }; + } else { + currentRange.text += str[c]; + } + lastRangeCharacter = c; + } + + // Compares the current character from the query string against the + // special characters in compareStr. + function compareSpecials() { + // used to loop through the specials + var i; + + // used to keep track of where we are in compareStr + // without overriding strCounter until we have a match + var tempsc; + var foundMatch = false; + for (i = specialsCounter; i < specials.length; i++) { + tempsc = specials[i]; + // first, check to see if strCounter has moved beyond + // the current special character. This is possible + // if the contiguous comparison continued on through + // the next special + if (tempsc < strCounter) { + specialsCounter = i; + } else if (query[queryCounter] === compareStr[tempsc]) { + // we have a match! do the required tracking + specialsCounter++; + queryCounter++; + strCounter = tempsc; + if (DEBUG_SCORES) { + scoreDebug.special += SPECIAL_POINTS; + } + score += SPECIAL_POINTS; + addMatch(strCounter++); + foundMatch = true; + break; + } + } + + // when we find a match, we switch to looking for consecutive matching characters + if (foundMatch) { + state = CONTIGUOUS_COMPARE; } else { - // are we ending a string of matched characters? - if (sequentialMatches > 0) { - addToStringRanges(sequentialMatches, true); - score += _boostForMatches(sequentialMatches); - sequentialMatches = 0; + // we didn't find a match among the specials + state = SPECIALS_EXHAUSTED; + } + } + + // keep looping until we've either exhausted the query or the string + while (queryCounter < query.length && strCounter < str.length) { + if (state === SPECIALS_COMPARE) { + compareSpecials(); + } else if (state === CONTIGUOUS_COMPARE || state === SPECIALS_EXHAUSTED) { + // for both of these states, we're looking character by character + // for matches + if (query[queryCounter] === compareStr[strCounter]) { + // got a match! record it, and switch to CONTIGUOUS_COMPARE + // in case we had been in SPECIALS_EXHAUSTED state + queryCounter++; + addMatch(strCounter++); + state = CONTIGUOUS_COMPARE; + } else { + // no match. If we were in CONTIGUOUS_COMPARE mode, we + // we switch to looking for specials again. If we've + // already exhaused the specials, we're just going to keep + // stepping through compareStr. + if (state !== SPECIALS_EXHAUSTED) { + compareSpecials(); + } else { + strCounter++; + } } - // character didn't match, apply sequential matches - // to score and keep looking - strCounter--; - sequentialMatches--; } } - // if there are still query characters left, we don't - // have a match - if (queryCounter >= 0) { - return undefined; + var result; + // Add the rest of the string ranges + closeRangeGap(str.length); + + // It's not a match if we still have query string left. + if (queryCounter < query.length) { + result = null; + } else { + result = { + matchGoodness: score, + ranges: ranges + }; + if (DEBUG_SCORES) { + result.scoreDebug = scoreDebug; + } + } + return result; + } + + /* + * Seek out the best match in the last segment (generally the filename). + * Matches in the filename are preferred, but the query entered could match + * any part of the path. So, we find the best match we can get in the filename + * and then allow for searching the rest of the string with any characters that + * are left from the beginning of the query. + * + * The parameters and return value are the same as for getMatchRanges, + * except this function is always working on the last segment and the + * result can optionally include a remainder, which is the characters + * at the beginning of the query which did not match in the last segment. + */ + function lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { + var queryCounter, matchRanges; + + // It's possible that the query is longer than the last segment. + // If so, we can chop off the bit that we know couldn't possibly be there. + var remainder = ""; + var extraCharacters = specials[startingSpecial] + query.length - str.length; + + if (extraCharacters > 0) { + remainder = query.substring(0, extraCharacters); + query = query.substring(extraCharacters); } - if (sequentialMatches) { - addToStringRanges(Math.abs(sequentialMatches), sequentialMatches > 0); + for (queryCounter = 0; queryCounter < query.length; queryCounter++) { + matchRanges = getMatchRanges(query.substring(queryCounter), + str, compareStr, specials, startingSpecial, lastSegmentStart); + + // if we've got a match *or* there are no segments in this string, we're done + if (matchRanges !== null || !startingSpecial) { + break; + } } - if (strCounter >= 0) { - stringRanges.unshift({ - text: str.substring(0, strCounter + 1), - matched: false, - segment: rangeSegment - }); + if (queryCounter === query.length || !matchRanges) { + return null; + } else { + matchRanges.remainder = remainder + query.substring(0, queryCounter); + return matchRanges; } + } + + /* + * Implements the top-level search algorithm. Search the last segment first, + * then search the rest of the string with the remainder. + * + * The parameters and return value are the same as for getMatchRanges. + */ + function orderedCompare(query, str, compareStr, specials, lastSegmentSpecialsIndex) { + var lastSegmentStart = specials[lastSegmentSpecialsIndex]; + var result; - // now, we need to apply any score we've accumulated - // before we ran out of query characters - score += _boostForMatches(sequentialMatches); + if (lastSegmentStart + query.length < str.length) { + result = lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); + } - if (sequentialMatches && strCounter >= 0) { - if (str.charAt(strCounter) === '/') { - score += _boostForPathSegmentStart(sequentialMatches); + if (result) { + // Do we have more query characters that did not fit? + if (result.remainder) { + // Scan with the remainder only through the beginning of the last segment + var result2 = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), + compareStr.substring(0, lastSegmentStart), + specials.slice(0, lastSegmentSpecialsIndex), 0, lastSegmentStart); + + if (result2) { + // We have a match + // This next part deals with scoring for the case where + // there are consecutive matched characters at the border of the + // last segment. + var lastRange = result2.ranges[result2.ranges.length - 1]; + if (result.ranges[0].matched && lastRange.matched) { + result.matchGoodness += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; + if (DEBUG_SCORES) { + result.scoreDebug.consecutive += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; + } + } + + // add the new matched ranges to the beginning of the set of ranges we had + result.ranges.unshift.apply(result.ranges, result2.ranges); + } else { + // no match + return null; + } + } else { + // There was no remainder, which means that the whole match is in the + // last segment. We need to add a range at the beginning for everything + // that came before the last segment. + result.ranges.unshift({ + text: str.substring(0, lastSegmentStart), + matched: false, + lastSegment: false + }); } + delete result.remainder; + } else { + // No match in the last segment, so we start over searching the whole + // string + result = getMatchRanges(query, str, compareStr, specials, 0, lastSegmentStart); } - score = _adjustScoreForSegment(segmentCounter, score); - // Produce a SearchResult that is augmented with matchGoodness - // (used for sorting) and stringRanges (used for highlighting - // matched areas of the string) - result = new SearchResult(str); - result.matchGoodness = -1 * score; - result.stringRanges = stringRanges; + if (result) { + var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH); + if (DEBUG_SCORES) { + result.scoreDebug.lengthDeduction = lengthPenalty; + } + result.matchGoodness = result.matchGoodness + lengthPenalty; + } + return result; + } + + /* + * Match str against the query using the QuickOpen algorithm provided by + * the functions above. The general idea is to prefer matches in the last + * segment (the filename) and matches of "special" characters. stringMatch + * will try to provide the best match and produces a "matchGoodness" score + * to allow for relative ranking. + * + * The result object returned includes "stringRanges" which can be used to highlight + * the matched portions of the string, in addition to the "matchGoodness" + * mentioned above. If DEBUG_SCORES is true, scoreDebug is set on the result + * to provide insight into the score. + * + * The matching is done in a case-insensitive manner. + * + * @param {string} str The string to search + * @param {string} query The query string to find in string + */ + function stringMatch(str, query) { + var result; + + // No query? Short circuit the normal work done and just + // return a single range that covers the whole string. + if (!query) { + result = new SearchResult(str); + result.matchGoodness = 0; + if (DEBUG_SCORES) { + result.scoreDebug = {}; + } + result.stringRanges = [{ + text: str, + matched: false, + lastSegment: true + }]; + return result; + } + + // Locate the special characters and then use orderedCompare to do the real + // work. + var special = findSpecialCharacters(str); + var compareData = orderedCompare(query.toLowerCase(), str, str.toLowerCase(), special.specials, + special.lastSegmentSpecialsIndex); + + // If we get a match, turn this into a SearchResult as expected by the consumers + // of this API. + if (compareData) { + result = new SearchResult(str); + result.stringRanges = compareData.ranges; + result.matchGoodness = -1 * compareData.matchGoodness; + if (DEBUG_SCORES) { + result.scoreDebug = compareData.scoreDebug; + } + } else { + result = undefined; + } return result; } @@ -810,11 +1070,17 @@ define(function (require, exports, module) { stringRanges = [{ text: label, matched: false, - segment: 0 + lastSegment: true }]; } var displayName = ""; + if (DEBUG_SCORES) { + var sd = item.scoreDebug; + displayName += '(' + item.matchGoodness + ') '; + } // Put the path pieces together, highlighting the matched parts stringRanges.forEach(function (range) { @@ -822,7 +1088,7 @@ define(function (require, exports, module) { displayName += ""; } - var rangeText = rangeFilter ? rangeFilter(range.segment, range.text) : range.text; + var rangeText = rangeFilter ? rangeFilter(range.lastSegment, range.text) : range.text; displayName += StringUtils.breakableUrl(StringUtils.htmlEscape(rangeText)); if (range.matched) { @@ -841,8 +1107,8 @@ define(function (require, exports, module) { function _filenameResultsFormatter(item, query) { // For main label, we just want filename: drop most of the string - function fileNameFilter(segment, rangeText) { - if (segment === 0) { + function fileNameFilter(lastSegment, rangeText) { + if (lastSegment) { var rightmostSlash = rangeText.lastIndexOf('/'); return rangeText.substring(rightmostSlash + 1); // safe even if rightmostSlash is -1 } else { @@ -1066,5 +1332,8 @@ define(function (require, exports, module) { exports.basicMatchSort = basicMatchSort; exports.multiFieldSort = multiFieldSort; exports.highlightMatch = highlightMatch; - exports._longestCommonSubstring = _longestCommonSubstring; + exports._findSpecialCharacters = findSpecialCharacters; + exports._orderedCompare = orderedCompare; + exports._lastSegmentSearch = lastSegmentSearch; + exports._setDebugScores = _setDebugScores; }); diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js index 7547039b783..462d370f8a4 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/QuickOpen-test.js @@ -23,7 +23,7 @@ /*jslint vars: true, plusplus: true, devel: true, browser: true, nomen: true, indent: 4, maxerr: 50 */ -/*global define: false, require: false, describe: false, it: false, expect: false, beforeEach: false, afterEach: false, waitsFor: false, runs: false */ +/*global define: false, require: false, describe: false, it: false, xit: false, expect: false, beforeEach: false, afterEach: false, waitsFor: false, runs: false, jasmine: false */ /*unittests: QuickOpen */ define(function (require, exports, module) { @@ -32,35 +32,209 @@ define(function (require, exports, module) { var QuickOpen = require("search/QuickOpen"); describe("QuickOpen", function () { - describe("longestCommonSubstring", function () { - var lcs = QuickOpen._longestCommonSubstring; - - it("should find the right substrings", function () { - expect(lcs("Sub", "longestCommonSubstring")).toEqual({ - length: 3, - position1: 0, - position2: 13 + QuickOpen._setDebugScores(false); + describe("findSpecialCharacters", function () { + it("should find the important match characters in the string", function () { + var fSC = QuickOpen._findSpecialCharacters; + expect(fSC("src/document/DocumentCommandHandler.js")).toEqual({ + lastSegmentSpecialsIndex: 4, + specials: [0, 3, 4, 12, 13, 21, 28, 35, 36] + }); + + expect(fSC("foobar.js")).toEqual({ + lastSegmentSpecialsIndex: 0, + specials: [0, 6, 7] }); }); - - it("should work the other way around", function () { - expect(lcs("longestCommonSubstring", "Sub")).toEqual({ - length: 3, - position1: 13, - position2: 0 + }); + + describe("_lastSegmentSearch", function () { + it("should compare results in the final segment properly", function () { + var path = "src/document/DocumentCommandHandler.js"; + var comparePath = path.toLowerCase(); + var _lastSegmentSearch = QuickOpen._lastSegmentSearch; + var sc = QuickOpen._findSpecialCharacters(path); + expect(_lastSegmentSearch("d", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "D", matched: true, lastSegment: true }, + { text: "ocumentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("do", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "Do", matched: true, lastSegment: true }, + { text: "cumentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("doc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "Doc", matched: true, lastSegment: true }, + { text: "umentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("docc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "Doc", matched: true, lastSegment: true }, + { text: "ument", matched: false, lastSegment: true }, + { text: "C", matched: true, lastSegment: true }, + { text: "ommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("docch", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "Doc", matched: true, lastSegment: true }, + { text: "ument", matched: false, lastSegment: true }, + { text: "C", matched: true, lastSegment: true }, + { text: "ommand", matched: false, lastSegment: true }, + { text: "H", matched: true, lastSegment: true }, + { text: "andler.js", matched: false, lastSegment: true } + ] }); + + expect(_lastSegmentSearch("docch.js", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "Doc", matched: true, lastSegment: true }, + { text: "ument", matched: false, lastSegment: true }, + { text: "C", matched: true, lastSegment: true }, + { text: "ommand", matched: false, lastSegment: true }, + { text: "H", matched: true, lastSegment: true }, + { text: "andler", matched: false, lastSegment: true }, + { text: ".js", matched: true, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("ocu", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "D", matched: false, lastSegment: true }, + { text: "ocu", matched: true, lastSegment: true }, + { text: "mentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("ocuha", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + remainder: "", + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "D", matched: false, lastSegment: true }, + { text: "ocu", matched: true, lastSegment: true }, + { text: "mentCommand", matched: false, lastSegment: true }, + { text: "Ha", matched: true, lastSegment: true }, + { text: "ndler.js", matched: false, lastSegment: true } + ] + }); + + expect(_lastSegmentSearch("z", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(_lastSegmentSearch("ocuz", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + + expect(_lastSegmentSearch("sdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + remainder: "s", + ranges: [ + { text: "Doc", matched: true, lastSegment: true }, + { text: "umentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + }); + + it("should handle weird comparisons as well", function () { + // this is a special case, to ensure that a "special" character following + // other matched characters doesn't become a match + // note that the d's in Command and Handler have been removed + var path = "DocumentCommanHanler.js"; + var comparePath = path.toLowerCase(); + var _lastSegmentSearch = QuickOpen._lastSegmentSearch; + var sc = QuickOpen._findSpecialCharacters(path); + expect(_lastSegmentSearch("ocud", path, comparePath, sc.specials)).toEqual(null); }); - it("should handle substrings properly", function () { - expect(lcs("longestCommonSubstring", "randomjunkSubmorejunk")).toEqual({ - length: 3, - position1: 13, - position2: 10 + it("should compare matches that don't fit in just the final segment", function () { + var path = "src/document/DocumentCommandHandler.js"; + var comparePath = path.toLowerCase(); + var orderedCompare = QuickOpen._orderedCompare; + var sc = QuickOpen._findSpecialCharacters(path); + expect(orderedCompare("sdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "s", matched: true, lastSegment: false }, + { text: "rc/document/", matched: false, lastSegment: false }, + { text: "Doc", matched: true, lastSegment: true }, + { text: "umentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + + expect(orderedCompare("doc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/document/", matched: false, lastSegment: false }, + { text: "Doc", matched: true, lastSegment: true }, + { text: "umentCommandHandler.js", matched: false, lastSegment: true } + ] + }); + expect(orderedCompare("z", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + + expect(orderedCompare("docdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/", matched: false, lastSegment: false }, + { text: "doc", matched: true, lastSegment: false }, + { text: "ument/", matched: false, lastSegment: false }, + { text: "Doc", matched: true, lastSegment: true }, + { text: "umentCommandHandler.js", matched: false, lastSegment: true } + ] }); }); - it("should handle no matches at all", function () { - expect(lcs("longestCommonSubstring", "zzz")).toBeUndefined(); + it("should handle matches that don't fit at all in the final segment", function () { + var path = "src/extensions/default/QuickOpenCSS/main.js"; + var comparePath = path.toLowerCase(); + var orderedCompare = QuickOpen._orderedCompare; + var sc = QuickOpen._findSpecialCharacters(path); + expect(orderedCompare("quick", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/extensions/default/", matched: false, lastSegment: false }, + { text: "Quick", matched: true, lastSegment: false }, + { text: "OpenCSS/main.js", matched: false, lastSegment: true } + ] + }); + + expect(orderedCompare("quickopen", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/extensions/default/", matched: false, lastSegment: false }, + { text: "QuickOpen", matched: true, lastSegment: false }, + { text: "CSS/main.js", matched: false, lastSegment: true } + ] + }); + + expect(orderedCompare("quickopenain", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/extensions/default/", matched: false, lastSegment: false }, + { text: "QuickOpen", matched: true, lastSegment: false }, + { text: "CSS/m", matched: false, lastSegment: true }, + { text: "ain", matched: true, lastSegment: true }, + { text: ".js", matched: false, lastSegment: true } + ] + }); }); }); @@ -88,6 +262,22 @@ define(function (require, exports, module) { range = ranges.shift(); expect(range.text).toBe("b"); expect(range.matched).toBe(true); + + result = stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quick"); + ranges = result.stringRanges; + expect(ranges.length).toBe(3); + + expect(stringMatch("src/search/QuickOpen.js", "qo")).toEqual({ + matchGoodness: jasmine.any(Number), + label: "src/search/QuickOpen.js", + stringRanges: [ + { text: "src/search/", matched: false, lastSegment: false }, + { text: "Q", matched: true, lastSegment: true }, + { text: "uick", matched: false, lastSegment: true }, + { text: "O", matched: true, lastSegment: true }, + { text: "pen.js", matched: false, lastSegment: true } + ] + }); }); var goodRelativeOrdering = function (query, testStrings) { @@ -119,6 +309,52 @@ define(function (require, exports, module) { "test/spec/LiveDevelopment-chrome-user-data/Default/VisitedLinks" ])).toBe(true); }); + it("should find the right samples/index", function () { + expect(goodRelativeOrdering("samples/index", [ + "samples/de/Erste Schritte/index.html", + "src/thirdparty/CodeMirror2/mode/ntriples/index.html" + ])).toBe(true); + }); + it("should find the right Commands", function () { + expect(goodRelativeOrdering("Commands", [ + "src/command/Commands.js", + "src/command/CommandManager.js" + ])).toBe(true); + }); + it("should find the right extensions", function () { + expect(goodRelativeOrdering("extensions", [ + "src/utils/ExtensionLoader.js", + "src/extensions/default/RecentProjects/styles.css" + ])).toBe(true); + }); + it("should find the right EUtil", function () { + expect(goodRelativeOrdering("EUtil", [ + "src/editor/EditorUtils.js", + "src/utils/ExtensionUtils.js", + "src/file/FileUtils.js" + ])); + }); + }); + + describe("scoring", function () { + beforeEach(function () { + QuickOpen._setDebugScores(true); + }); + + afterEach(function () { + QuickOpen._setDebugScores(false); + }); + + it("should score consecutive matches across the last segment", function () { + var result1 = QuickOpen.stringMatch("test/spec/LiveDevelopment-test.js", "spec/live"); + var result2 = QuickOpen.stringMatch("test/spec/live/foobar.js", "spec/live"); + expect(result2.scoreDebug.consecutive).toEqual(result1.scoreDebug.consecutive); + }); + + it("should boost last segment matches, even when searching the whole string", function () { + var result = QuickOpen.stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quickopenain"); + expect(result.scoreDebug.lastSegment).toBeGreaterThan(0); + }); }); }); }); From 751e425664b915c880e783ba2c03bf66dafec776 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Tue, 8 Jan 2013 11:00:20 -0500 Subject: [PATCH 04/14] first round of cleanup based on pflynn's review feedback. checkpointing here because I'm going to work on some changes that I might want to roll back (merging lastMatchedIndex and lastRangeCharacter) --- src/search/QuickOpen.js | 78 ++++++++++++++++++++++--------------- test/spec/QuickOpen-test.js | 27 +++++++------ 2 files changed, 61 insertions(+), 44 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index e5ac3a788d6..103f9263e68 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -437,7 +437,7 @@ define(function (require, exports, module) { * * * the first character * * "/" and the character following the "/" - * * "." and "-" + * * "_", "." and "-" and the character following it * * an uppercase character that follows a lowercase one (think camelCase) * * The returned object contains an array called "specials". This array is @@ -448,7 +448,7 @@ define(function (require, exports, module) { * the last segment's specials separately.) * * @param {string} input string to break apart (e.g. filename that is being searched) - * @return {Object} + * @return {{specials:Array., lastSegmentSpecialsIndex:number}} */ function findSpecialCharacters(str) { var i, c; @@ -471,22 +471,20 @@ define(function (require, exports, module) { specials.push(i); lastSegmentSpecialsIndex = specials.length - 1; lastWasLowerCase = false; - } else { - // . and - are separators so they are + } else if (c === "." || c === "-" || c === "_") { + // _, . and - are separators so they are // special and so is the next character - if (c === "." || c === "-") { - specials.push(i++); + specials.push(i++); + specials.push(i); + lastWasLowerCase = false; + } else if (c.toUpperCase() === c) { + // this is the check for camelCase changeovers + if (lastWasLowerCase) { specials.push(i); - lastWasLowerCase = false; - } else if (c.toUpperCase() === c) { - // this is the check for camelCase changeovers - if (lastWasLowerCase) { - specials.push(i); - } - lastWasLowerCase = false; - } else { - lastWasLowerCase = true; } + lastWasLowerCase = false; + } else { + lastWasLowerCase = true; } } return { @@ -502,7 +500,7 @@ define(function (require, exports, module) { // Scores can be hard to make sense of. The DEBUG_SCORES flag // provides a way to peek into the parts that made up a score. - // This flag is also used in the unit tests. + // This flag is used for manual debugging and in the unit tests only. var DEBUG_SCORES = false; function _setDebugScores(ds) { DEBUG_SCORES = ds; @@ -540,9 +538,9 @@ define(function (require, exports, module) { * @param {string} str the original string to search * @param {string} compareStr the string to compare with (generally lower cased) * @param {Array} specials list of special indexes in str (from findSpecialCharacters) - * @param {int} startingSpecial optional index into specials array to start scanning with + * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart optional which character does the last segment start at - * @return {Object} matched ranges and score + * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var ranges = []; @@ -589,7 +587,7 @@ define(function (require, exports, module) { // variable to award bonus points for consecutive matching characters. We keep track of the // last matched character. - var lastMatchedIndex = -2; + var lastMatchedIndex = -1; // Records the current range and adds any additional ranges required to // get to character index c. This function is called before starting a new range @@ -772,8 +770,16 @@ define(function (require, exports, module) { * except this function is always working on the last segment and the * result can optionally include a remainder, which is the characters * at the beginning of the query which did not match in the last segment. + * + * @param {string} query the search string (generally lower cased) + * @param {string} str the original string to search + * @param {string} compareStr the string to compare with (generally lower cased) + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} startingSpecial index into specials array to start scanning with + * @param {boolean} lastSegmentStart which character does the last segment start at + * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ - function lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { + function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var queryCounter, matchRanges; // It's possible that the query is longer than the last segment. @@ -791,7 +797,7 @@ define(function (require, exports, module) { str, compareStr, specials, startingSpecial, lastSegmentStart); // if we've got a match *or* there are no segments in this string, we're done - if (matchRanges !== null || !startingSpecial) { + if (matchRanges || startingSpecial === 0) { break; } } @@ -809,29 +815,39 @@ define(function (require, exports, module) { * then search the rest of the string with the remainder. * * The parameters and return value are the same as for getMatchRanges. + * + * @param {string} query the search string (will be searched lower case) + * @param {string} str the original string to search + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with + * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ - function orderedCompare(query, str, compareStr, specials, lastSegmentSpecialsIndex) { + function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { + // set up query as all lower case and make a lower case string to use for comparisons + query = query.toLowerCase(); + var compareStr = str.toLowerCase(); + var lastSegmentStart = specials[lastSegmentSpecialsIndex]; var result; if (lastSegmentStart + query.length < str.length) { - result = lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); + result = _lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); } if (result) { // Do we have more query characters that did not fit? if (result.remainder) { // Scan with the remainder only through the beginning of the last segment - var result2 = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), + var remainderResult = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), compareStr.substring(0, lastSegmentStart), specials.slice(0, lastSegmentSpecialsIndex), 0, lastSegmentStart); - if (result2) { + if (remainderResult) { // We have a match // This next part deals with scoring for the case where // there are consecutive matched characters at the border of the // last segment. - var lastRange = result2.ranges[result2.ranges.length - 1]; + var lastRange = remainderResult.ranges[remainderResult.ranges.length - 1]; if (result.ranges[0].matched && lastRange.matched) { result.matchGoodness += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; if (DEBUG_SCORES) { @@ -840,7 +856,7 @@ define(function (require, exports, module) { } // add the new matched ranges to the beginning of the set of ranges we had - result.ranges.unshift.apply(result.ranges, result2.ranges); + result.ranges.unshift.apply(result.ranges, remainderResult.ranges); } else { // no match return null; @@ -911,7 +927,7 @@ define(function (require, exports, module) { // Locate the special characters and then use orderedCompare to do the real // work. var special = findSpecialCharacters(str); - var compareData = orderedCompare(query.toLowerCase(), str, str.toLowerCase(), special.specials, + var compareData = _computeMatch(query, str, special.specials, special.lastSegmentSpecialsIndex); // If we get a match, turn this into a SearchResult as expected by the consumers @@ -923,8 +939,6 @@ define(function (require, exports, module) { if (DEBUG_SCORES) { result.scoreDebug = compareData.scoreDebug; } - } else { - result = undefined; } return result; } @@ -1333,7 +1347,7 @@ define(function (require, exports, module) { exports.multiFieldSort = multiFieldSort; exports.highlightMatch = highlightMatch; exports._findSpecialCharacters = findSpecialCharacters; - exports._orderedCompare = orderedCompare; - exports._lastSegmentSearch = lastSegmentSearch; + exports._computeMatch = _computeMatch; + exports._lastSegmentSearch = _lastSegmentSearch; exports._setDebugScores = _setDebugScores; }); diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js index 462d370f8a4..0ab32d3f988 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/QuickOpen-test.js @@ -162,15 +162,14 @@ define(function (require, exports, module) { var comparePath = path.toLowerCase(); var _lastSegmentSearch = QuickOpen._lastSegmentSearch; var sc = QuickOpen._findSpecialCharacters(path); - expect(_lastSegmentSearch("ocud", path, comparePath, sc.specials)).toEqual(null); + expect(_lastSegmentSearch("ocud", path, comparePath, sc.specials, 0)).toEqual(null); }); it("should compare matches that don't fit in just the final segment", function () { var path = "src/document/DocumentCommandHandler.js"; - var comparePath = path.toLowerCase(); - var orderedCompare = QuickOpen._orderedCompare; + var computeMatch = QuickOpen._computeMatch; var sc = QuickOpen._findSpecialCharacters(path); - expect(orderedCompare("sdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("sdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "s", matched: true, lastSegment: false }, @@ -180,7 +179,7 @@ define(function (require, exports, module) { ] }); - expect(orderedCompare("doc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("doc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "src/document/", matched: false, lastSegment: false }, @@ -188,9 +187,9 @@ define(function (require, exports, module) { { text: "umentCommandHandler.js", matched: false, lastSegment: true } ] }); - expect(orderedCompare("z", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(computeMatch("z", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); - expect(orderedCompare("docdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("docdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "src/", matched: false, lastSegment: false }, @@ -204,10 +203,9 @@ define(function (require, exports, module) { it("should handle matches that don't fit at all in the final segment", function () { var path = "src/extensions/default/QuickOpenCSS/main.js"; - var comparePath = path.toLowerCase(); - var orderedCompare = QuickOpen._orderedCompare; + var computeMatch = QuickOpen._computeMatch; var sc = QuickOpen._findSpecialCharacters(path); - expect(orderedCompare("quick", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("quick", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "src/extensions/default/", matched: false, lastSegment: false }, @@ -216,7 +214,7 @@ define(function (require, exports, module) { ] }); - expect(orderedCompare("quickopen", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("quickopen", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "src/extensions/default/", matched: false, lastSegment: false }, @@ -225,7 +223,7 @@ define(function (require, exports, module) { ] }); - expect(orderedCompare("quickopenain", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(computeMatch("quickopenain", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ { text: "src/extensions/default/", matched: false, lastSegment: false }, @@ -355,6 +353,11 @@ define(function (require, exports, module) { var result = QuickOpen.stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quickopenain"); expect(result.scoreDebug.lastSegment).toBeGreaterThan(0); }); + + it("should treat the character after _ as a special", function () { + var result = QuickOpen.stringMatch("src/extensions/default/Quick_Open.js", "o"); + expect(result.scoreDebug.special).toBeGreaterThan(0); + }); }); }); }); From 7c8a5fabd9a30c96bcbf27e0313cea7ab6282ca4 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Tue, 8 Jan 2013 11:27:56 -0500 Subject: [PATCH 05/14] merge lastRangeCharacter and lastMatchedIndex into lastMatchIndex --- src/search/QuickOpen.js | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index 103f9263e68..964c25bb744 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -581,13 +581,9 @@ define(function (require, exports, module) { var currentRange = null; var lastSegmentScore = 0; - // the character index (against str) that was last in a matched range. this is used for - // adding unmatched ranges in between - var lastRangeCharacter = strCounter - 1; - - // variable to award bonus points for consecutive matching characters. We keep track of the - // last matched character. - var lastMatchedIndex = -1; + // the character index (against str) that was last in a matched range. This is used for + // adding unmatched ranges in between and adding bonus points for consecutive matches. + var lastMatchIndex = strCounter - 1; // Records the current range and adds any additional ranges required to // get to character index c. This function is called before starting a new range @@ -595,7 +591,7 @@ define(function (require, exports, module) { function closeRangeGap(c) { // close the current range if (currentRange) { - currentRange.lastSegment = lastRangeCharacter >= lastSegmentStart; + currentRange.lastSegment = lastMatchIndex >= lastSegmentStart; if (currentRange.matched && currentRange.lastSegment) { if (DEBUG_SCORES) { scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; @@ -607,9 +603,9 @@ define(function (require, exports, module) { // if there was space between the new range and the last, // add a new unmatched range before the new range can be added. - if (lastRangeCharacter + 1 < c) { + if (lastMatchIndex + 1 < c) { ranges.push({ - text: str.substring(lastRangeCharacter + 1, c), + text: str.substring(lastMatchIndex + 1, c), matched: false, lastSegment: c > lastSegmentStart }); @@ -639,8 +635,10 @@ define(function (require, exports, module) { } // If the new character immediately follows the last matched character, - // we award the consecutive matches bonus. - if (lastMatchedIndex + 1 === c) { + // we award the consecutive matches bonus. The check for score > 0 + // handles the initial value of lastMatchIndex which is used for + // constructing ranges but we don't yet have a true match. + if (score > 0 && lastMatchIndex + 1 === c) { if (DEBUG_SCORES) { scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; } @@ -651,13 +649,13 @@ define(function (require, exports, module) { if (c > lastSegmentStart) { lastSegmentScore += newPoints; } - lastMatchedIndex = c; // if the last range wasn't a match or there's a gap, we need to close off // the range to start a new one. - if ((currentRange && !currentRange.matched) || c > lastRangeCharacter + 1) { + if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) { closeRangeGap(c); } + lastMatchIndex = c; // set up a new match range or add to the current one if (!currentRange) { @@ -668,7 +666,6 @@ define(function (require, exports, module) { } else { currentRange.text += str[c]; } - lastRangeCharacter = c; } // Compares the current character from the query string against the @@ -694,11 +691,11 @@ define(function (require, exports, module) { specialsCounter++; queryCounter++; strCounter = tempsc; + addMatch(strCounter++); if (DEBUG_SCORES) { scoreDebug.special += SPECIAL_POINTS; } score += SPECIAL_POINTS; - addMatch(strCounter++); foundMatch = true; break; } From cc9c3ad03e48b768947b68e7758e361455bb9c45 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 9 Jan 2013 09:27:00 -0500 Subject: [PATCH 06/14] a bunch of changes per pflynn's code review, including some variable/function renames for clarity. Note that there is currently a failing test. I wanted to put scoring changes into a separate commit, and that should fix the failing test. --- src/search/QuickOpen.js | 50 ++++++------ test/spec/QuickOpen-test.js | 153 +++++++++++++++++++++--------------- 2 files changed, 114 insertions(+), 89 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index 964c25bb744..acf5b052918 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -530,7 +530,7 @@ define(function (require, exports, module) { * * {Array} ranges: the scanned regions of the string, in order. For each range: * * {string} text: the text for the string range * * {boolean} matched: was this range part of the match? - * * {boolean} lastSegment: is this range part of the last segment of str + * * {boolean} includesLastSegment: is this range part of the last segment of str * * {int} matchGoodness: the score computed for this match * * (optional) {Object} scoreDebug: if DEBUG_SCORES is true, an object with the score broken down * @@ -540,7 +540,7 @@ define(function (require, exports, module) { * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart optional which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var ranges = []; @@ -591,8 +591,8 @@ define(function (require, exports, module) { function closeRangeGap(c) { // close the current range if (currentRange) { - currentRange.lastSegment = lastMatchIndex >= lastSegmentStart; - if (currentRange.matched && currentRange.lastSegment) { + currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; + if (currentRange.matched && currentRange.includesLastSegment) { if (DEBUG_SCORES) { scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; } @@ -607,7 +607,7 @@ define(function (require, exports, module) { ranges.push({ text: str.substring(lastMatchIndex + 1, c), matched: false, - lastSegment: c > lastSegmentStart + includesLastSegment: c > lastSegmentStart }); } currentRange = null; @@ -627,7 +627,7 @@ define(function (require, exports, module) { // a bonus is given for characters that match at the beginning // of the filename - if (c === 0 && (strCounter > lastSegmentStart)) { + if (c === lastSegmentStart) { if (DEBUG_SCORES) { scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; } @@ -646,7 +646,7 @@ define(function (require, exports, module) { } score += newPoints; - if (c > lastSegmentStart) { + if (c >= lastSegmentStart) { lastSegmentScore += newPoints; } @@ -670,27 +670,23 @@ define(function (require, exports, module) { // Compares the current character from the query string against the // special characters in compareStr. - function compareSpecials() { + function findMatchingSpecial() { // used to loop through the specials var i; - // used to keep track of where we are in compareStr - // without overriding strCounter until we have a match - var tempsc; var foundMatch = false; for (i = specialsCounter; i < specials.length; i++) { - tempsc = specials[i]; // first, check to see if strCounter has moved beyond // the current special character. This is possible // if the contiguous comparison continued on through // the next special - if (tempsc < strCounter) { + if (specials[i] < strCounter) { specialsCounter = i; - } else if (query[queryCounter] === compareStr[tempsc]) { + } else if (query[queryCounter] === compareStr[specials[i]]) { // we have a match! do the required tracking - specialsCounter++; + specialsCounter = i; queryCounter++; - strCounter = tempsc; + strCounter = specials[i]; addMatch(strCounter++); if (DEBUG_SCORES) { scoreDebug.special += SPECIAL_POINTS; @@ -713,7 +709,7 @@ define(function (require, exports, module) { // keep looping until we've either exhausted the query or the string while (queryCounter < query.length && strCounter < str.length) { if (state === SPECIALS_COMPARE) { - compareSpecials(); + findMatchingSpecial(); } else if (state === CONTIGUOUS_COMPARE || state === SPECIALS_EXHAUSTED) { // for both of these states, we're looking character by character // for matches @@ -729,7 +725,7 @@ define(function (require, exports, module) { // already exhaused the specials, we're just going to keep // stepping through compareStr. if (state !== SPECIALS_EXHAUSTED) { - compareSpecials(); + findMatchingSpecial(); } else { strCounter++; } @@ -774,7 +770,7 @@ define(function (require, exports, module) { * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var queryCounter, matchRanges; @@ -817,7 +813,7 @@ define(function (require, exports, module) { * @param {string} str the original string to search * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with - * @return {{ranges:Array.{text:string, matched:boolean, lastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { // set up query as all lower case and make a lower case string to use for comparisons @@ -865,7 +861,7 @@ define(function (require, exports, module) { result.ranges.unshift({ text: str.substring(0, lastSegmentStart), matched: false, - lastSegment: false + includesLastSegment: false }); } delete result.remainder; @@ -916,7 +912,7 @@ define(function (require, exports, module) { result.stringRanges = [{ text: str, matched: false, - lastSegment: true + includesLastSegment: true }]; return result; } @@ -1068,7 +1064,7 @@ define(function (require, exports, module) { * matched; else formats the label with no highlighting. * @param {!string|SearchResult} item * @param {?string} matchClass CSS class for highlighting matched text - * @param {?function(number, string):string} rangeFilter + * @param {?function(boolean, string):string} rangeFilter * @return {!string} bolded, HTML-escaped result */ function highlightMatch(item, matchClass, rangeFilter) { @@ -1081,7 +1077,7 @@ define(function (require, exports, module) { stringRanges = [{ text: label, matched: false, - lastSegment: true + includesLastSegment: true }]; } @@ -1099,7 +1095,7 @@ define(function (require, exports, module) { displayName += ""; } - var rangeText = rangeFilter ? rangeFilter(range.lastSegment, range.text) : range.text; + var rangeText = rangeFilter ? rangeFilter(range.includesLastSegment, range.text) : range.text; displayName += StringUtils.breakableUrl(StringUtils.htmlEscape(rangeText)); if (range.matched) { @@ -1118,8 +1114,8 @@ define(function (require, exports, module) { function _filenameResultsFormatter(item, query) { // For main label, we just want filename: drop most of the string - function fileNameFilter(lastSegment, rangeText) { - if (lastSegment) { + function fileNameFilter(includesLastSegment, rangeText) { + if (includesLastSegment) { var rightmostSlash = rangeText.lastIndexOf('/'); return rangeText.substring(rightmostSlash + 1); // safe even if rightmostSlash is -1 } else { diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js index 0ab32d3f988..c5f680e280d 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/QuickOpen-test.js @@ -32,7 +32,9 @@ define(function (require, exports, module) { var QuickOpen = require("search/QuickOpen"); describe("QuickOpen", function () { + QuickOpen._setDebugScores(false); + describe("findSpecialCharacters", function () { it("should find the important match characters in the string", function () { var fSC = QuickOpen._findSpecialCharacters; @@ -45,6 +47,11 @@ define(function (require, exports, module) { lastSegmentSpecialsIndex: 0, specials: [0, 6, 7] }); + + expect(fSC("foo")).toEqual({ + lastSegmentSpecialsIndex: 0, + specials: [0] + }); }); }); @@ -58,8 +65,8 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "D", matched: true, lastSegment: true }, - { text: "ocumentCommandHandler.js", matched: false, lastSegment: true } + { text: "D", matched: true, includesLastSegment: true }, + { text: "ocumentCommandHandler.js", matched: false, includesLastSegment: true } ] }); @@ -67,8 +74,8 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "Do", matched: true, lastSegment: true }, - { text: "cumentCommandHandler.js", matched: false, lastSegment: true } + { text: "Do", matched: true, includesLastSegment: true }, + { text: "cumentCommandHandler.js", matched: false, includesLastSegment: true } ] }); @@ -76,8 +83,8 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "Doc", matched: true, lastSegment: true }, - { text: "umentCommandHandler.js", matched: false, lastSegment: true } + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } ] }); @@ -85,10 +92,10 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "Doc", matched: true, lastSegment: true }, - { text: "ument", matched: false, lastSegment: true }, - { text: "C", matched: true, lastSegment: true }, - { text: "ommandHandler.js", matched: false, lastSegment: true } + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "ument", matched: false, includesLastSegment: true }, + { text: "C", matched: true, includesLastSegment: true }, + { text: "ommandHandler.js", matched: false, includesLastSegment: true } ] }); @@ -96,12 +103,12 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "Doc", matched: true, lastSegment: true }, - { text: "ument", matched: false, lastSegment: true }, - { text: "C", matched: true, lastSegment: true }, - { text: "ommand", matched: false, lastSegment: true }, - { text: "H", matched: true, lastSegment: true }, - { text: "andler.js", matched: false, lastSegment: true } + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "ument", matched: false, includesLastSegment: true }, + { text: "C", matched: true, includesLastSegment: true }, + { text: "ommand", matched: false, includesLastSegment: true }, + { text: "H", matched: true, includesLastSegment: true }, + { text: "andler.js", matched: false, includesLastSegment: true } ] }); @@ -109,13 +116,13 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "Doc", matched: true, lastSegment: true }, - { text: "ument", matched: false, lastSegment: true }, - { text: "C", matched: true, lastSegment: true }, - { text: "ommand", matched: false, lastSegment: true }, - { text: "H", matched: true, lastSegment: true }, - { text: "andler", matched: false, lastSegment: true }, - { text: ".js", matched: true, lastSegment: true } + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "ument", matched: false, includesLastSegment: true }, + { text: "C", matched: true, includesLastSegment: true }, + { text: "ommand", matched: false, includesLastSegment: true }, + { text: "H", matched: true, includesLastSegment: true }, + { text: "andler", matched: false, includesLastSegment: true }, + { text: ".js", matched: true, includesLastSegment: true } ] }); @@ -123,9 +130,9 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "D", matched: false, lastSegment: true }, - { text: "ocu", matched: true, lastSegment: true }, - { text: "mentCommandHandler.js", matched: false, lastSegment: true } + { text: "D", matched: false, includesLastSegment: true }, + { text: "ocu", matched: true, includesLastSegment: true }, + { text: "mentCommandHandler.js", matched: false, includesLastSegment: true } ] }); @@ -133,11 +140,11 @@ define(function (require, exports, module) { remainder: "", matchGoodness: jasmine.any(Number), ranges: [ - { text: "D", matched: false, lastSegment: true }, - { text: "ocu", matched: true, lastSegment: true }, - { text: "mentCommand", matched: false, lastSegment: true }, - { text: "Ha", matched: true, lastSegment: true }, - { text: "ndler.js", matched: false, lastSegment: true } + { text: "D", matched: false, includesLastSegment: true }, + { text: "ocu", matched: true, includesLastSegment: true }, + { text: "mentCommand", matched: false, includesLastSegment: true }, + { text: "Ha", matched: true, includesLastSegment: true }, + { text: "ndler.js", matched: false, includesLastSegment: true } ] }); @@ -148,8 +155,8 @@ define(function (require, exports, module) { matchGoodness: jasmine.any(Number), remainder: "s", ranges: [ - { text: "Doc", matched: true, lastSegment: true }, - { text: "umentCommandHandler.js", matched: false, lastSegment: true } + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } ] }); }); @@ -172,19 +179,19 @@ define(function (require, exports, module) { expect(computeMatch("sdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "s", matched: true, lastSegment: false }, - { text: "rc/document/", matched: false, lastSegment: false }, - { text: "Doc", matched: true, lastSegment: true }, - { text: "umentCommandHandler.js", matched: false, lastSegment: true } + { text: "s", matched: true, includesLastSegment: false }, + { text: "rc/document/", matched: false, includesLastSegment: false }, + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } ] }); expect(computeMatch("doc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "src/document/", matched: false, lastSegment: false }, - { text: "Doc", matched: true, lastSegment: true }, - { text: "umentCommandHandler.js", matched: false, lastSegment: true } + { text: "src/document/", matched: false, includesLastSegment: false }, + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } ] }); expect(computeMatch("z", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); @@ -192,13 +199,16 @@ define(function (require, exports, module) { expect(computeMatch("docdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "src/", matched: false, lastSegment: false }, - { text: "doc", matched: true, lastSegment: false }, - { text: "ument/", matched: false, lastSegment: false }, - { text: "Doc", matched: true, lastSegment: true }, - { text: "umentCommandHandler.js", matched: false, lastSegment: true } + { text: "src/", matched: false, includesLastSegment: false }, + { text: "doc", matched: true, includesLastSegment: false }, + { text: "ument/", matched: false, includesLastSegment: false }, + { text: "Doc", matched: true, includesLastSegment: true }, + { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } ] }); + + // test for a suspected bug where specials are matched out of order. + expect(computeMatch("hc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); }); it("should handle matches that don't fit at all in the final segment", function () { @@ -208,29 +218,29 @@ define(function (require, exports, module) { expect(computeMatch("quick", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "src/extensions/default/", matched: false, lastSegment: false }, - { text: "Quick", matched: true, lastSegment: false }, - { text: "OpenCSS/main.js", matched: false, lastSegment: true } + { text: "src/extensions/default/", matched: false, includesLastSegment: false }, + { text: "Quick", matched: true, includesLastSegment: false }, + { text: "OpenCSS/main.js", matched: false, includesLastSegment: true } ] }); expect(computeMatch("quickopen", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "src/extensions/default/", matched: false, lastSegment: false }, - { text: "QuickOpen", matched: true, lastSegment: false }, - { text: "CSS/main.js", matched: false, lastSegment: true } + { text: "src/extensions/default/", matched: false, includesLastSegment: false }, + { text: "QuickOpen", matched: true, includesLastSegment: false }, + { text: "CSS/main.js", matched: false, includesLastSegment: true } ] }); expect(computeMatch("quickopenain", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ - { text: "src/extensions/default/", matched: false, lastSegment: false }, - { text: "QuickOpen", matched: true, lastSegment: false }, - { text: "CSS/m", matched: false, lastSegment: true }, - { text: "ain", matched: true, lastSegment: true }, - { text: ".js", matched: false, lastSegment: true } + { text: "src/extensions/default/", matched: false, includesLastSegment: false }, + { text: "QuickOpen", matched: true, includesLastSegment: false }, + { text: "CSS/m", matched: false, includesLastSegment: true }, + { text: "ain", matched: true, includesLastSegment: true }, + { text: ".js", matched: false, includesLastSegment: true } ] }); }); @@ -241,7 +251,9 @@ define(function (require, exports, module) { it("should return appropriate matching ranges", function () { var result; + expect(stringMatch("foo/bar/baz.js", "bingo")).toBeUndefined(); + result = stringMatch("foo/bar/baz.js", "fbb.js"); expect(result).not.toBeUndefined(); expect(result.matchGoodness).toBeLessThan(-100); @@ -269,17 +281,33 @@ define(function (require, exports, module) { matchGoodness: jasmine.any(Number), label: "src/search/QuickOpen.js", stringRanges: [ - { text: "src/search/", matched: false, lastSegment: false }, - { text: "Q", matched: true, lastSegment: true }, - { text: "uick", matched: false, lastSegment: true }, - { text: "O", matched: true, lastSegment: true }, - { text: "pen.js", matched: false, lastSegment: true } + { text: "src/search/", matched: false, includesLastSegment: false }, + { text: "Q", matched: true, includesLastSegment: true }, + { text: "uick", matched: false, includesLastSegment: true }, + { text: "O", matched: true, includesLastSegment: true }, + { text: "pen.js", matched: false, includesLastSegment: true } + ] + }); + }); + + it("should prefer special characters", function () { + expect(stringMatch("src/document/DocumentCommandHandler.js", "dch")).toEqual({ + matchGoodness: jasmine.any(Number), + label: "src/document/DocumentCommandHandler.js", + stringRanges: [ + { text: "src/document/", matched: false, includesLastSegment: false }, + { text: "D", matched: true, includesLastSegment: true }, + { text: "ocument", matched: false, includesLastSegment: true }, + { text: "C", matched: true, includesLastSegment: true }, + { text: "ommand", matched: false, includesLastSegment: true }, + { text: "H", matched: true, includesLastSegment: true }, + { text: "andler.js", matched: false, includesLastSegment: true } ] }); }); var goodRelativeOrdering = function (query, testStrings) { - var lastScore = -20000; + var lastScore = -Infinity; var goodOrdering = true; testStrings.forEach(function (str) { var result = stringMatch(str, query); @@ -347,6 +375,7 @@ define(function (require, exports, module) { var result1 = QuickOpen.stringMatch("test/spec/LiveDevelopment-test.js", "spec/live"); var result2 = QuickOpen.stringMatch("test/spec/live/foobar.js", "spec/live"); expect(result2.scoreDebug.consecutive).toEqual(result1.scoreDebug.consecutive); + expect(result2.scoreDebug.consecutive).toBeGreaterThan(0); }); it("should boost last segment matches, even when searching the whole string", function () { From 763e78534d8afb61e5459bacfb45e5fdd7191472 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 9 Jan 2013 10:12:17 -0500 Subject: [PATCH 07/14] adds a penalty for matches that don't start on a special character, which fixes the failing test for samples/index --- src/search/QuickOpen.js | 20 ++++++++++++++++++-- test/spec/QuickOpen-test.js | 31 +++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index acf5b052918..3dddfc93382 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -513,6 +513,7 @@ define(function (require, exports, module) { var BEGINNING_OF_NAME_POINTS = 25; var DEDUCTION_FOR_LENGTH = 0.2; var CONSECUTIVE_MATCHES_POINTS = 25; + var NOT_STARTING_ON_SPECIAL_PENALTY = 25; /* * Finds the best matches between the query and the string. The query is @@ -554,7 +555,8 @@ define(function (require, exports, module) { lastSegment: 0, beginning: 0, lengthDeduction: 0, - consecutive: 0 + consecutive: 0, + notStartingOnSpecial: 0 }; } @@ -580,6 +582,7 @@ define(function (require, exports, module) { // currentRange keeps track of the range we are adding characters to now var currentRange = null; var lastSegmentScore = 0; + var currentRangeStartedOnSpecial = false; // the character index (against str) that was last in a matched range. This is used for // adding unmatched ranges in between and adding bonus points for consecutive matches. @@ -598,6 +601,13 @@ define(function (require, exports, module) { } score += lastSegmentScore * LAST_SEGMENT_BOOST; } + + if (currentRange.matched && !currentRangeStartedOnSpecial) { + if (DEBUG_SCORES) { + scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY; + } + score -= NOT_STARTING_ON_SPECIAL_PENALTY; + } ranges.push(currentRange); } @@ -663,6 +673,11 @@ define(function (require, exports, module) { text: str[c], matched: true }; + + // Check to see if this new matched range is starting on a special + // character. We penalize those ranges that don't, because most + // people will search on the logical boundaries of the name + currentRangeStartedOnSpecial = c === specials[specialsCounter]; } else { currentRange.text += str[c]; } @@ -1086,7 +1101,8 @@ define(function (require, exports, module) { var sd = item.scoreDebug; displayName += '(' + item.matchGoodness + ') '; + ', ld:' + sd.lengthDeduction + ', c:' + sd.consecutive + ', nsos: ' + + sd.notStartingOnSpecial + '">(' + item.matchGoodness + ') '; } // Put the path pieces together, highlighting the matched parts diff --git a/test/spec/QuickOpen-test.js b/test/spec/QuickOpen-test.js index c5f680e280d..8ae032ba72c 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/QuickOpen-test.js @@ -329,30 +329,35 @@ define(function (require, exports, module) { "src/extensions/default/QuickOpenCSS/main.js" ])).toBe(true); }); + it("should find the right spec/live", function () { expect(goodRelativeOrdering("spec/live", [ "test/spec/LiveDevelopment-test.js", "test/spec/LiveDevelopment-chrome-user-data/Default/VisitedLinks" ])).toBe(true); }); + it("should find the right samples/index", function () { expect(goodRelativeOrdering("samples/index", [ "samples/de/Erste Schritte/index.html", "src/thirdparty/CodeMirror2/mode/ntriples/index.html" ])).toBe(true); }); + it("should find the right Commands", function () { expect(goodRelativeOrdering("Commands", [ "src/command/Commands.js", "src/command/CommandManager.js" ])).toBe(true); }); + it("should find the right extensions", function () { expect(goodRelativeOrdering("extensions", [ "src/utils/ExtensionLoader.js", "src/extensions/default/RecentProjects/styles.css" ])).toBe(true); }); + it("should find the right EUtil", function () { expect(goodRelativeOrdering("EUtil", [ "src/editor/EditorUtils.js", @@ -360,6 +365,21 @@ define(function (require, exports, module) { "src/file/FileUtils.js" ])); }); + + it("should find the right ECH", function () { + expect(goodRelativeOrdering("ECH", [ + "EditorCommandHandlers", + "EditorCommandHandlers-test", + "SpecHelper" + ])); + }); + + it("should find the right DMan", function () { + expect(goodRelativeOrdering("DMan", [ + "DocumentManager", + "CommandManager" + ])); + }); }); describe("scoring", function () { @@ -376,17 +396,28 @@ define(function (require, exports, module) { var result2 = QuickOpen.stringMatch("test/spec/live/foobar.js", "spec/live"); expect(result2.scoreDebug.consecutive).toEqual(result1.scoreDebug.consecutive); expect(result2.scoreDebug.consecutive).toBeGreaterThan(0); + expect(result2.scoreDebug.notStartingOnSpecial).toEqual(0); }); it("should boost last segment matches, even when searching the whole string", function () { var result = QuickOpen.stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quickopenain"); expect(result.scoreDebug.lastSegment).toBeGreaterThan(0); + expect(result.scoreDebug.notStartingOnSpecial).toEqual(-25); }); it("should treat the character after _ as a special", function () { var result = QuickOpen.stringMatch("src/extensions/default/Quick_Open.js", "o"); expect(result.scoreDebug.special).toBeGreaterThan(0); }); + + it("should penalize matches that don't start on a special", function () { + var result = QuickOpen.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "samples/index"); + expect(result.scoreDebug.notStartingOnSpecial).toEqual(-50); + + result = QuickOpen.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "codemirror"); + expect(result.scoreDebug.notStartingOnSpecial).toEqual(0); + }); + }); }); }); From 4eafccf533d0147096536efdaa7c43e3dbfc5216 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 9 Jan 2013 11:03:52 -0500 Subject: [PATCH 08/14] pull the stringMatch code out of QuickOpen and into a new module --- src/search/QuickOpen.js | 599 +---------------- src/utils/StringMatch.js | 622 ++++++++++++++++++ test/UnitTestSuite.js | 2 +- ...{QuickOpen-test.js => StringMatch-test.js} | 44 +- 4 files changed, 650 insertions(+), 617 deletions(-) create mode 100644 src/utils/StringMatch.js rename test/spec/{QuickOpen-test.js => StringMatch-test.js} (92%) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index 3dddfc93382..f325a5eae8d 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -23,7 +23,6 @@ /*jslint vars: true, plusplus: true, devel: true, nomen: true, indent: 4, maxerr: 50 */ /*global define, $, window, setTimeout, ArrayBuffer, Int8Array */ -/*unittests: QuickOpen */ /* * Displays an auto suggest pop-up list of files to allow the user to quickly navigate to a file and lines @@ -49,7 +48,8 @@ define(function (require, exports, module) { Commands = require("command/Commands"), ProjectManager = require("project/ProjectManager"), KeyEvent = require("utils/KeyEvent"), - ModalBar = require("widgets/ModalBar").ModalBar; + ModalBar = require("widgets/ModalBar").ModalBar, + StringMatch = require("utils/StringMatch"); /** @type Array. */ @@ -87,11 +87,6 @@ define(function (require, exports, module) { */ var _curDialog; - /** Object representing a search result with associated metadata (added as extra ad hoc fields) */ - function SearchResult(label) { - this.label = label; - } - /** * Defines API for new QuickOpen plug-ins */ @@ -426,582 +421,6 @@ define(function (require, exports, module) { $(window.document).off("mousedown", this._handleDocumentMouseDown); }; - /** - * Helper functions for stringMatch at the heart of the QuickOpen - * matching. - */ - - /* - * Identifies the "special" characters in the given string. - * Special characters for matching purposes are: - * - * * the first character - * * "/" and the character following the "/" - * * "_", "." and "-" and the character following it - * * an uppercase character that follows a lowercase one (think camelCase) - * - * The returned object contains an array called "specials". This array is - * a list of indexes into the original string where all of the special - * characters are. It also has a property "lastSegmentSpecialsIndex" which - * is an index into the specials array that denotes which location is the - * beginning of the last path segment. (This is used to allow scanning of - * the last segment's specials separately.) - * - * @param {string} input string to break apart (e.g. filename that is being searched) - * @return {{specials:Array., lastSegmentSpecialsIndex:number}} - */ - function findSpecialCharacters(str) { - var i, c; - - // the beginning of the string is always special - var specials = [0]; - - // lastSegmentSpecialsIndex starts off with the assumption that - // there are no segments - var lastSegmentSpecialsIndex = 0; - - // used to track down the camelCase changeovers - var lastWasLowerCase = false; - - for (i = 0; i < str.length; i++) { - c = str[i]; - if (c === "/") { - // new segment means this character and the next are special - specials.push(i++); - specials.push(i); - lastSegmentSpecialsIndex = specials.length - 1; - lastWasLowerCase = false; - } else if (c === "." || c === "-" || c === "_") { - // _, . and - are separators so they are - // special and so is the next character - specials.push(i++); - specials.push(i); - lastWasLowerCase = false; - } else if (c.toUpperCase() === c) { - // this is the check for camelCase changeovers - if (lastWasLowerCase) { - specials.push(i); - } - lastWasLowerCase = false; - } else { - lastWasLowerCase = true; - } - } - return { - specials: specials, - lastSegmentSpecialsIndex: lastSegmentSpecialsIndex - }; - } - - // states used during the scanning of the string - var SPECIALS_COMPARE = 0; - var CONTIGUOUS_COMPARE = 1; - var SPECIALS_EXHAUSTED = 2; - - // Scores can be hard to make sense of. The DEBUG_SCORES flag - // provides a way to peek into the parts that made up a score. - // This flag is used for manual debugging and in the unit tests only. - var DEBUG_SCORES = false; - function _setDebugScores(ds) { - DEBUG_SCORES = ds; - } - - // Constants for scoring - var SPECIAL_POINTS = 25; - var MATCH_POINTS = 10; - var LAST_SEGMENT_BOOST = 1; - var BEGINNING_OF_NAME_POINTS = 25; - var DEDUCTION_FOR_LENGTH = 0.2; - var CONSECUTIVE_MATCHES_POINTS = 25; - var NOT_STARTING_ON_SPECIAL_PENALTY = 25; - - /* - * Finds the best matches between the query and the string. The query is - * compared with compareStr (generally a lower case string with a lower case - * query), but the results are returned based on str. - * - * Generally speaking, this function tries to find a "special" character - * (see findSpecialCharacters above) for the first character of the query. - * When it finds one, it then tries to look for consecutive characters that - * match. Once the characters stop matching, it tries to find a special - * character for the next query character. If a special character isn't found, - * it starts scanning sequentially. - * - * The returned object contains the following fields: - * * {Array} ranges: the scanned regions of the string, in order. For each range: - * * {string} text: the text for the string range - * * {boolean} matched: was this range part of the match? - * * {boolean} includesLastSegment: is this range part of the last segment of str - * * {int} matchGoodness: the score computed for this match - * * (optional) {Object} scoreDebug: if DEBUG_SCORES is true, an object with the score broken down - * - * @param {string} query the search string (generally lower cased) - * @param {string} str the original string to search - * @param {string} compareStr the string to compare with (generally lower cased) - * @param {Array} specials list of special indexes in str (from findSpecialCharacters) - * @param {int} startingSpecial index into specials array to start scanning with - * @param {boolean} lastSegmentStart optional which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score - */ - function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { - var ranges = []; - - var score = 0; - var scoreDebug; - if (DEBUG_SCORES) { - scoreDebug = { - special: 0, - match: 0, - lastSegment: 0, - beginning: 0, - lengthDeduction: 0, - consecutive: 0, - notStartingOnSpecial: 0 - }; - } - - // normalize the optional parameters - if (!lastSegmentStart) { - lastSegmentStart = 0; - } - - if (startingSpecial === undefined) { - startingSpecial = 0; - } - - var specialsCounter = startingSpecial; - - // strCounter and queryCounter are the indexes used for pulling characters - // off of the str/compareStr and query. - var strCounter = specials[startingSpecial]; - var queryCounter = 0; - - // initial state is to scan through the special characters - var state = SPECIALS_COMPARE; - - // currentRange keeps track of the range we are adding characters to now - var currentRange = null; - var lastSegmentScore = 0; - var currentRangeStartedOnSpecial = false; - - // the character index (against str) that was last in a matched range. This is used for - // adding unmatched ranges in between and adding bonus points for consecutive matches. - var lastMatchIndex = strCounter - 1; - - // Records the current range and adds any additional ranges required to - // get to character index c. This function is called before starting a new range - // or returning from the function. - function closeRangeGap(c) { - // close the current range - if (currentRange) { - currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; - if (currentRange.matched && currentRange.includesLastSegment) { - if (DEBUG_SCORES) { - scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; - } - score += lastSegmentScore * LAST_SEGMENT_BOOST; - } - - if (currentRange.matched && !currentRangeStartedOnSpecial) { - if (DEBUG_SCORES) { - scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY; - } - score -= NOT_STARTING_ON_SPECIAL_PENALTY; - } - ranges.push(currentRange); - } - - // if there was space between the new range and the last, - // add a new unmatched range before the new range can be added. - if (lastMatchIndex + 1 < c) { - ranges.push({ - text: str.substring(lastMatchIndex + 1, c), - matched: false, - includesLastSegment: c > lastSegmentStart - }); - } - currentRange = null; - lastSegmentScore = 0; - } - - // records that the character at index c (of str) matches, adding - // that character to the current range or starting a new one. - function addMatch(c) { - var newPoints = 0; - - // A match means that we need to do some scoring bookkeeping. - if (DEBUG_SCORES) { - scoreDebug.match += MATCH_POINTS; - } - newPoints += MATCH_POINTS; - - // a bonus is given for characters that match at the beginning - // of the filename - if (c === lastSegmentStart) { - if (DEBUG_SCORES) { - scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; - } - newPoints += BEGINNING_OF_NAME_POINTS; - } - - // If the new character immediately follows the last matched character, - // we award the consecutive matches bonus. The check for score > 0 - // handles the initial value of lastMatchIndex which is used for - // constructing ranges but we don't yet have a true match. - if (score > 0 && lastMatchIndex + 1 === c) { - if (DEBUG_SCORES) { - scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; - } - newPoints += CONSECUTIVE_MATCHES_POINTS; - } - - score += newPoints; - if (c >= lastSegmentStart) { - lastSegmentScore += newPoints; - } - - // if the last range wasn't a match or there's a gap, we need to close off - // the range to start a new one. - if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) { - closeRangeGap(c); - } - lastMatchIndex = c; - - // set up a new match range or add to the current one - if (!currentRange) { - currentRange = { - text: str[c], - matched: true - }; - - // Check to see if this new matched range is starting on a special - // character. We penalize those ranges that don't, because most - // people will search on the logical boundaries of the name - currentRangeStartedOnSpecial = c === specials[specialsCounter]; - } else { - currentRange.text += str[c]; - } - } - - // Compares the current character from the query string against the - // special characters in compareStr. - function findMatchingSpecial() { - // used to loop through the specials - var i; - - var foundMatch = false; - for (i = specialsCounter; i < specials.length; i++) { - // first, check to see if strCounter has moved beyond - // the current special character. This is possible - // if the contiguous comparison continued on through - // the next special - if (specials[i] < strCounter) { - specialsCounter = i; - } else if (query[queryCounter] === compareStr[specials[i]]) { - // we have a match! do the required tracking - specialsCounter = i; - queryCounter++; - strCounter = specials[i]; - addMatch(strCounter++); - if (DEBUG_SCORES) { - scoreDebug.special += SPECIAL_POINTS; - } - score += SPECIAL_POINTS; - foundMatch = true; - break; - } - } - - // when we find a match, we switch to looking for consecutive matching characters - if (foundMatch) { - state = CONTIGUOUS_COMPARE; - } else { - // we didn't find a match among the specials - state = SPECIALS_EXHAUSTED; - } - } - - // keep looping until we've either exhausted the query or the string - while (queryCounter < query.length && strCounter < str.length) { - if (state === SPECIALS_COMPARE) { - findMatchingSpecial(); - } else if (state === CONTIGUOUS_COMPARE || state === SPECIALS_EXHAUSTED) { - // for both of these states, we're looking character by character - // for matches - if (query[queryCounter] === compareStr[strCounter]) { - // got a match! record it, and switch to CONTIGUOUS_COMPARE - // in case we had been in SPECIALS_EXHAUSTED state - queryCounter++; - addMatch(strCounter++); - state = CONTIGUOUS_COMPARE; - } else { - // no match. If we were in CONTIGUOUS_COMPARE mode, we - // we switch to looking for specials again. If we've - // already exhaused the specials, we're just going to keep - // stepping through compareStr. - if (state !== SPECIALS_EXHAUSTED) { - findMatchingSpecial(); - } else { - strCounter++; - } - } - } - } - - var result; - // Add the rest of the string ranges - closeRangeGap(str.length); - - // It's not a match if we still have query string left. - if (queryCounter < query.length) { - result = null; - } else { - result = { - matchGoodness: score, - ranges: ranges - }; - if (DEBUG_SCORES) { - result.scoreDebug = scoreDebug; - } - } - return result; - } - - /* - * Seek out the best match in the last segment (generally the filename). - * Matches in the filename are preferred, but the query entered could match - * any part of the path. So, we find the best match we can get in the filename - * and then allow for searching the rest of the string with any characters that - * are left from the beginning of the query. - * - * The parameters and return value are the same as for getMatchRanges, - * except this function is always working on the last segment and the - * result can optionally include a remainder, which is the characters - * at the beginning of the query which did not match in the last segment. - * - * @param {string} query the search string (generally lower cased) - * @param {string} str the original string to search - * @param {string} compareStr the string to compare with (generally lower cased) - * @param {Array} specials list of special indexes in str (from findSpecialCharacters) - * @param {int} startingSpecial index into specials array to start scanning with - * @param {boolean} lastSegmentStart which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score - */ - function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { - var queryCounter, matchRanges; - - // It's possible that the query is longer than the last segment. - // If so, we can chop off the bit that we know couldn't possibly be there. - var remainder = ""; - var extraCharacters = specials[startingSpecial] + query.length - str.length; - - if (extraCharacters > 0) { - remainder = query.substring(0, extraCharacters); - query = query.substring(extraCharacters); - } - - for (queryCounter = 0; queryCounter < query.length; queryCounter++) { - matchRanges = getMatchRanges(query.substring(queryCounter), - str, compareStr, specials, startingSpecial, lastSegmentStart); - - // if we've got a match *or* there are no segments in this string, we're done - if (matchRanges || startingSpecial === 0) { - break; - } - } - - if (queryCounter === query.length || !matchRanges) { - return null; - } else { - matchRanges.remainder = remainder + query.substring(0, queryCounter); - return matchRanges; - } - } - - /* - * Implements the top-level search algorithm. Search the last segment first, - * then search the rest of the string with the remainder. - * - * The parameters and return value are the same as for getMatchRanges. - * - * @param {string} query the search string (will be searched lower case) - * @param {string} str the original string to search - * @param {Array} specials list of special indexes in str (from findSpecialCharacters) - * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score - */ - function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { - // set up query as all lower case and make a lower case string to use for comparisons - query = query.toLowerCase(); - var compareStr = str.toLowerCase(); - - var lastSegmentStart = specials[lastSegmentSpecialsIndex]; - var result; - - if (lastSegmentStart + query.length < str.length) { - result = _lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); - } - - if (result) { - // Do we have more query characters that did not fit? - if (result.remainder) { - // Scan with the remainder only through the beginning of the last segment - var remainderResult = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), - compareStr.substring(0, lastSegmentStart), - specials.slice(0, lastSegmentSpecialsIndex), 0, lastSegmentStart); - - if (remainderResult) { - // We have a match - // This next part deals with scoring for the case where - // there are consecutive matched characters at the border of the - // last segment. - var lastRange = remainderResult.ranges[remainderResult.ranges.length - 1]; - if (result.ranges[0].matched && lastRange.matched) { - result.matchGoodness += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; - if (DEBUG_SCORES) { - result.scoreDebug.consecutive += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; - } - } - - // add the new matched ranges to the beginning of the set of ranges we had - result.ranges.unshift.apply(result.ranges, remainderResult.ranges); - } else { - // no match - return null; - } - } else { - // There was no remainder, which means that the whole match is in the - // last segment. We need to add a range at the beginning for everything - // that came before the last segment. - result.ranges.unshift({ - text: str.substring(0, lastSegmentStart), - matched: false, - includesLastSegment: false - }); - } - delete result.remainder; - } else { - // No match in the last segment, so we start over searching the whole - // string - result = getMatchRanges(query, str, compareStr, specials, 0, lastSegmentStart); - } - - if (result) { - var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH); - if (DEBUG_SCORES) { - result.scoreDebug.lengthDeduction = lengthPenalty; - } - result.matchGoodness = result.matchGoodness + lengthPenalty; - } - return result; - } - - /* - * Match str against the query using the QuickOpen algorithm provided by - * the functions above. The general idea is to prefer matches in the last - * segment (the filename) and matches of "special" characters. stringMatch - * will try to provide the best match and produces a "matchGoodness" score - * to allow for relative ranking. - * - * The result object returned includes "stringRanges" which can be used to highlight - * the matched portions of the string, in addition to the "matchGoodness" - * mentioned above. If DEBUG_SCORES is true, scoreDebug is set on the result - * to provide insight into the score. - * - * The matching is done in a case-insensitive manner. - * - * @param {string} str The string to search - * @param {string} query The query string to find in string - */ - function stringMatch(str, query) { - var result; - - // No query? Short circuit the normal work done and just - // return a single range that covers the whole string. - if (!query) { - result = new SearchResult(str); - result.matchGoodness = 0; - if (DEBUG_SCORES) { - result.scoreDebug = {}; - } - result.stringRanges = [{ - text: str, - matched: false, - includesLastSegment: true - }]; - return result; - } - - // Locate the special characters and then use orderedCompare to do the real - // work. - var special = findSpecialCharacters(str); - var compareData = _computeMatch(query, str, special.specials, - special.lastSegmentSpecialsIndex); - - // If we get a match, turn this into a SearchResult as expected by the consumers - // of this API. - if (compareData) { - result = new SearchResult(str); - result.stringRanges = compareData.ranges; - result.matchGoodness = -1 * compareData.matchGoodness; - if (DEBUG_SCORES) { - result.scoreDebug = compareData.scoreDebug; - } - } - return result; - } - - /** - * Sorts an array of SearchResult objects on a primary field, followed by secondary fields - * in case of ties. 'fields' maps field name to priority, where 0 is the primary field. E.g.: - * multiFieldSort(bugList, { milestone: 0, severity: 1 }); - * Would sort a bug list by milestone, and within each milestone sort bugs by severity. - * - * Any fields that have a string value are compared case-insensitively. Fields used should be - * present on all SearchResult objects (no optional/undefined fields). - * - * @param {!Array.} searchResults - * @param {!Object.} fields - */ - function multiFieldSort(searchResults, fields) { - // Move field names into an array, with primary field first - var fieldNames = []; - $.each(fields, function (key, priority) { - fieldNames[priority] = key; - }); - - searchResults.sort(function (a, b) { - var priority; - for (priority = 0; priority < fieldNames.length; priority++) { - var fieldName = fieldNames[priority]; - var valueA = a[fieldName]; - var valueB = b[fieldName]; - if (typeof valueA === "string") { - valueA = valueA.toLowerCase(); - valueB = valueB.toLowerCase(); - } - - if (valueA < valueB) { - return -1; - } else if (valueA > valueB) { - return 1; - } - // otherwise, move on to next sort priority - } - return 0; // all sort fields are equal - }); - } - - /** - * Sorts search results generated by stringMatch(): results are sorted into several - * tiers based on how well they matched the search query, then sorted alphabetically - * within each tier. - */ - function basicMatchSort(searchResults) { - multiFieldSort(searchResults, { matchGoodness: 0, label: 1 }); - } - - function searchFileList(query) { // FileIndexManager may still be loading asynchronously - if so, can't return a result yet if (!fileList) { @@ -1027,7 +446,7 @@ define(function (require, exports, module) { var filteredList = $.map(fileList, function (fileInfo) { // Is it a match at all? // match query against the full path (with gaps between query characters allowed) - var searchResult = stringMatch(ProjectManager.makeProjectRelativeIfPossible(fileInfo.fullPath), query); + var searchResult = StringMatch.stringMatch(ProjectManager.makeProjectRelativeIfPossible(fileInfo.fullPath), query); if (searchResult) { searchResult.label = fileInfo.name; searchResult.fullPath = fileInfo.fullPath; @@ -1039,7 +458,7 @@ define(function (require, exports, module) { // Sort by "match goodness" tier first, then within each tier sort alphabetically - first by filename // sans extension, (so that "abc.js" comes before "abc-d.js"), then by filename, and finally (for // identically-named files) by full path - multiFieldSort(filteredList, { matchGoodness: 0, filenameWithoutExtension: 1, label: 2, fullPath: 3 }); + StringMatch.multiFieldSort(filteredList, { matchGoodness: 0, filenameWithoutExtension: 1, label: 2, fullPath: 3 }); return filteredList; } @@ -1097,7 +516,7 @@ define(function (require, exports, module) { } var displayName = ""; - if (DEBUG_SCORES) { + if (item.scoreDebug) { var sd = item.scoreDebug; displayName += ', lastSegmentSpecialsIndex:number}} + */ + function findSpecialCharacters(str) { + var i, c; + + // the beginning of the string is always special + var specials = [0]; + + // lastSegmentSpecialsIndex starts off with the assumption that + // there are no segments + var lastSegmentSpecialsIndex = 0; + + // used to track down the camelCase changeovers + var lastWasLowerCase = false; + + for (i = 0; i < str.length; i++) { + c = str[i]; + if (c === "/") { + // new segment means this character and the next are special + specials.push(i++); + specials.push(i); + lastSegmentSpecialsIndex = specials.length - 1; + lastWasLowerCase = false; + } else if (c === "." || c === "-" || c === "_") { + // _, . and - are separators so they are + // special and so is the next character + specials.push(i++); + specials.push(i); + lastWasLowerCase = false; + } else if (c.toUpperCase() === c) { + // this is the check for camelCase changeovers + if (lastWasLowerCase) { + specials.push(i); + } + lastWasLowerCase = false; + } else { + lastWasLowerCase = true; + } + } + return { + specials: specials, + lastSegmentSpecialsIndex: lastSegmentSpecialsIndex + }; + } + + // states used during the scanning of the string + var SPECIALS_COMPARE = 0; + var CONTIGUOUS_COMPARE = 1; + var SPECIALS_EXHAUSTED = 2; + + // Scores can be hard to make sense of. The DEBUG_SCORES flag + // provides a way to peek into the parts that made up a score. + // This flag is used for manual debugging and in the unit tests only. + var DEBUG_SCORES = false; + function _setDebugScores(ds) { + DEBUG_SCORES = ds; + } + + // Constants for scoring + var SPECIAL_POINTS = 25; + var MATCH_POINTS = 10; + var LAST_SEGMENT_BOOST = 1; + var BEGINNING_OF_NAME_POINTS = 25; + var DEDUCTION_FOR_LENGTH = 0.2; + var CONSECUTIVE_MATCHES_POINTS = 25; + var NOT_STARTING_ON_SPECIAL_PENALTY = 25; + + /* + * Finds the best matches between the query and the string. The query is + * compared with compareStr (generally a lower case string with a lower case + * query), but the results are returned based on str. + * + * Generally speaking, this function tries to find a "special" character + * (see findSpecialCharacters above) for the first character of the query. + * When it finds one, it then tries to look for consecutive characters that + * match. Once the characters stop matching, it tries to find a special + * character for the next query character. If a special character isn't found, + * it starts scanning sequentially. + * + * The returned object contains the following fields: + * * {Array} ranges: the scanned regions of the string, in order. For each range: + * * {string} text: the text for the string range + * * {boolean} matched: was this range part of the match? + * * {boolean} includesLastSegment: is this range part of the last segment of str + * * {int} matchGoodness: the score computed for this match + * * (optional) {Object} scoreDebug: if DEBUG_SCORES is true, an object with the score broken down + * + * @param {string} query the search string (generally lower cased) + * @param {string} str the original string to search + * @param {string} compareStr the string to compare with (generally lower cased) + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} startingSpecial index into specials array to start scanning with + * @param {boolean} lastSegmentStart optional which character does the last segment start at + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + */ + function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { + var ranges = []; + + var score = 0; + var scoreDebug; + if (DEBUG_SCORES) { + scoreDebug = { + special: 0, + match: 0, + lastSegment: 0, + beginning: 0, + lengthDeduction: 0, + consecutive: 0, + notStartingOnSpecial: 0 + }; + } + + // normalize the optional parameters + if (!lastSegmentStart) { + lastSegmentStart = 0; + } + + if (startingSpecial === undefined) { + startingSpecial = 0; + } + + var specialsCounter = startingSpecial; + + // strCounter and queryCounter are the indexes used for pulling characters + // off of the str/compareStr and query. + var strCounter = specials[startingSpecial]; + var queryCounter = 0; + + // initial state is to scan through the special characters + var state = SPECIALS_COMPARE; + + // currentRange keeps track of the range we are adding characters to now + var currentRange = null; + var lastSegmentScore = 0; + var currentRangeStartedOnSpecial = false; + + // the character index (against str) that was last in a matched range. This is used for + // adding unmatched ranges in between and adding bonus points for consecutive matches. + var lastMatchIndex = strCounter - 1; + + // Records the current range and adds any additional ranges required to + // get to character index c. This function is called before starting a new range + // or returning from the function. + function closeRangeGap(c) { + // close the current range + if (currentRange) { + currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; + if (currentRange.matched && currentRange.includesLastSegment) { + if (DEBUG_SCORES) { + scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; + } + score += lastSegmentScore * LAST_SEGMENT_BOOST; + } + + if (currentRange.matched && !currentRangeStartedOnSpecial) { + if (DEBUG_SCORES) { + scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY; + } + score -= NOT_STARTING_ON_SPECIAL_PENALTY; + } + ranges.push(currentRange); + } + + // if there was space between the new range and the last, + // add a new unmatched range before the new range can be added. + if (lastMatchIndex + 1 < c) { + ranges.push({ + text: str.substring(lastMatchIndex + 1, c), + matched: false, + includesLastSegment: c > lastSegmentStart + }); + } + currentRange = null; + lastSegmentScore = 0; + } + + // records that the character at index c (of str) matches, adding + // that character to the current range or starting a new one. + function addMatch(c) { + var newPoints = 0; + + // A match means that we need to do some scoring bookkeeping. + if (DEBUG_SCORES) { + scoreDebug.match += MATCH_POINTS; + } + newPoints += MATCH_POINTS; + + // a bonus is given for characters that match at the beginning + // of the filename + if (c === lastSegmentStart) { + if (DEBUG_SCORES) { + scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; + } + newPoints += BEGINNING_OF_NAME_POINTS; + } + + // If the new character immediately follows the last matched character, + // we award the consecutive matches bonus. The check for score > 0 + // handles the initial value of lastMatchIndex which is used for + // constructing ranges but we don't yet have a true match. + if (score > 0 && lastMatchIndex + 1 === c) { + if (DEBUG_SCORES) { + scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; + } + newPoints += CONSECUTIVE_MATCHES_POINTS; + } + + score += newPoints; + if (c >= lastSegmentStart) { + lastSegmentScore += newPoints; + } + + // if the last range wasn't a match or there's a gap, we need to close off + // the range to start a new one. + if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) { + closeRangeGap(c); + } + lastMatchIndex = c; + + // set up a new match range or add to the current one + if (!currentRange) { + currentRange = { + text: str[c], + matched: true + }; + + // Check to see if this new matched range is starting on a special + // character. We penalize those ranges that don't, because most + // people will search on the logical boundaries of the name + currentRangeStartedOnSpecial = c === specials[specialsCounter]; + } else { + currentRange.text += str[c]; + } + } + + // Compares the current character from the query string against the + // special characters in compareStr. + function findMatchingSpecial() { + // used to loop through the specials + var i; + + var foundMatch = false; + for (i = specialsCounter; i < specials.length; i++) { + // first, check to see if strCounter has moved beyond + // the current special character. This is possible + // if the contiguous comparison continued on through + // the next special + if (specials[i] < strCounter) { + specialsCounter = i; + } else if (query[queryCounter] === compareStr[specials[i]]) { + // we have a match! do the required tracking + specialsCounter = i; + queryCounter++; + strCounter = specials[i]; + addMatch(strCounter++); + if (DEBUG_SCORES) { + scoreDebug.special += SPECIAL_POINTS; + } + score += SPECIAL_POINTS; + foundMatch = true; + break; + } + } + + // when we find a match, we switch to looking for consecutive matching characters + if (foundMatch) { + state = CONTIGUOUS_COMPARE; + } else { + // we didn't find a match among the specials + state = SPECIALS_EXHAUSTED; + } + } + + // keep looping until we've either exhausted the query or the string + while (queryCounter < query.length && strCounter < str.length) { + if (state === SPECIALS_COMPARE) { + findMatchingSpecial(); + } else if (state === CONTIGUOUS_COMPARE || state === SPECIALS_EXHAUSTED) { + // for both of these states, we're looking character by character + // for matches + if (query[queryCounter] === compareStr[strCounter]) { + // got a match! record it, and switch to CONTIGUOUS_COMPARE + // in case we had been in SPECIALS_EXHAUSTED state + queryCounter++; + addMatch(strCounter++); + state = CONTIGUOUS_COMPARE; + } else { + // no match. If we were in CONTIGUOUS_COMPARE mode, we + // we switch to looking for specials again. If we've + // already exhaused the specials, we're just going to keep + // stepping through compareStr. + if (state !== SPECIALS_EXHAUSTED) { + findMatchingSpecial(); + } else { + strCounter++; + } + } + } + } + + var result; + // Add the rest of the string ranges + closeRangeGap(str.length); + + // It's not a match if we still have query string left. + if (queryCounter < query.length) { + result = null; + } else { + result = { + matchGoodness: score, + ranges: ranges + }; + if (DEBUG_SCORES) { + result.scoreDebug = scoreDebug; + } + } + return result; + } + + /* + * Seek out the best match in the last segment (generally the filename). + * Matches in the filename are preferred, but the query entered could match + * any part of the path. So, we find the best match we can get in the filename + * and then allow for searching the rest of the string with any characters that + * are left from the beginning of the query. + * + * The parameters and return value are the same as for getMatchRanges, + * except this function is always working on the last segment and the + * result can optionally include a remainder, which is the characters + * at the beginning of the query which did not match in the last segment. + * + * @param {string} query the search string (generally lower cased) + * @param {string} str the original string to search + * @param {string} compareStr the string to compare with (generally lower cased) + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} startingSpecial index into specials array to start scanning with + * @param {boolean} lastSegmentStart which character does the last segment start at + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score + */ + function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { + var queryCounter, matchRanges; + + // It's possible that the query is longer than the last segment. + // If so, we can chop off the bit that we know couldn't possibly be there. + var remainder = ""; + var extraCharacters = specials[startingSpecial] + query.length - str.length; + + if (extraCharacters > 0) { + remainder = query.substring(0, extraCharacters); + query = query.substring(extraCharacters); + } + + for (queryCounter = 0; queryCounter < query.length; queryCounter++) { + matchRanges = getMatchRanges(query.substring(queryCounter), + str, compareStr, specials, startingSpecial, lastSegmentStart); + + // if we've got a match *or* there are no segments in this string, we're done + if (matchRanges || startingSpecial === 0) { + break; + } + } + + if (queryCounter === query.length || !matchRanges) { + return null; + } else { + matchRanges.remainder = remainder + query.substring(0, queryCounter); + return matchRanges; + } + } + + /* + * Implements the top-level search algorithm. Search the last segment first, + * then search the rest of the string with the remainder. + * + * The parameters and return value are the same as for getMatchRanges. + * + * @param {string} query the search string (will be searched lower case) + * @param {string} str the original string to search + * @param {Array} specials list of special indexes in str (from findSpecialCharacters) + * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with + * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + */ + function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { + // set up query as all lower case and make a lower case string to use for comparisons + query = query.toLowerCase(); + var compareStr = str.toLowerCase(); + + var lastSegmentStart = specials[lastSegmentSpecialsIndex]; + var result; + + if (lastSegmentStart + query.length < str.length) { + result = _lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); + } + + if (result) { + // Do we have more query characters that did not fit? + if (result.remainder) { + // Scan with the remainder only through the beginning of the last segment + var remainderResult = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), + compareStr.substring(0, lastSegmentStart), + specials.slice(0, lastSegmentSpecialsIndex), 0, lastSegmentStart); + + if (remainderResult) { + // We have a match + // This next part deals with scoring for the case where + // there are consecutive matched characters at the border of the + // last segment. + var lastRange = remainderResult.ranges[remainderResult.ranges.length - 1]; + if (result.ranges[0].matched && lastRange.matched) { + result.matchGoodness += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; + if (DEBUG_SCORES) { + result.scoreDebug.consecutive += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; + } + } + + // add the new matched ranges to the beginning of the set of ranges we had + result.ranges.unshift.apply(result.ranges, remainderResult.ranges); + } else { + // no match + return null; + } + } else { + // There was no remainder, which means that the whole match is in the + // last segment. We need to add a range at the beginning for everything + // that came before the last segment. + result.ranges.unshift({ + text: str.substring(0, lastSegmentStart), + matched: false, + includesLastSegment: false + }); + } + delete result.remainder; + } else { + // No match in the last segment, so we start over searching the whole + // string + result = getMatchRanges(query, str, compareStr, specials, 0, lastSegmentStart); + } + + if (result) { + var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH); + if (DEBUG_SCORES) { + result.scoreDebug.lengthDeduction = lengthPenalty; + } + result.matchGoodness = result.matchGoodness + lengthPenalty; + } + return result; + } + + /* + * Match str against the query using the QuickOpen algorithm provided by + * the functions above. The general idea is to prefer matches in the last + * segment (the filename) and matches of "special" characters. stringMatch + * will try to provide the best match and produces a "matchGoodness" score + * to allow for relative ranking. + * + * The result object returned includes "stringRanges" which can be used to highlight + * the matched portions of the string, in addition to the "matchGoodness" + * mentioned above. If DEBUG_SCORES is true, scoreDebug is set on the result + * to provide insight into the score. + * + * The matching is done in a case-insensitive manner. + * + * @param {string} str The string to search + * @param {string} query The query string to find in string + */ + function stringMatch(str, query) { + var result; + + // No query? Short circuit the normal work done and just + // return a single range that covers the whole string. + if (!query) { + result = new SearchResult(str); + result.matchGoodness = 0; + if (DEBUG_SCORES) { + result.scoreDebug = {}; + } + result.stringRanges = [{ + text: str, + matched: false, + includesLastSegment: true + }]; + return result; + } + + // Locate the special characters and then use orderedCompare to do the real + // work. + var special = findSpecialCharacters(str); + var compareData = _computeMatch(query, str, special.specials, + special.lastSegmentSpecialsIndex); + + // If we get a match, turn this into a SearchResult as expected by the consumers + // of this API. + if (compareData) { + result = new SearchResult(str); + result.stringRanges = compareData.ranges; + result.matchGoodness = -1 * compareData.matchGoodness; + if (DEBUG_SCORES) { + result.scoreDebug = compareData.scoreDebug; + } + } + return result; + } + + /** + * Sorts an array of SearchResult objects on a primary field, followed by secondary fields + * in case of ties. 'fields' maps field name to priority, where 0 is the primary field. E.g.: + * multiFieldSort(bugList, { milestone: 0, severity: 1 }); + * Would sort a bug list by milestone, and within each milestone sort bugs by severity. + * + * Any fields that have a string value are compared case-insensitively. Fields used should be + * present on all SearchResult objects (no optional/undefined fields). + * + * @param {!Array.} searchResults + * @param {!Object.} fields + */ + function multiFieldSort(searchResults, fields) { + // Move field names into an array, with primary field first + var fieldNames = []; + $.each(fields, function (key, priority) { + fieldNames[priority] = key; + }); + + searchResults.sort(function (a, b) { + var priority; + for (priority = 0; priority < fieldNames.length; priority++) { + var fieldName = fieldNames[priority]; + var valueA = a[fieldName]; + var valueB = b[fieldName]; + if (typeof valueA === "string") { + valueA = valueA.toLowerCase(); + valueB = valueB.toLowerCase(); + } + + if (valueA < valueB) { + return -1; + } else if (valueA > valueB) { + return 1; + } + // otherwise, move on to next sort priority + } + return 0; // all sort fields are equal + }); + } + + /** + * Sorts search results generated by stringMatch(): results are sorted into several + * tiers based on how well they matched the search query, then sorted alphabetically + * within each tier. + */ + function basicMatchSort(searchResults) { + multiFieldSort(searchResults, { matchGoodness: 0, label: 1 }); + } + + exports._findSpecialCharacters = findSpecialCharacters; + exports._computeMatch = _computeMatch; + exports._lastSegmentSearch = _lastSegmentSearch; + exports._setDebugScores = _setDebugScores; + + // public exports + exports.SearchResult = SearchResult; + exports.stringMatch = stringMatch; + exports.basicMatchSort = basicMatchSort; + exports.multiFieldSort = multiFieldSort; +}); diff --git a/test/UnitTestSuite.js b/test/UnitTestSuite.js index d9838080b5f..c06a86aedc0 100644 --- a/test/UnitTestSuite.js +++ b/test/UnitTestSuite.js @@ -47,7 +47,7 @@ define(function (require, exports, module) { require("spec/NativeFileSystem-test"); require("spec/PreferencesManager-test"); require("spec/ProjectManager-test"); - require("spec/QuickOpen-test"); + require("spec/StringMatch-test"); require("spec/UpdateNotification-test"); require("spec/ViewUtils-test"); require("spec/WorkingSetView-test"); diff --git a/test/spec/QuickOpen-test.js b/test/spec/StringMatch-test.js similarity index 92% rename from test/spec/QuickOpen-test.js rename to test/spec/StringMatch-test.js index 8ae032ba72c..c862083aac8 100644 --- a/test/spec/QuickOpen-test.js +++ b/test/spec/StringMatch-test.js @@ -24,20 +24,20 @@ /*jslint vars: true, plusplus: true, devel: true, browser: true, nomen: true, indent: 4, maxerr: 50 */ /*global define: false, require: false, describe: false, it: false, xit: false, expect: false, beforeEach: false, afterEach: false, waitsFor: false, runs: false, jasmine: false */ -/*unittests: QuickOpen */ +/*unittests: StringMatch */ define(function (require, exports, module) { 'use strict'; - var QuickOpen = require("search/QuickOpen"); + var StringMatch = require("utils/StringMatch"); - describe("QuickOpen", function () { + describe("StringMatch", function () { - QuickOpen._setDebugScores(false); + StringMatch._setDebugScores(false); describe("findSpecialCharacters", function () { it("should find the important match characters in the string", function () { - var fSC = QuickOpen._findSpecialCharacters; + var fSC = StringMatch._findSpecialCharacters; expect(fSC("src/document/DocumentCommandHandler.js")).toEqual({ lastSegmentSpecialsIndex: 4, specials: [0, 3, 4, 12, 13, 21, 28, 35, 36] @@ -59,8 +59,8 @@ define(function (require, exports, module) { it("should compare results in the final segment properly", function () { var path = "src/document/DocumentCommandHandler.js"; var comparePath = path.toLowerCase(); - var _lastSegmentSearch = QuickOpen._lastSegmentSearch; - var sc = QuickOpen._findSpecialCharacters(path); + var _lastSegmentSearch = StringMatch._lastSegmentSearch; + var sc = StringMatch._findSpecialCharacters(path); expect(_lastSegmentSearch("d", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", matchGoodness: jasmine.any(Number), @@ -167,15 +167,15 @@ define(function (require, exports, module) { // note that the d's in Command and Handler have been removed var path = "DocumentCommanHanler.js"; var comparePath = path.toLowerCase(); - var _lastSegmentSearch = QuickOpen._lastSegmentSearch; - var sc = QuickOpen._findSpecialCharacters(path); + var _lastSegmentSearch = StringMatch._lastSegmentSearch; + var sc = StringMatch._findSpecialCharacters(path); expect(_lastSegmentSearch("ocud", path, comparePath, sc.specials, 0)).toEqual(null); }); it("should compare matches that don't fit in just the final segment", function () { var path = "src/document/DocumentCommandHandler.js"; - var computeMatch = QuickOpen._computeMatch; - var sc = QuickOpen._findSpecialCharacters(path); + var computeMatch = StringMatch._computeMatch; + var sc = StringMatch._findSpecialCharacters(path); expect(computeMatch("sdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ @@ -213,8 +213,8 @@ define(function (require, exports, module) { it("should handle matches that don't fit at all in the final segment", function () { var path = "src/extensions/default/QuickOpenCSS/main.js"; - var computeMatch = QuickOpen._computeMatch; - var sc = QuickOpen._findSpecialCharacters(path); + var computeMatch = StringMatch._computeMatch; + var sc = StringMatch._findSpecialCharacters(path); expect(computeMatch("quick", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ matchGoodness: jasmine.any(Number), ranges: [ @@ -247,7 +247,7 @@ define(function (require, exports, module) { }); describe("stringMatch", function () { - var stringMatch = QuickOpen.stringMatch; + var stringMatch = StringMatch.stringMatch; it("should return appropriate matching ranges", function () { var result; @@ -384,37 +384,37 @@ define(function (require, exports, module) { describe("scoring", function () { beforeEach(function () { - QuickOpen._setDebugScores(true); + StringMatch._setDebugScores(true); }); afterEach(function () { - QuickOpen._setDebugScores(false); + StringMatch._setDebugScores(false); }); it("should score consecutive matches across the last segment", function () { - var result1 = QuickOpen.stringMatch("test/spec/LiveDevelopment-test.js", "spec/live"); - var result2 = QuickOpen.stringMatch("test/spec/live/foobar.js", "spec/live"); + var result1 = StringMatch.stringMatch("test/spec/LiveDevelopment-test.js", "spec/live"); + var result2 = StringMatch.stringMatch("test/spec/live/foobar.js", "spec/live"); expect(result2.scoreDebug.consecutive).toEqual(result1.scoreDebug.consecutive); expect(result2.scoreDebug.consecutive).toBeGreaterThan(0); expect(result2.scoreDebug.notStartingOnSpecial).toEqual(0); }); it("should boost last segment matches, even when searching the whole string", function () { - var result = QuickOpen.stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quickopenain"); + var result = StringMatch.stringMatch("src/extensions/default/QuickOpenCSS/main.js", "quickopenain"); expect(result.scoreDebug.lastSegment).toBeGreaterThan(0); expect(result.scoreDebug.notStartingOnSpecial).toEqual(-25); }); it("should treat the character after _ as a special", function () { - var result = QuickOpen.stringMatch("src/extensions/default/Quick_Open.js", "o"); + var result = StringMatch.stringMatch("src/extensions/default/Quick_Open.js", "o"); expect(result.scoreDebug.special).toBeGreaterThan(0); }); it("should penalize matches that don't start on a special", function () { - var result = QuickOpen.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "samples/index"); + var result = StringMatch.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "samples/index"); expect(result.scoreDebug.notStartingOnSpecial).toEqual(-50); - result = QuickOpen.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "codemirror"); + result = StringMatch.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "codemirror"); expect(result.scoreDebug.notStartingOnSpecial).toEqual(0); }); From 57d262804728b1d179f2e1a98e6932884c37e77b Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 9 Jan 2013 22:11:34 -0500 Subject: [PATCH 09/14] review comment responses and the addition of two failing tests that illustrate bugs found during review --- src/search/QuickOpen.js | 8 +++++++- src/utils/StringMatch.js | 8 ++++---- test/spec/StringMatch-test.js | 16 +++++++++++++++- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/search/QuickOpen.js b/src/search/QuickOpen.js index f325a5eae8d..350e9b08e71 100644 --- a/src/search/QuickOpen.js +++ b/src/search/QuickOpen.js @@ -22,7 +22,7 @@ */ /*jslint vars: true, plusplus: true, devel: true, nomen: true, indent: 4, maxerr: 50 */ -/*global define, $, window, setTimeout, ArrayBuffer, Int8Array */ +/*global define, $, window, setTimeout */ /* * Displays an auto suggest pop-up list of files to allow the user to quickly navigate to a file and lines @@ -770,4 +770,10 @@ define(function (require, exports, module) { exports.beginSearch = beginSearch; exports.addQuickOpenPlugin = addQuickOpenPlugin; exports.highlightMatch = highlightMatch; + + // accessing these from this module will ultimately be deprecated + exports.stringMatch = StringMatch.stringMatch; + exports.SearchResult = StringMatch.SearchResult; + exports.basicMatchSort = StringMatch.basicMatchSort; + exports.multiFieldSort = StringMatch.multiFieldSort; }); diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index 754954a1a9f..17f77dae932 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -22,7 +22,7 @@ */ /*jslint vars: true, plusplus: true, devel: true, nomen: true, indent: 4, maxerr: 50 */ -/*global define, $, window, setTimeout, ArrayBuffer, Int8Array */ +/*global define, $, window, setTimeout */ /*unittests: StringMatch */ define(function (require, exports, module) { @@ -149,7 +149,7 @@ define(function (require, exports, module) { * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart optional which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var ranges = []; @@ -393,7 +393,7 @@ define(function (require, exports, module) { * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart which character does the last segment start at - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { var queryCounter, matchRanges; @@ -436,7 +436,7 @@ define(function (require, exports, module) { * @param {string} str the original string to search * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with - * @return {{ranges:Array.{text:string, matched:boolean, includesLastSegment:boolean}, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { // set up query as all lower case and make a lower case string to use for comparisons diff --git a/test/spec/StringMatch-test.js b/test/spec/StringMatch-test.js index c862083aac8..c331541b4ff 100644 --- a/test/spec/StringMatch-test.js +++ b/test/spec/StringMatch-test.js @@ -288,6 +288,16 @@ define(function (require, exports, module) { { text: "pen.js", matched: false, includesLastSegment: true } ] }); + + expect(stringMatch("MoonsunSum", "sun")).toEqual({ + matchGoodness: jasmine.any(Number), + label: "MoonsunSum", + stringRanges: [ + { text: "Moon", matched: false, includesLastSegment: true }, + { text: "sun", matched: true, includesLastSegment: true }, + { text: "Sum", matched: false, includesLastSegment: true } + ] + }); }); it("should prefer special characters", function () { @@ -417,7 +427,11 @@ define(function (require, exports, module) { result = StringMatch.stringMatch("src/thirdparty/CodeMirror2/mode/ntriples/index.html", "codemirror"); expect(result.scoreDebug.notStartingOnSpecial).toEqual(0); }); - + + it("should try to prioritize points for the last segment", function () { + var result = StringMatch.stringMatch("abc/def/zzz/abc/def", "abc/def"); + expect(result.scoreDebug.lastSegment).toBeGreaterThan(0); + }); }); }); }); From 705b097cda83f2aa94443d92b6184dfda1923dd6 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Mon, 14 Jan 2013 12:34:09 -0500 Subject: [PATCH 10/14] adds backtracking to fix cases where matches that should be found are not. also improves scoring in a number of cases. --- src/utils/StringMatch.js | 589 +++++++++++++++++++--------------- test/spec/StringMatch-test.js | 379 +++++++++++++++------- 2 files changed, 590 insertions(+), 378 deletions(-) diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index 17f77dae932..998f947178b 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -103,8 +103,7 @@ define(function (require, exports, module) { // states used during the scanning of the string var SPECIALS_COMPARE = 0; - var CONTIGUOUS_COMPARE = 1; - var SPECIALS_EXHAUSTED = 2; + var CONSECUTIVE_COMPARE = 1; // Scores can be hard to make sense of. The DEBUG_SCORES flag // provides a way to peek into the parts that made up a score. @@ -115,7 +114,7 @@ define(function (require, exports, module) { } // Constants for scoring - var SPECIAL_POINTS = 25; + var SPECIAL_POINTS = 30; var MATCH_POINTS = 10; var LAST_SEGMENT_BOOST = 1; var BEGINNING_OF_NAME_POINTS = 25; @@ -123,254 +122,188 @@ define(function (require, exports, module) { var CONSECUTIVE_MATCHES_POINTS = 25; var NOT_STARTING_ON_SPECIAL_PENALTY = 25; + // Used in match lists to designate matches of "special" characters (see + // findSpecialCharacters above + function SpecialMatch(index) { + this.index = index; + } + + // Used in match lists to designate any matched characters that are not special + function NormalMatch(index) { + this.index = index; + } + /* * Finds the best matches between the query and the string. The query is - * compared with compareStr (generally a lower case string with a lower case - * query), but the results are returned based on str. - * - * Generally speaking, this function tries to find a "special" character - * (see findSpecialCharacters above) for the first character of the query. - * When it finds one, it then tries to look for consecutive characters that - * match. Once the characters stop matching, it tries to find a special - * character for the next query character. If a special character isn't found, - * it starts scanning sequentially. + * compared with str (usually a lower case string with a lower case + * query). * - * The returned object contains the following fields: - * * {Array} ranges: the scanned regions of the string, in order. For each range: - * * {string} text: the text for the string range - * * {boolean} matched: was this range part of the match? - * * {boolean} includesLastSegment: is this range part of the last segment of str - * * {int} matchGoodness: the score computed for this match - * * (optional) {Object} scoreDebug: if DEBUG_SCORES is true, an object with the score broken down + * Generally speaking, this function tries to find "special" characters + * (see findSpecialCharacters above) first. Failing that, it starts scanning + * the "normal" characters. Sometimes, it will find a special character that matches + * but then not be able to match the rest of the query. In cases like that, the + * search will backtrack and try to find matches for the whole query earlier in the + * string. * * @param {string} query the search string (generally lower cased) - * @param {string} str the original string to search - * @param {string} compareStr the string to compare with (generally lower cased) + * @param {string} str the string to compare with (generally lower cased) * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with - * @param {boolean} lastSegmentStart optional which character does the last segment start at - * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {Array.} matched indexes or null if no matches possible */ - function getMatchRanges(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { - var ranges = []; - - var score = 0; - var scoreDebug; - if (DEBUG_SCORES) { - scoreDebug = { - special: 0, - match: 0, - lastSegment: 0, - beginning: 0, - lengthDeduction: 0, - consecutive: 0, - notStartingOnSpecial: 0 - }; - } - - // normalize the optional parameters - if (!lastSegmentStart) { - lastSegmentStart = 0; - } - - if (startingSpecial === undefined) { - startingSpecial = 0; - } + function _generateMatchList(query, str, specials, startingSpecial) { + var result = []; + // used to keep track of which special character we're testing now var specialsCounter = startingSpecial; // strCounter and queryCounter are the indexes used for pulling characters // off of the str/compareStr and query. var strCounter = specials[startingSpecial]; - var queryCounter = 0; + var queryCounter; - // initial state is to scan through the special characters - var state = SPECIALS_COMPARE; + // the search branches out between special characters and normal characters + // that are found via consecutive character scanning. In the process of + // performing these scans, we discover that parts of the query will not match + // beyond a given point in the string. We keep track of that information + // in deadBranches, which has a slot for each character in the query. + // The value stored in the slot is the index into the string after which we + // are certain there is no match. + var deadBranches = []; - // currentRange keeps track of the range we are adding characters to now - var currentRange = null; - var lastSegmentScore = 0; - var currentRangeStartedOnSpecial = false; - - // the character index (against str) that was last in a matched range. This is used for - // adding unmatched ranges in between and adding bonus points for consecutive matches. - var lastMatchIndex = strCounter - 1; - - // Records the current range and adds any additional ranges required to - // get to character index c. This function is called before starting a new range - // or returning from the function. - function closeRangeGap(c) { - // close the current range - if (currentRange) { - currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; - if (currentRange.matched && currentRange.includesLastSegment) { - if (DEBUG_SCORES) { - scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; - } - score += lastSegmentScore * LAST_SEGMENT_BOOST; - } - - if (currentRange.matched && !currentRangeStartedOnSpecial) { - if (DEBUG_SCORES) { - scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY; - } - score -= NOT_STARTING_ON_SPECIAL_PENALTY; - } - ranges.push(currentRange); - } - - // if there was space between the new range and the last, - // add a new unmatched range before the new range can be added. - if (lastMatchIndex + 1 < c) { - ranges.push({ - text: str.substring(lastMatchIndex + 1, c), - matched: false, - includesLastSegment: c > lastSegmentStart - }); - } - currentRange = null; - lastSegmentScore = 0; + for (queryCounter = 0; queryCounter < query.length; queryCounter++) { + deadBranches[queryCounter] = Infinity; } - // records that the character at index c (of str) matches, adding - // that character to the current range or starting a new one. - function addMatch(c) { - var newPoints = 0; - - // A match means that we need to do some scoring bookkeeping. - if (DEBUG_SCORES) { - scoreDebug.match += MATCH_POINTS; - } - newPoints += MATCH_POINTS; - - // a bonus is given for characters that match at the beginning - // of the filename - if (c === lastSegmentStart) { - if (DEBUG_SCORES) { - scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; - } - newPoints += BEGINNING_OF_NAME_POINTS; - } - - // If the new character immediately follows the last matched character, - // we award the consecutive matches bonus. The check for score > 0 - // handles the initial value of lastMatchIndex which is used for - // constructing ranges but we don't yet have a true match. - if (score > 0 && lastMatchIndex + 1 === c) { - if (DEBUG_SCORES) { - scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; - } - newPoints += CONSECUTIVE_MATCHES_POINTS; - } - - score += newPoints; - if (c >= lastSegmentStart) { - lastSegmentScore += newPoints; - } - - // if the last range wasn't a match or there's a gap, we need to close off - // the range to start a new one. - if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) { - closeRangeGap(c); - } - lastMatchIndex = c; - - // set up a new match range or add to the current one - if (!currentRange) { - currentRange = { - text: str[c], - matched: true - }; - - // Check to see if this new matched range is starting on a special - // character. We penalize those ranges that don't, because most - // people will search on the logical boundaries of the name - currentRangeStartedOnSpecial = c === specials[specialsCounter]; - } else { - currentRange.text += str[c]; - } - } + queryCounter = 0; + + var state = SPECIALS_COMPARE; // Compares the current character from the query string against the - // special characters in compareStr. + // special characters in str. Returns true if a match was found, + // false otherwise. function findMatchingSpecial() { // used to loop through the specials var i; - var foundMatch = false; for (i = specialsCounter; i < specials.length; i++) { - // first, check to see if strCounter has moved beyond - // the current special character. This is possible - // if the contiguous comparison continued on through - // the next special + // short circuit this search when we know there are no matches following + if (specials[i] >= deadBranches[queryCounter]) { + break; + } + + // First, ensure that we're not comparing specials that + // come earlier in the string than our current search position. + // This can happen when the string position changes elsewhere. if (specials[i] < strCounter) { specialsCounter = i; - } else if (query[queryCounter] === compareStr[specials[i]]) { + } else if (query[queryCounter] === str[specials[i]]) { // we have a match! do the required tracking specialsCounter = i; queryCounter++; strCounter = specials[i]; - addMatch(strCounter++); - if (DEBUG_SCORES) { - scoreDebug.special += SPECIAL_POINTS; - } - score += SPECIAL_POINTS; - foundMatch = true; - break; + result.push(new SpecialMatch(strCounter++)); + return true; } } - // when we find a match, we switch to looking for consecutive matching characters - if (foundMatch) { - state = CONTIGUOUS_COMPARE; - } else { - // we didn't find a match among the specials - state = SPECIALS_EXHAUSTED; + return false; + } + + // Keeps track of how far back we need to go to keep searching. + var backtrackTo = Infinity; + + // This function implements the backtracking that is done when we fail to find + // a match with the query using the "search for specials first" approach. + // + // returns false when it is not able to backtrack successfully + function backtrack() { + + // The idea is to pull matches off of our match list, rolling back + // characters from the query. We pay special attention to the special + // characters since they are searched first. + while (result.length > 0) { + var item = result.pop(); + + // nothing in the list? there's no possible match then. + if (!item) { + return false; + } + + // we pulled off a match, which means that we need to put a character + // back into our query + queryCounter--; + + if (item instanceof SpecialMatch) { + // pulled off a special, which means we need to make that special available + // for matching again + specialsCounter--; + + // check to see if we've gone back as far as we need to + if (item.index < backtrackTo) { + // next time, we'll have to go back farther + backtrackTo = item.index - 1; + + // we now know that this part of the query does not match beyond this + // point + deadBranches[queryCounter] = backtrackTo; + + // since we failed with the specials along this track, we're + // going to reset to looking for matches consecutively. + state = CONSECUTIVE_COMPARE; + + // we figure out where to start looking based on the new + // last item in the list. If there isn't anything else + // in the match list, we'll start over at the starting special + // (which is generally the beginning of the string, or the + // beginning of the last segment of the string) + item = result[result.length - 1]; + if (!item) { + strCounter = specials[startingSpecial] + 1; + return true; + } + strCounter = item.index + 1; + return true; + } + } } + return false; } - // keep looping until we've either exhausted the query or the string - while (queryCounter < query.length && strCounter < str.length) { - if (state === SPECIALS_COMPARE) { - findMatchingSpecial(); - } else if (state === CONTIGUOUS_COMPARE || state === SPECIALS_EXHAUSTED) { - // for both of these states, we're looking character by character - // for matches - if (query[queryCounter] === compareStr[strCounter]) { - // got a match! record it, and switch to CONTIGUOUS_COMPARE - // in case we had been in SPECIALS_EXHAUSTED state - queryCounter++; - addMatch(strCounter++); - state = CONTIGUOUS_COMPARE; - } else { - // no match. If we were in CONTIGUOUS_COMPARE mode, we - // we switch to looking for specials again. If we've - // already exhaused the specials, we're just going to keep - // stepping through compareStr. - if (state !== SPECIALS_EXHAUSTED) { - findMatchingSpecial(); + while (true) { + + // keep looping until we've either exhausted the query or the string + while (queryCounter < query.length && strCounter < str.length && strCounter <= deadBranches[queryCounter]) { + if (state === SPECIALS_COMPARE) { + if (!findMatchingSpecial()) { + state = CONSECUTIVE_COMPARE; + } + } + + if (state === CONSECUTIVE_COMPARE) { + // we look character by character for matches + if (query[queryCounter] === str[strCounter]) { + // got a match! record it, and switch back to searching specials + queryCounter++; + result.push(new NormalMatch(strCounter++)); + state = SPECIALS_COMPARE; } else { + // no match, keep looking strCounter++; } } } + + // if we've finished the query, or we haven't finished the query but we have no + // more backtracking we can do, then we're all done searching. + if ((queryCounter < query.length && !backtrack()) || queryCounter >= query.length) { + break; + } } - var result; - // Add the rest of the string ranges - closeRangeGap(str.length); - - // It's not a match if we still have query string left. - if (queryCounter < query.length) { - result = null; - } else { - result = { - matchGoodness: score, - ranges: ranges - }; - if (DEBUG_SCORES) { - result.scoreDebug = scoreDebug; - } + // return null when we don't find anything + if (queryCounter < query.length || result.length === 0) { + return null; } return result; } @@ -395,13 +328,13 @@ define(function (require, exports, module) { * @param {boolean} lastSegmentStart which character does the last segment start at * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ - function _lastSegmentSearch(query, str, compareStr, specials, startingSpecial, lastSegmentStart) { - var queryCounter, matchRanges; + function _lastSegmentSearch(query, compareStr, specials, startingSpecial, lastSegmentStart) { + var queryCounter, matchList; // It's possible that the query is longer than the last segment. // If so, we can chop off the bit that we know couldn't possibly be there. var remainder = ""; - var extraCharacters = specials[startingSpecial] + query.length - str.length; + var extraCharacters = specials[startingSpecial] + query.length - compareStr.length; if (extraCharacters > 0) { remainder = query.substring(0, extraCharacters); @@ -409,20 +342,22 @@ define(function (require, exports, module) { } for (queryCounter = 0; queryCounter < query.length; queryCounter++) { - matchRanges = getMatchRanges(query.substring(queryCounter), - str, compareStr, specials, startingSpecial, lastSegmentStart); + matchList = _generateMatchList(query.substring(queryCounter), + compareStr, specials, startingSpecial); // if we've got a match *or* there are no segments in this string, we're done - if (matchRanges || startingSpecial === 0) { + if (matchList || startingSpecial === 0) { break; } } - if (queryCounter === query.length || !matchRanges) { + if (queryCounter === query.length || !matchList) { return null; } else { - matchRanges.remainder = remainder + query.substring(0, queryCounter); - return matchRanges; + return { + remainder: remainder + query.substring(0, queryCounter), + matchList: matchList + }; } } @@ -438,72 +373,209 @@ define(function (require, exports, module) { * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ - function _computeMatch(query, str, specials, lastSegmentSpecialsIndex) { + function _wholeStringSearch(query, str, specials, lastSegmentSpecialsIndex) { // set up query as all lower case and make a lower case string to use for comparisons query = query.toLowerCase(); var compareStr = str.toLowerCase(); var lastSegmentStart = specials[lastSegmentSpecialsIndex]; var result; + var matchList; - if (lastSegmentStart + query.length < str.length) { - result = _lastSegmentSearch(query, str, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); - } + result = _lastSegmentSearch(query, compareStr, specials, lastSegmentSpecialsIndex, lastSegmentStart); if (result) { + matchList = result.matchList; + // Do we have more query characters that did not fit? if (result.remainder) { // Scan with the remainder only through the beginning of the last segment - var remainderResult = getMatchRanges(result.remainder, str.substring(0, lastSegmentStart), + var remainderMatchList = _generateMatchList(result.remainder, compareStr.substring(0, lastSegmentStart), - specials.slice(0, lastSegmentSpecialsIndex), 0, lastSegmentStart); + specials.slice(0, lastSegmentSpecialsIndex), 0); - if (remainderResult) { - // We have a match - // This next part deals with scoring for the case where - // there are consecutive matched characters at the border of the - // last segment. - var lastRange = remainderResult.ranges[remainderResult.ranges.length - 1]; - if (result.ranges[0].matched && lastRange.matched) { - result.matchGoodness += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; - if (DEBUG_SCORES) { - result.scoreDebug.consecutive += lastRange.text.length * CONSECUTIVE_MATCHES_POINTS; - } - } - + if (remainderMatchList) { // add the new matched ranges to the beginning of the set of ranges we had - result.ranges.unshift.apply(result.ranges, remainderResult.ranges); + matchList.unshift.apply(matchList, remainderMatchList); } else { // no match return null; } - } else { - // There was no remainder, which means that the whole match is in the - // last segment. We need to add a range at the beginning for everything - // that came before the last segment. - result.ranges.unshift({ - text: str.substring(0, lastSegmentStart), - matched: false, - includesLastSegment: false - }); } - delete result.remainder; } else { // No match in the last segment, so we start over searching the whole // string - result = getMatchRanges(query, str, compareStr, specials, 0, lastSegmentStart); + matchList = _generateMatchList(query, compareStr, specials, 0, lastSegmentStart); } - if (result) { - var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH); + return matchList; + } + + /** + * Converts a list of matches into a form suitable for returning from stringMatch. + * + * @param {Array.} matchList to convert + * @param {string} original string + * @param {int} character index where last segment begins + * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score + */ + function _computeRangesAndScore(matchList, str, lastSegmentStart) { + var matchCounter; + var ranges = []; + var lastMatchIndex = -1; + var lastSegmentScore = 0; + var currentRangeStartedOnSpecial = false; + + var score = 0; + var scoreDebug; + if (DEBUG_SCORES) { + scoreDebug = { + special: 0, + match: 0, + lastSegment: 0, + beginning: 0, + lengthDeduction: 0, + consecutive: 0, + notStartingOnSpecial: 0 + }; + } + + var currentRange = null; + + // Records the current range and adds any additional ranges required to + // get to character index c. This function is called before starting a new range + // or returning from the function. + function closeRangeGap(c) { + // close the current range + if (currentRange) { + currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; + if (currentRange.matched && currentRange.includesLastSegment) { + if (DEBUG_SCORES) { + scoreDebug.lastSegment += lastSegmentScore * LAST_SEGMENT_BOOST; + } + score += lastSegmentScore * LAST_SEGMENT_BOOST; + } + + if (currentRange.matched && !currentRangeStartedOnSpecial) { + if (DEBUG_SCORES) { + scoreDebug.notStartingOnSpecial -= NOT_STARTING_ON_SPECIAL_PENALTY; + } + score -= NOT_STARTING_ON_SPECIAL_PENALTY; + } + ranges.push(currentRange); + } + + // if there was space between the new range and the last, + // add a new unmatched range before the new range can be added. + if (lastMatchIndex + 1 < c) { + ranges.push({ + text: str.substring(lastMatchIndex + 1, c), + matched: false, + includesLastSegment: c > lastSegmentStart + }); + } + currentRange = null; + lastSegmentScore = 0; + } + + // adds a matched character to the appropriate range + function addMatch(match) { + // pull off the character index + var c = match.index; + var newPoints = 0; + + // A match means that we need to do some scoring bookkeeping. + // Start with points added for any match if (DEBUG_SCORES) { - result.scoreDebug.lengthDeduction = lengthPenalty; + scoreDebug.match += MATCH_POINTS; + } + newPoints += MATCH_POINTS; + + // a bonus is given for characters that match at the beginning + // of the filename + if (c === lastSegmentStart) { + if (DEBUG_SCORES) { + scoreDebug.beginning += BEGINNING_OF_NAME_POINTS; + } + newPoints += BEGINNING_OF_NAME_POINTS; + } + + // If the new character immediately follows the last matched character, + // we award the consecutive matches bonus. The check for score > 0 + // handles the initial value of lastMatchIndex which is used for + // constructing ranges but we don't yet have a true match. + if (score > 0 && lastMatchIndex + 1 === c) { + if (DEBUG_SCORES) { + scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; + } + newPoints += CONSECUTIVE_MATCHES_POINTS; } - result.matchGoodness = result.matchGoodness + lengthPenalty; + + // add points for "special" character matches + if (match instanceof SpecialMatch) { + if (DEBUG_SCORES) { + scoreDebug.special += SPECIAL_POINTS; + } + newPoints += SPECIAL_POINTS; + } + + score += newPoints; + + // points accumulated in the last segment get an extra bonus + if (c >= lastSegmentStart) { + lastSegmentScore += newPoints; + } + + // if the last range wasn't a match or there's a gap, we need to close off + // the range to start a new one. + if ((currentRange && !currentRange.matched) || c > lastMatchIndex + 1) { + closeRangeGap(c); + } + lastMatchIndex = c; + + // set up a new match range or add to the current one + if (!currentRange) { + currentRange = { + text: str[c], + matched: true + }; + + // Check to see if this new matched range is starting on a special + // character. We penalize those ranges that don't, because most + // people will search on the logical boundaries of the name + currentRangeStartedOnSpecial = match instanceof SpecialMatch; + } else { + currentRange.text += str[c]; + } + } + + // scan through the matches, adding each one in turn + for (matchCounter = 0; matchCounter < matchList.length; matchCounter++) { + var match = matchList[matchCounter]; + addMatch(match); + } + + // add a range for the last part of the string + closeRangeGap(str.length); + + // shorter strings that match are often better than longer ones + var lengthPenalty = -1 * Math.round(str.length * DEDUCTION_FOR_LENGTH); + if (DEBUG_SCORES) { + scoreDebug.lengthDeduction = lengthPenalty; + } + score = score + lengthPenalty; + + var result = { + ranges: ranges, + matchGoodness: score + }; + + if (DEBUG_SCORES) { + result.scoreDebug = scoreDebug; } return result; } - + /* * Match str against the query using the QuickOpen algorithm provided by * the functions above. The general idea is to prefer matches in the last @@ -520,6 +592,7 @@ define(function (require, exports, module) { * * @param {string} str The string to search * @param {string} query The query string to find in string + * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score */ function stringMatch(str, query) { var result; @@ -543,12 +616,14 @@ define(function (require, exports, module) { // Locate the special characters and then use orderedCompare to do the real // work. var special = findSpecialCharacters(str); - var compareData = _computeMatch(query, str, special.specials, + var lastSegmentStart = special.specials[special.lastSegmentSpecialsIndex]; + var matchList = _wholeStringSearch(query, str, special.specials, special.lastSegmentSpecialsIndex); // If we get a match, turn this into a SearchResult as expected by the consumers // of this API. - if (compareData) { + if (matchList) { + var compareData = _computeRangesAndScore(matchList, str, lastSegmentStart); result = new SearchResult(str); result.stringRanges = compareData.ranges; result.matchGoodness = -1 * compareData.matchGoodness; @@ -610,9 +685,13 @@ define(function (require, exports, module) { } exports._findSpecialCharacters = findSpecialCharacters; - exports._computeMatch = _computeMatch; + exports._wholeStringSearch = _wholeStringSearch; exports._lastSegmentSearch = _lastSegmentSearch; exports._setDebugScores = _setDebugScores; + exports._generateMatchList = _generateMatchList; + exports._SpecialMatch = SpecialMatch; + exports._NormalMatch = NormalMatch; + exports._computeRangesAndScore = _computeRangesAndScore; // public exports exports.SearchResult = SearchResult; diff --git a/test/spec/StringMatch-test.js b/test/spec/StringMatch-test.js index c331541b4ff..3f5b75a14b4 100644 --- a/test/spec/StringMatch-test.js +++ b/test/spec/StringMatch-test.js @@ -55,108 +55,231 @@ define(function (require, exports, module) { }); }); + describe("_generateMatchList", function () { + var fSC = StringMatch._findSpecialCharacters; + var generateMatchList = StringMatch._generateMatchList; + var SpecialMatch = StringMatch._SpecialMatch; + var NormalMatch = StringMatch._NormalMatch; + + beforeEach(function () { + SpecialMatch.prototype.type = "special"; + NormalMatch.prototype.type = "normal"; + }); + + afterEach(function () { + delete SpecialMatch.prototype.type; + delete NormalMatch.prototype.type; + }); + + var path = "src/document/DocumentCommandHandler.js"; + var specialsInfo = fSC(path); + path = path.toLowerCase(); + + it("should return undefined for no matches", function () { + var result = generateMatchList("foo", path, specialsInfo.specials, 0); + expect(result).toEqual(null); + }); + + it("should return an array with specials matches", function () { + var result = generateMatchList("d", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new SpecialMatch(13)]); + + result = generateMatchList("ch", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new SpecialMatch(21), new SpecialMatch(28)]); + }); + + it("should try contiguous matches as well, but prefer specials", function () { + var result = generateMatchList("do", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new SpecialMatch(13), new NormalMatch(14)]); + + result = generateMatchList("doc", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new SpecialMatch(13), new NormalMatch(14), new SpecialMatch(21)]); + + result = generateMatchList("doch", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new SpecialMatch(13), new NormalMatch(14), new SpecialMatch(21), new SpecialMatch(28)]); + }); + + it("should handle contiguous matches that stand alone", function () { + var result = generateMatchList("o", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new NormalMatch(14)]); + }); + + it("should recognize non-matches", function () { + var result = generateMatchList("ham", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual(null); + }); + + it("should backtrack as needed", function () { + var result = generateMatchList("cu", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual([new NormalMatch(15), new NormalMatch(16)]); + + result = generateMatchList("dcho", path, specialsInfo.specials, specialsInfo.lastSegmentSpecialsIndex); + expect(result).toEqual(null); + + var btpath = "MamoMeMiMoMu"; + var btspecials = fSC(btpath); + btpath = btpath.toLowerCase(); + + result = generateMatchList("m", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(0)]); + + result = generateMatchList("mu", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(0), new NormalMatch(11)]); + + result = generateMatchList("mamo", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(0), new NormalMatch(1), new SpecialMatch(4), new NormalMatch(9)]); + + btpath = "AbcdefzBcdefCdefDefEfF"; + btspecials = fSC(btpath); + btpath = btpath.toLowerCase(); + + result = generateMatchList("f", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(21)]); + + result = generateMatchList("abcdefz", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(0), new NormalMatch(1), new NormalMatch(2), new NormalMatch(3), new NormalMatch(4), new NormalMatch(5), new NormalMatch(6)]); + + result = generateMatchList("abcdefe", btpath, btspecials.specials, 0); + expect(result).toEqual([new SpecialMatch(0), new SpecialMatch(7), new SpecialMatch(12), new SpecialMatch(16), new NormalMatch(17), new NormalMatch(18), new SpecialMatch(19)]); + }); + + }); + + describe("_computeRangesAndScore", function () { + var compute = StringMatch._computeRangesAndScore; + var SpecialMatch = StringMatch._SpecialMatch; + var NormalMatch = StringMatch._NormalMatch; + + var path = "src/document/DocumentCommandHandler.js"; + + var matchList = [new SpecialMatch(13)]; + expect(compute(matchList, path, 13)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/document/", matched: false, includesLastSegment: false }, + { text: "D", matched: true, includesLastSegment: true }, + { text: "ocumentCommandHandler.js", matched: false, includesLastSegment: true } + ] + }); + + matchList = [new SpecialMatch(13), new NormalMatch(14)]; + expect(compute(matchList, path, 13)).toEqual({ + matchGoodness: jasmine.any(Number), + ranges: [ + { text: "src/document/", matched: false, includesLastSegment: false }, + { text: "Do", matched: true, includesLastSegment: true }, + { text: "cumentCommandHandler.js", matched: false, includesLastSegment: true } + ] + }); + + }); + describe("_lastSegmentSearch", function () { + var SpecialMatch = StringMatch._SpecialMatch; + var NormalMatch = StringMatch._NormalMatch; + beforeEach(function () { + SpecialMatch.prototype.type = "special"; + NormalMatch.prototype.type = "normal"; + }); + + afterEach(function () { + delete SpecialMatch.prototype.type; + delete NormalMatch.prototype.type; + }); + it("should compare results in the final segment properly", function () { var path = "src/document/DocumentCommandHandler.js"; var comparePath = path.toLowerCase(); var _lastSegmentSearch = StringMatch._lastSegmentSearch; var sc = StringMatch._findSpecialCharacters(path); - expect(_lastSegmentSearch("d", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("d", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "D", matched: true, includesLastSegment: true }, - { text: "ocumentCommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13) ] }); - expect(_lastSegmentSearch("do", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("do", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "Do", matched: true, includesLastSegment: true }, - { text: "cumentCommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14) ] }); - expect(_lastSegmentSearch("doc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("doc", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14), + new SpecialMatch(21) ] }); - expect(_lastSegmentSearch("docc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("docc", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "ument", matched: false, includesLastSegment: true }, - { text: "C", matched: true, includesLastSegment: true }, - { text: "ommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14), + new NormalMatch(15), + new SpecialMatch(21) ] }); - expect(_lastSegmentSearch("docch", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("docch", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "ument", matched: false, includesLastSegment: true }, - { text: "C", matched: true, includesLastSegment: true }, - { text: "ommand", matched: false, includesLastSegment: true }, - { text: "H", matched: true, includesLastSegment: true }, - { text: "andler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14), + new NormalMatch(15), + new SpecialMatch(21), + new SpecialMatch(28) ] }); - expect(_lastSegmentSearch("docch.js", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("docch.js", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "ument", matched: false, includesLastSegment: true }, - { text: "C", matched: true, includesLastSegment: true }, - { text: "ommand", matched: false, includesLastSegment: true }, - { text: "H", matched: true, includesLastSegment: true }, - { text: "andler", matched: false, includesLastSegment: true }, - { text: ".js", matched: true, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14), + new NormalMatch(15), + new SpecialMatch(21), + new SpecialMatch(28), + new SpecialMatch(35), + new SpecialMatch(36), + new NormalMatch(37) ] }); - expect(_lastSegmentSearch("ocu", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("ocu", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "D", matched: false, includesLastSegment: true }, - { text: "ocu", matched: true, includesLastSegment: true }, - { text: "mentCommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new NormalMatch(14), + new NormalMatch(15), + new NormalMatch(16) ] }); - expect(_lastSegmentSearch("ocuha", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ + expect(_lastSegmentSearch("ocuha", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "", - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "D", matched: false, includesLastSegment: true }, - { text: "ocu", matched: true, includesLastSegment: true }, - { text: "mentCommand", matched: false, includesLastSegment: true }, - { text: "Ha", matched: true, includesLastSegment: true }, - { text: "ndler.js", matched: false, includesLastSegment: true } + matchList: [ + new NormalMatch(14), + new NormalMatch(15), + new NormalMatch(16), + new SpecialMatch(28), + new NormalMatch(29) ] }); - expect(_lastSegmentSearch("z", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); - expect(_lastSegmentSearch("ocuz", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(_lastSegmentSearch("z", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(_lastSegmentSearch("ocuz", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); - expect(_lastSegmentSearch("sdoc", path, comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), + expect(_lastSegmentSearch("sdoc", comparePath, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ remainder: "s", - ranges: [ - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } + matchList: [ + new SpecialMatch(13), + new NormalMatch(14), + new SpecialMatch(21) ] }); }); @@ -169,80 +292,83 @@ define(function (require, exports, module) { var comparePath = path.toLowerCase(); var _lastSegmentSearch = StringMatch._lastSegmentSearch; var sc = StringMatch._findSpecialCharacters(path); - expect(_lastSegmentSearch("ocud", path, comparePath, sc.specials, 0)).toEqual(null); + expect(_lastSegmentSearch("ocud", comparePath, sc.specials, 0)).toEqual(null); }); it("should compare matches that don't fit in just the final segment", function () { var path = "src/document/DocumentCommandHandler.js"; - var computeMatch = StringMatch._computeMatch; + + var wholeStringSearch = StringMatch._wholeStringSearch; var sc = StringMatch._findSpecialCharacters(path); - expect(computeMatch("sdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "s", matched: true, includesLastSegment: false }, - { text: "rc/document/", matched: false, includesLastSegment: false }, - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } - ] - }); - expect(computeMatch("doc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "src/document/", matched: false, includesLastSegment: false }, - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } - ] - }); - expect(computeMatch("z", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(wholeStringSearch("sdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(0), + new SpecialMatch(13), + new NormalMatch(14), + new SpecialMatch(21) + ]); - expect(computeMatch("docdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "src/", matched: false, includesLastSegment: false }, - { text: "doc", matched: true, includesLastSegment: false }, - { text: "ument/", matched: false, includesLastSegment: false }, - { text: "Doc", matched: true, includesLastSegment: true }, - { text: "umentCommandHandler.js", matched: false, includesLastSegment: true } - ] - }); + expect(wholeStringSearch("doc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(13), + new NormalMatch(14), + new SpecialMatch(21) + ]); + + expect(wholeStringSearch("z", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + + expect(wholeStringSearch("docdoc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(4), + new NormalMatch(5), + new NormalMatch(6), + new SpecialMatch(13), + new NormalMatch(14), + new SpecialMatch(21) + ]); // test for a suspected bug where specials are matched out of order. - expect(computeMatch("hc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); + expect(wholeStringSearch("hc", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual(null); }); it("should handle matches that don't fit at all in the final segment", function () { var path = "src/extensions/default/QuickOpenCSS/main.js"; - var computeMatch = StringMatch._computeMatch; + + var wholeStringSearch = StringMatch._wholeStringSearch; var sc = StringMatch._findSpecialCharacters(path); - expect(computeMatch("quick", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "src/extensions/default/", matched: false, includesLastSegment: false }, - { text: "Quick", matched: true, includesLastSegment: false }, - { text: "OpenCSS/main.js", matched: false, includesLastSegment: true } - ] - }); - expect(computeMatch("quickopen", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "src/extensions/default/", matched: false, includesLastSegment: false }, - { text: "QuickOpen", matched: true, includesLastSegment: false }, - { text: "CSS/main.js", matched: false, includesLastSegment: true } - ] - }); + expect(wholeStringSearch("quick", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(23), + new NormalMatch(24), + new NormalMatch(25), + new NormalMatch(26), + new NormalMatch(27) + ]); - expect(computeMatch("quickopenain", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual({ - matchGoodness: jasmine.any(Number), - ranges: [ - { text: "src/extensions/default/", matched: false, includesLastSegment: false }, - { text: "QuickOpen", matched: true, includesLastSegment: false }, - { text: "CSS/m", matched: false, includesLastSegment: true }, - { text: "ain", matched: true, includesLastSegment: true }, - { text: ".js", matched: false, includesLastSegment: true } - ] - }); + expect(wholeStringSearch("quickopen", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(23), + new NormalMatch(24), + new NormalMatch(25), + new NormalMatch(26), + new NormalMatch(27), + new SpecialMatch(28), + new NormalMatch(29), + new NormalMatch(30), + new NormalMatch(39) + ]); + + expect(wholeStringSearch("quickopenain", path, sc.specials, sc.lastSegmentSpecialsIndex)).toEqual([ + new SpecialMatch(23), + new NormalMatch(24), + new NormalMatch(25), + new NormalMatch(26), + new NormalMatch(27), + new SpecialMatch(28), + new NormalMatch(29), + new NormalMatch(30), + new NormalMatch(31), + new NormalMatch(37), + new NormalMatch(38), + new NormalMatch(39) + ]); }); }); @@ -373,7 +499,7 @@ define(function (require, exports, module) { "src/editor/EditorUtils.js", "src/utils/ExtensionUtils.js", "src/file/FileUtils.js" - ])); + ])).toBe(true); }); it("should find the right ECH", function () { @@ -381,14 +507,21 @@ define(function (require, exports, module) { "EditorCommandHandlers", "EditorCommandHandlers-test", "SpecHelper" - ])); + ])).toBe(true); }); it("should find the right DMan", function () { expect(goodRelativeOrdering("DMan", [ "DocumentManager", "CommandManager" - ])); + ])).toBe(true); + }); + + it("should find the right sru", function () { + expect(goodRelativeOrdering("sru", [ + "test/spec/SpecRunnerUtils.js", + "test/SpecRunner.html" + ])).toBe(true); }); }); From 112b2064d8a62059ef5bd859b2325eec64cec68d Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 16 Jan 2013 15:28:07 -0500 Subject: [PATCH 11/14] mostly doc changes per review feedback. There were also a couple of minor code changes (variable renames and such) but no algorithmic changes. --- src/utils/StringMatch.js | 64 +++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index 998f947178b..d1b5580b110 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -102,8 +102,8 @@ define(function (require, exports, module) { } // states used during the scanning of the string - var SPECIALS_COMPARE = 0; - var CONSECUTIVE_COMPARE = 1; + var SPECIALS_MATCH = 0; + var ANY_MATCH = 1; // Scores can be hard to make sense of. The DEBUG_SCORES flag // provides a way to peek into the parts that made up a score. @@ -145,6 +145,35 @@ define(function (require, exports, module) { * search will backtrack and try to find matches for the whole query earlier in the * string. * + * A contrived example will help illustrate how the searching and backtracking works. It's a bit long, + * but it illustrates different pieces of the algorithm which can be tricky. Let's say that we're + * searching the string "AzzBzzCzdzezzDgxgEF" for "abcdex". + * + * To start with, it will match "abcde" from the query to "A B C D E" in the string (the spaces + * represent gaps in the matched part of the string), because those are all "special characters". + * However, the "x" in the query doesn't match the "F" which is the only character left in the + * string. + * + * Backtracking kicks in. The "E" is pulled off of the match list. backtrackTo is set to the "g" + * before the "E" (which means that the next time we backtrack, we'll need to at least that far back. + * deadBranches[4] is set to the "g" before the "E" as well. This means that for the 5th + * query character (the "e") we know that we don't have a match beyond that point in the string. + * + * To resume searching, the backtrack function looks at the previous match (the "D") and starts + * searching in character-by-character (ANY_MATCH) mode right after that. It fails to find an + * "e" before it gets to deadBranches[4], so it has to backtrack again. + * + * This time, the "D" is pulled off the match list. backtrackTo goes back to the "z" before the "D". + * deadBranches[3] is set to the "z" before the "D", because we know that for the "dex" part of the + * query, we can't make it work past the "D". We'll resume searching with the "z" after the "C". + * + * Doing an ANY_MATCH search, we find the "d". We then start searching specials for "e", but we + * stop before we get to "E" because deadBranches[4] tells us that's a dead end. So, we switch + * to ANY_MATCH and find the "e". + * + * Finally, we search for the "x". We don't find a special that matches, so we start an ANY_MATCH + * search. Then we find the "x", and we have a successful match. + * * @param {string} query the search string (generally lower cased) * @param {string} str the string to compare with (generally lower cased) * @param {Array} specials list of special indexes in str (from findSpecialCharacters) @@ -177,7 +206,7 @@ define(function (require, exports, module) { queryCounter = 0; - var state = SPECIALS_COMPARE; + var state = SPECIALS_MATCH; // Compares the current character from the query string against the // special characters in str. Returns true if a match was found, @@ -231,7 +260,9 @@ define(function (require, exports, module) { } // we pulled off a match, which means that we need to put a character - // back into our query + // back into our query. strCounter is going to be set once we've pulled + // off the right special character and know where we're going to restart + // searching from. queryCounter--; if (item instanceof SpecialMatch) { @@ -250,7 +281,7 @@ define(function (require, exports, module) { // since we failed with the specials along this track, we're // going to reset to looking for matches consecutively. - state = CONSECUTIVE_COMPARE; + state = ANY_MATCH; // we figure out where to start looking based on the new // last item in the list. If there isn't anything else @@ -274,19 +305,19 @@ define(function (require, exports, module) { // keep looping until we've either exhausted the query or the string while (queryCounter < query.length && strCounter < str.length && strCounter <= deadBranches[queryCounter]) { - if (state === SPECIALS_COMPARE) { + if (state === SPECIALS_MATCH) { if (!findMatchingSpecial()) { - state = CONSECUTIVE_COMPARE; + state = ANY_MATCH; } } - if (state === CONSECUTIVE_COMPARE) { + if (state === ANY_MATCH) { // we look character by character for matches if (query[queryCounter] === str[strCounter]) { // got a match! record it, and switch back to searching specials queryCounter++; result.push(new NormalMatch(strCounter++)); - state = SPECIALS_COMPARE; + state = SPECIALS_MATCH; } else { // no match, keep looking strCounter++; @@ -296,7 +327,7 @@ define(function (require, exports, module) { // if we've finished the query, or we haven't finished the query but we have no // more backtracking we can do, then we're all done searching. - if ((queryCounter < query.length && !backtrack()) || queryCounter >= query.length) { + if (queryCounter >= query.length || (queryCounter < query.length && !backtrack())) { break; } } @@ -321,20 +352,19 @@ define(function (require, exports, module) { * at the beginning of the query which did not match in the last segment. * * @param {string} query the search string (generally lower cased) - * @param {string} str the original string to search - * @param {string} compareStr the string to compare with (generally lower cased) + * @param {string} str the string to compare with (generally lower cased) * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} startingSpecial index into specials array to start scanning with * @param {boolean} lastSegmentStart which character does the last segment start at - * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, remainder:string, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {{remainder:int, matchList:Array.}} matched indexes or null if no matches possible */ - function _lastSegmentSearch(query, compareStr, specials, startingSpecial, lastSegmentStart) { + function _lastSegmentSearch(query, str, specials, startingSpecial, lastSegmentStart) { var queryCounter, matchList; // It's possible that the query is longer than the last segment. // If so, we can chop off the bit that we know couldn't possibly be there. var remainder = ""; - var extraCharacters = specials[startingSpecial] + query.length - compareStr.length; + var extraCharacters = specials[startingSpecial] + query.length - str.length; if (extraCharacters > 0) { remainder = query.substring(0, extraCharacters); @@ -343,7 +373,7 @@ define(function (require, exports, module) { for (queryCounter = 0; queryCounter < query.length; queryCounter++) { matchList = _generateMatchList(query.substring(queryCounter), - compareStr, specials, startingSpecial); + str, specials, startingSpecial); // if we've got a match *or* there are no segments in this string, we're done if (matchList || startingSpecial === 0) { @@ -371,7 +401,7 @@ define(function (require, exports, module) { * @param {string} str the original string to search * @param {Array} specials list of special indexes in str (from findSpecialCharacters) * @param {int} lastSegmentSpecialsIndex index into specials array to start scanning with - * @return {{ranges:Array.<{text:string, matched:boolean, includesLastSegment:boolean}>, matchGoodness:int, scoreDebug: Object}} matched ranges and score + * @return {Array.} matched indexes or null if no matches possible */ function _wholeStringSearch(query, str, specials, lastSegmentSpecialsIndex) { // set up query as all lower case and make a lower case string to use for comparisons From bfb4fdf1ba2e02b84114aea047e68bb57de4b7dc Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Wed, 16 Jan 2013 22:14:44 -0500 Subject: [PATCH 12/14] add notes on how the matching algorithm works, from @peterflynn's comment. --- src/utils/StringMatch.js | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index d1b5580b110..ab83ac672ff 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -174,6 +174,33 @@ define(function (require, exports, module) { * Finally, we search for the "x". We don't find a special that matches, so we start an ANY_MATCH * search. Then we find the "x", and we have a successful match. * + * Here are some notes on how the algorithm works: + * + * * We only backtrack() when we're exhausted both special AND normal forward searches past that point, + * for the query remainder we currently have. For a different query remainder, we may well get further + * along - hence deadBranches[] being dependent on queryCounter; but in order to get a different query + * remainder, we must give up one or more current matches by backtracking. + * + * * Normal "any char" forward search is a superset of special matching mode -- anything that would have + * been matched in special mode *could* also be matched by normal mode. In practice, however, + * any special characters that could have matched would be picked up first by the specials matching + * code. + * + * * backtrack() always goes at least as far back as str[backtrackTo-1] before allowing forward searching + * to resume + * + * * When `deadBranches[qi] = si` it means if we're still trying to match `queryStr[qi]` and we get to + * `str[si]`, there's no way we can match the remainer of `queryStr` with the remainder of `str` -- + * either using specials-only or full any-char matching. + * + * * We know this because deadBranches[] is set in backtrack(), and we don't get to backtrack() unless + * either: + * 1. We've already exhausted both special AND normal forward searches past that point + * (i.e. backtrack() due to `strCounter >= str.length`, yet `queryCounter < query.length`) + * 2. We stopped searching further forward due to a previously set deadBranches[] value + * (i.e. backtrack() due to `strCounter > deadBranches[queryCounter]`, yet + * `queryCounter < query.length`) + * * @param {string} query the search string (generally lower cased) * @param {string} str the string to compare with (generally lower cased) * @param {Array} specials list of special indexes in str (from findSpecialCharacters) From 366df47b701cef5fba35f09efb7a4c60a444ec06 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Thu, 17 Jan 2013 10:22:59 -0500 Subject: [PATCH 13/14] fix _computerangesa not matching _computeRangesAndScore. The problem was that backtrackTo was causing backtracking to go too far back for the "s", because it had already backtracked to the "r" previously (when it hit the "g" in the query). backtrackTo was the original mechanism I used in backtracking before adding deadBranches. It turns out that backtracking really needs to go back before deadBranches[queryCounter], because where we need to backtrack to depends on where we are in the query. --- src/utils/StringMatch.js | 28 +++++++++++----------------- test/spec/StringMatch-test.js | 12 ++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index ab83ac672ff..b830d932756 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -154,16 +154,15 @@ define(function (require, exports, module) { * However, the "x" in the query doesn't match the "F" which is the only character left in the * string. * - * Backtracking kicks in. The "E" is pulled off of the match list. backtrackTo is set to the "g" - * before the "E" (which means that the next time we backtrack, we'll need to at least that far back. - * deadBranches[4] is set to the "g" before the "E" as well. This means that for the 5th + * Backtracking kicks in. The "E" is pulled off of the match list. + * deadBranches[4] is set to the "g" before the "E". This means that for the 5th * query character (the "e") we know that we don't have a match beyond that point in the string. * * To resume searching, the backtrack function looks at the previous match (the "D") and starts * searching in character-by-character (ANY_MATCH) mode right after that. It fails to find an * "e" before it gets to deadBranches[4], so it has to backtrack again. * - * This time, the "D" is pulled off the match list. backtrackTo goes back to the "z" before the "D". + * This time, the "D" is pulled off the match list. * deadBranches[3] is set to the "z" before the "D", because we know that for the "dex" part of the * query, we can't make it work past the "D". We'll resume searching with the "z" after the "C". * @@ -186,12 +185,13 @@ define(function (require, exports, module) { * any special characters that could have matched would be picked up first by the specials matching * code. * - * * backtrack() always goes at least as far back as str[backtrackTo-1] before allowing forward searching - * to resume + * * backtrack() always goes at least as far back as str[deadBranches[queryCounter]-1] before allowing + * forward searching to resume * - * * When `deadBranches[qi] = si` it means if we're still trying to match `queryStr[qi]` and we get to - * `str[si]`, there's no way we can match the remainer of `queryStr` with the remainder of `str` -- - * either using specials-only or full any-char matching. + * * When `deadBranches[queryCounter] = strCounter` it means if we're still trying to match + * `queryStr[queryCounter]` and we get to `str[strCounter]`, there's no way we can match the + * remainer of `queryStr` with the remainder of `str` -- either using specials-only or + * full any-char matching. * * * We know this because deadBranches[] is set in backtrack(), and we don't get to backtrack() unless * either: @@ -266,9 +266,6 @@ define(function (require, exports, module) { return false; } - // Keeps track of how far back we need to go to keep searching. - var backtrackTo = Infinity; - // This function implements the backtracking that is done when we fail to find // a match with the query using the "search for specials first" approach. // @@ -298,13 +295,10 @@ define(function (require, exports, module) { specialsCounter--; // check to see if we've gone back as far as we need to - if (item.index < backtrackTo) { - // next time, we'll have to go back farther - backtrackTo = item.index - 1; - + if (item.index < deadBranches[queryCounter]) { // we now know that this part of the query does not match beyond this // point - deadBranches[queryCounter] = backtrackTo; + deadBranches[queryCounter] = item.index - 1; // since we failed with the specials along this track, we're // going to reset to looking for matches consecutively. diff --git a/test/spec/StringMatch-test.js b/test/spec/StringMatch-test.js index 3f5b75a14b4..9618a4dab07 100644 --- a/test/spec/StringMatch-test.js +++ b/test/spec/StringMatch-test.js @@ -141,6 +141,18 @@ define(function (require, exports, module) { result = generateMatchList("abcdefe", btpath, btspecials.specials, 0); expect(result).toEqual([new SpecialMatch(0), new SpecialMatch(7), new SpecialMatch(12), new SpecialMatch(16), new NormalMatch(17), new NormalMatch(18), new SpecialMatch(19)]); + + var str = "_computeRangesAndScore"; + var strSpecials = fSC(str); + str = str.toLowerCase(); + result = generateMatchList("_computerangesa", str, strSpecials.specials, 0); + expect(result).toEqual([ + new SpecialMatch(0), new SpecialMatch(1), new NormalMatch(2), + new NormalMatch(3), new NormalMatch(4), new NormalMatch(5), + new NormalMatch(6), new NormalMatch(7), new SpecialMatch(8), + new NormalMatch(9), new NormalMatch(10), new NormalMatch(11), + new NormalMatch(12), new NormalMatch(13), new SpecialMatch(14) + ]); }); }); From f3250309bb23ac4a4a3ef5f5998a150268fc3602 Mon Sep 17 00:00:00 2001 From: Kevin Dangoor Date: Thu, 17 Jan 2013 10:51:41 -0500 Subject: [PATCH 14/14] scoring tweak to improve results in certain cases (tests added). @peterflynn noted that "jsutil" matched "JSLintUtils.js" over "JSUtils.js". This change gives a significant boost to consecutive matches that started on a special character. I also boosted specials a little more to balance out specials vs. consecutive matches. --- src/utils/StringMatch.js | 30 +++++++++++++++++++++--------- test/spec/StringMatch-test.js | 14 ++++++++++++++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/utils/StringMatch.js b/src/utils/StringMatch.js index b830d932756..43dc931a7e1 100644 --- a/src/utils/StringMatch.js +++ b/src/utils/StringMatch.js @@ -114,12 +114,12 @@ define(function (require, exports, module) { } // Constants for scoring - var SPECIAL_POINTS = 30; + var SPECIAL_POINTS = 35; var MATCH_POINTS = 10; var LAST_SEGMENT_BOOST = 1; var BEGINNING_OF_NAME_POINTS = 25; var DEDUCTION_FOR_LENGTH = 0.2; - var CONSECUTIVE_MATCHES_POINTS = 25; + var CONSECUTIVE_MATCHES_POINTS = 10; var NOT_STARTING_ON_SPECIAL_PENALTY = 25; // Used in match lists to designate matches of "special" characters (see @@ -497,7 +497,7 @@ define(function (require, exports, module) { // get to character index c. This function is called before starting a new range // or returning from the function. function closeRangeGap(c) { - // close the current range + // Close the current range if (currentRange) { currentRange.includesLastSegment = lastMatchIndex >= lastSegmentStart; if (currentRange.matched && currentRange.includesLastSegment) { @@ -516,7 +516,7 @@ define(function (require, exports, module) { ranges.push(currentRange); } - // if there was space between the new range and the last, + // If there was space between the new range and the last, // add a new unmatched range before the new range can be added. if (lastMatchIndex + 1 < c) { ranges.push({ @@ -529,9 +529,13 @@ define(function (require, exports, module) { lastSegmentScore = 0; } - // adds a matched character to the appropriate range + // In some cases (see the use of this variable below), we accelerate the + // bonus the more consecutive matches there are. + var numConsecutive = 0; + + // Adds a matched character to the appropriate range function addMatch(match) { - // pull off the character index + // Pull off the character index var c = match.index; var newPoints = 0; @@ -542,7 +546,7 @@ define(function (require, exports, module) { } newPoints += MATCH_POINTS; - // a bonus is given for characters that match at the beginning + // A bonus is given for characters that match at the beginning // of the filename if (c === lastSegmentStart) { if (DEBUG_SCORES) { @@ -556,10 +560,18 @@ define(function (require, exports, module) { // handles the initial value of lastMatchIndex which is used for // constructing ranges but we don't yet have a true match. if (score > 0 && lastMatchIndex + 1 === c) { + // Consecutive matches that started on a special are a + // good indicator of intent, so we award an added bonus there. + if (currentRangeStartedOnSpecial) { + numConsecutive++; + } + if (DEBUG_SCORES) { - scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS; + scoreDebug.consecutive += CONSECUTIVE_MATCHES_POINTS * numConsecutive; } - newPoints += CONSECUTIVE_MATCHES_POINTS; + newPoints += CONSECUTIVE_MATCHES_POINTS * numConsecutive; + } else { + numConsecutive = 1; } // add points for "special" character matches diff --git a/test/spec/StringMatch-test.js b/test/spec/StringMatch-test.js index 9618a4dab07..0af6fb2409a 100644 --- a/test/spec/StringMatch-test.js +++ b/test/spec/StringMatch-test.js @@ -535,6 +535,20 @@ define(function (require, exports, module) { "test/SpecRunner.html" ])).toBe(true); }); + + it("should find the right jsutil", function () { + expect(goodRelativeOrdering("jsutil", [ + "src/language/JSUtil.js", + "src/language/JSLintUtils.js" + ])).toBe(true); + }); + + it("should find the right jsu", function () { + expect(goodRelativeOrdering("jsu", [ + "src/language/JSLintUtils.js", + "src/language/JSUtil.js" + ])).toBe(true); + }); }); describe("scoring", function () {