forked from golang/tools
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
internal/lsp/fuzzy: add fuzzy matching library
This change uses a fuzzy matching library to score completion results. Updates golang/go#32754 Change-Id: Ia7771b33534de393a865443e05c0fcbf1e9a969b Reviewed-on: https://go-review.googlesource.com/c/tools/+/184441 Run-TryBot: Rebecca Stambler <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Ian Cottrell <[email protected]>
- Loading branch information
1 parent
719fbf7
commit 2214986
Showing
6 changed files
with
1,169 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
// Copyright 2019 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package fuzzy | ||
|
||
import ( | ||
"unicode" | ||
) | ||
|
||
// Input identifies the kind of string being analyzed. RuneRoles consults it to decide which
// punctuation bytes act as segment separators.
type Input int

const (
	// Text is free-form text: no byte is treated as a segment separator.
	Text Input = iota
	// Filename is a file path: '/' delimits segments.
	Filename
	// Symbol is a symbol name: '.' and ':' delimit segments.
	Symbol
)
|
||
// RuneRole is the role that a single byte plays within an input string.
type RuneRole byte

const (
	// RNone marks a byte that has no role, e.g. whitespace, or punctuation that is not a
	// segment separator for the input type.
	RNone RuneRole = iota
	// RSep marks a byte that separates two segments.
	RSep
	// RTail marks a byte that continues a word in lower case. Digits and non-ASCII bytes are
	// classified the same way as lower-case letters.
	RTail
	// RUCTail marks an upper-case letter that continues a run of upper-case letters, such as
	// the 'D' in "ID".
	RUCTail
	// RHead marks the first byte of a word.
	RHead
)
|
||
// RuneRoles detects the roles of each byte rune in an input string and stores it in the output | ||
// slice. The rune role depends on the input type. Stops when it parsed all the runes in the string | ||
// or when it filled the output. If output is nil, then it gets created. | ||
func RuneRoles(str string, input Input, reuse []RuneRole) []RuneRole { | ||
var output []RuneRole | ||
if cap(reuse) < len(str) { | ||
output = make([]RuneRole, 0, len(str)) | ||
} else { | ||
output = reuse[:0] | ||
} | ||
|
||
prev, prev2 := rtNone, rtNone | ||
for i := 0; i < len(str); i++ { | ||
r := rune(str[i]) | ||
|
||
role := RNone | ||
|
||
curr := rtLower | ||
if str[i] <= unicode.MaxASCII { | ||
curr = runeType(rt[str[i]] - '0') | ||
} | ||
|
||
if curr == rtLower { | ||
if prev == rtNone || prev == rtPunct { | ||
role = RHead | ||
} else { | ||
role = RTail | ||
} | ||
} else if curr == rtUpper { | ||
role = RHead | ||
|
||
if prev == rtUpper { | ||
// This and previous characters are both upper case. | ||
|
||
if i+1 == len(str) { | ||
// This is last character, previous was also uppercase -> this is UCTail | ||
// i.e., (current char is C): aBC / BC / ABC | ||
role = RUCTail | ||
} | ||
} | ||
} else if curr == rtPunct { | ||
switch { | ||
case input == Filename && r == '/': | ||
role = RSep | ||
case input == Symbol && r == '.': | ||
role = RSep | ||
case input == Symbol && r == ':': | ||
role = RSep | ||
} | ||
} | ||
if curr != rtLower { | ||
if i > 1 && output[i-1] == RHead && prev2 == rtUpper && (output[i-2] == RHead || output[i-2] == RUCTail) { | ||
// The previous two characters were uppercase. The current one is not a lower case, so the | ||
// previous one can't be a HEAD. Make it a UCTail. | ||
// i.e., (last char is current char - B must be a UCTail): ABC / ZABC / AB. | ||
output[i-1] = RUCTail | ||
} | ||
} | ||
|
||
output = append(output, role) | ||
prev2 = prev | ||
prev = curr | ||
} | ||
return output | ||
} | ||
|
||
// runeType is the coarse character class of an ASCII byte, as recorded in the rt table.
type runeType byte

const (
	// rtNone is the class of every ASCII byte not covered by the classes below.
	rtNone runeType = iota
	// rtPunct is the class of the punctuation bytes '.', '/' and ':'.
	rtPunct
	// rtLower is the class of the lower-case letters and the digits.
	rtLower
	// rtUpper is the class of the upper-case letters.
	rtUpper
)

// rt is a lookup table indexed by ASCII byte value (0-127). The character stored at each index
// is the digit '0' plus the runeType of that byte.
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"
|
||
// LastSegment returns the substring representing the last segment from the input, where each | ||
// byte has an associated RuneRole in the roles slice. This makes sense only for inputs of Symbol | ||
// or Filename type. | ||
func LastSegment(input string, roles []RuneRole) string { | ||
// Exclude ending separators. | ||
end := len(input) - 1 | ||
for end >= 0 && roles[end] == RSep { | ||
end-- | ||
} | ||
if end < 0 { | ||
return "" | ||
} | ||
|
||
start := end - 1 | ||
for start >= 0 && roles[start] != RSep { | ||
start-- | ||
} | ||
|
||
return input[start+1 : end+1] | ||
} | ||
|
||
// ToLower returns a lower-cased copy of input as a byte slice of length len(input). Only the
// ASCII letters 'A'-'Z' are converted; every other byte, including all non-ASCII bytes, is
// copied unchanged.
//
// The backing array of reuse is used for the result when its capacity is at least len(input),
// whatever its current length; otherwise a new slice is allocated. Callers must use the
// returned slice.
func ToLower(input string, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Extend (or shrink) reuse to exactly len(input). Indexing reuse directly would panic
		// when its length is shorter than the input even though its capacity is sufficient.
		output = reuse[:len(input)]
	}

	for i := 0; i < len(input); i++ {
		c := input[i]
		// 'A'-'Z' lies entirely within ASCII, so no separate ASCII check is needed.
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}
|
||
// WordConsumer is a callback that receives one word of an input, identified by the half-open
// byte range [start, end): start is the offset of the word's first byte and end is the offset
// just past its last byte.
type WordConsumer func(start, end int)
|
||
// Words find word delimiters in an input based on its bytes' mappings to rune roles. The offset | ||
// delimiters for each word are fed to the provided consumer function. | ||
func Words(roles []RuneRole, consume WordConsumer) { | ||
var wordStart int | ||
for i, r := range roles { | ||
switch r { | ||
case RUCTail, RTail: | ||
case RHead, RNone, RSep: | ||
if i != wordStart { | ||
consume(wordStart, i) | ||
} | ||
wordStart = i | ||
if r != RHead { | ||
// Skip this character. | ||
wordStart = i + 1 | ||
} | ||
} | ||
} | ||
if wordStart != len(roles) { | ||
consume(wordStart, len(roles)) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
// Copyright 2019 The Go Authors. All rights reserved. | ||
// Use of this source code is governed by a BSD-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package fuzzy_test | ||
|
||
import ( | ||
"bytes" | ||
"sort" | ||
"testing" | ||
|
||
"golang.org/x/tools/internal/lsp/fuzzy" | ||
) | ||
|
||
var rolesTests = []struct { | ||
str string | ||
input fuzzy.Input | ||
want string | ||
}{ | ||
{str: "abc", want: "Ccc", input: fuzzy.Text}, | ||
{str: ".abc", want: " Ccc", input: fuzzy.Text}, | ||
{str: "abc def", want: "Ccc Ccc", input: fuzzy.Text}, | ||
{str: "SWT MyID", want: "Cuu CcCu", input: fuzzy.Text}, | ||
{str: "ID", want: "Cu", input: fuzzy.Text}, | ||
{str: "IDD", want: "Cuu", input: fuzzy.Text}, | ||
{str: " ID ", want: " Cu ", input: fuzzy.Text}, | ||
{str: "IDSome", want: "CuCccc", input: fuzzy.Text}, | ||
{str: "0123456789", want: "Cccccccccc", input: fuzzy.Text}, | ||
{str: "abcdefghigklmnopqrstuvwxyz", want: "Cccccccccccccccccccccccccc", input: fuzzy.Text}, | ||
{str: "ABCDEFGHIGKLMNOPQRSTUVWXYZ", want: "Cuuuuuuuuuuuuuuuuuuuuuuuuu", input: fuzzy.Text}, | ||
{str: "こんにちは", want: "Ccccccccccccccc", input: fuzzy.Text}, // We don't parse unicode | ||
{str: ":/.", want: " ", input: fuzzy.Text}, | ||
|
||
// Filenames | ||
{str: "abc/def", want: "Ccc/Ccc", input: fuzzy.Filename}, | ||
{str: " abc_def", want: " Ccc Ccc", input: fuzzy.Filename}, | ||
{str: " abc_DDf", want: " Ccc CCc", input: fuzzy.Filename}, | ||
{str: ":.", want: " ", input: fuzzy.Filename}, | ||
|
||
// Symbols | ||
{str: "abc::def::goo", want: "Ccc//Ccc//Ccc", input: fuzzy.Symbol}, | ||
{str: "proto::Message", want: "Ccccc//Ccccccc", input: fuzzy.Symbol}, | ||
{str: "AbstractSWTFactory", want: "CcccccccCuuCcccccc", input: fuzzy.Symbol}, | ||
{str: "Abs012", want: "Cccccc", input: fuzzy.Symbol}, | ||
{str: "/", want: " ", input: fuzzy.Symbol}, | ||
{str: "fOO", want: "CCu", input: fuzzy.Symbol}, | ||
{str: "fo_oo.o_oo", want: "Cc Cc/C Cc", input: fuzzy.Symbol}, | ||
} | ||
|
||
func rolesString(roles []fuzzy.RuneRole) string { | ||
var buf bytes.Buffer | ||
for _, r := range roles { | ||
buf.WriteByte(" /cuC"[int(r)]) | ||
} | ||
return buf.String() | ||
} | ||
|
||
func TestRoles(t *testing.T) { | ||
for _, tc := range rolesTests { | ||
gotRoles := make([]fuzzy.RuneRole, len(tc.str)) | ||
fuzzy.RuneRoles(tc.str, tc.input, gotRoles) | ||
got := rolesString(gotRoles) | ||
if got != tc.want { | ||
t.Errorf("roles(%s) = %v; want %v", tc.str, got, tc.want) | ||
} | ||
} | ||
} | ||
|
||
// words converts each of its string arguments into a byte slice and returns them in the same
// order. It returns nil when called without arguments.
func words(strWords ...string) [][]byte {
	var out [][]byte
	for i := range strWords {
		out = append(out, []byte(strWords[i]))
	}
	return out
}
|
||
// wordSplitTests pairs an input with the words that Words is expected to report for it when
// the input is analyzed as a Symbol.
var wordSplitTests = []struct {
	input string
	want  []string
}{
	// Whitespace separates words.
	{input: "foo bar baz", want: []string{"foo", "bar", "baz"}},
	// An upper-case letter after lower case starts a new word.
	{input: "fooBarBaz", want: []string{"foo", "Bar", "Baz"}},
	// Runs of upper-case letters stay together.
	{input: "FOOBarBAZ", want: []string{"FOO", "Bar", "BAZ"}},
	// Digits extend the current word; '_' splits words.
	{input: "foo123_bar2Baz3", want: []string{"foo123", "bar2", "Baz3"}},
}
|
||
func TestWordSplit(t *testing.T) { | ||
for _, tc := range wordSplitTests { | ||
roles := fuzzy.RuneRoles(tc.input, fuzzy.Symbol, nil) | ||
|
||
var got []string | ||
consumer := func(i, j int) { | ||
got = append(got, tc.input[i:j]) | ||
} | ||
fuzzy.Words(roles, consumer) | ||
|
||
if eq := diffStringLists(tc.want, got); !eq { | ||
t.Errorf("input %v: (want %v -> got %v)", tc.input, tc.want, got) | ||
} | ||
} | ||
} | ||
|
||
// diffStringLists reports whether a and b contain the same strings, ignoring order. Despite
// its name, it returns true when the lists match and false when they differ. The comparison
// works on sorted copies, so neither argument is modified.
func diffStringLists(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}

	// Sort copies rather than the arguments: callers pass slices that live in shared test
	// tables, and sorting those in place would reorder the tables as a side effect.
	sortedA := append([]string(nil), a...)
	sortedB := append([]string(nil), b...)
	sort.Strings(sortedA)
	sort.Strings(sortedB)

	for i := range sortedA {
		if sortedA[i] != sortedB[i] {
			return false
		}
	}
	return true
}
|
||
var lastSegmentSplitTests = []struct { | ||
str string | ||
input fuzzy.Input | ||
want string | ||
}{ | ||
{ | ||
str: "identifier", | ||
input: fuzzy.Symbol, | ||
want: "identifier", | ||
}, | ||
{ | ||
str: "two_words", | ||
input: fuzzy.Symbol, | ||
want: "two_words", | ||
}, | ||
{ | ||
str: "first::second", | ||
input: fuzzy.Symbol, | ||
want: "second", | ||
}, | ||
{ | ||
str: "foo.bar.FOOBar_buz123_test", | ||
input: fuzzy.Symbol, | ||
want: "FOOBar_buz123_test", | ||
}, | ||
{ | ||
str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", | ||
input: fuzzy.Filename, | ||
want: "fuzzy_matcher.go", | ||
}, | ||
{ | ||
str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", | ||
input: fuzzy.Text, | ||
want: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go", | ||
}, | ||
} | ||
|
||
func TestLastSegment(t *testing.T) { | ||
for _, tc := range lastSegmentSplitTests { | ||
roles := fuzzy.RuneRoles(tc.str, tc.input, nil) | ||
|
||
got := fuzzy.LastSegment(tc.str, roles) | ||
|
||
if got != tc.want { | ||
t.Errorf("str %v: want %v; got %v", tc.str, tc.want, got) | ||
} | ||
} | ||
} | ||
|
||
func BenchmarkRoles(b *testing.B) { | ||
str := "AbstractSWTFactory" | ||
out := make([]fuzzy.RuneRole, len(str)) | ||
|
||
for i := 0; i < b.N; i++ { | ||
fuzzy.RuneRoles(str, fuzzy.Symbol, out) | ||
} | ||
b.SetBytes(int64(len(str))) | ||
} |
Oops, something went wrong.