Skip to content

Commit

Permalink
internal/lsp/fuzzy: add fuzzy matching library
Browse files Browse the repository at this point in the history
This change uses a fuzzy matching library to score completion results.

Updates golang/go#32754

Change-Id: Ia7771b33534de393a865443e05c0fcbf1e9a969b
Reviewed-on: https://go-review.googlesource.com/c/tools/+/184441
Run-TryBot: Rebecca Stambler <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Ian Cottrell <[email protected]>
  • Loading branch information
stamblerre committed Jul 3, 2019
1 parent 719fbf7 commit 2214986
Show file tree
Hide file tree
Showing 6 changed files with 1,169 additions and 4 deletions.
4 changes: 2 additions & 2 deletions internal/lsp/completion.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (s *Server) completion(ctx context.Context, params *protocol.CompletionPara
if err != nil {
return nil, err
}
items, surrounding, err := source.Completion(ctx, view, f, rng.Start, source.CompletionOptions{
candidates, surrounding, err := source.Completion(ctx, view, f, rng.Start, source.CompletionOptions{
DeepComplete: s.useDeepCompletions,
})
if err != nil {
Expand Down Expand Up @@ -58,7 +58,7 @@ func (s *Server) completion(ctx context.Context, params *protocol.CompletionPara
}
return &protocol.CompletionList{
IsIncomplete: false,
Items: toProtocolCompletionItems(items, prefix, insertionRng, s.insertTextFormat, s.usePlaceholders, s.useDeepCompletions),
Items: toProtocolCompletionItems(candidates, prefix, insertionRng, s.insertTextFormat, s.usePlaceholders, s.useDeepCompletions),
}, nil
}

Expand Down
185 changes: 185 additions & 0 deletions internal/lsp/fuzzy/input.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fuzzy

import (
"unicode"
)

// Input specifies the type of the input. This influences how the bytes of the input are
// interpreted with respect to segmenting it (see RuneRoles).
type Input int

const (
// Text represents a text input type. Input is not segmented.
Text Input = iota
// Filename represents a filepath input type with '/' segment delimiters.
Filename
// Symbol represents a symbol input type with '.' and ':' segment delimiters.
Symbol
)

// RuneRole specifies the role of a rune in the context of an input.
type RuneRole byte

const (
// RNone specifies a rune without any role in the input (e.g., whitespace, or punctuation that
// is not a segment separator for the input type). Note that non-ASCII bytes are classified as
// lower-case letters by RuneRoles, so they are assigned RHead or RTail rather than RNone.
RNone RuneRole = iota
// RSep specifies a rune with the role of segment separator.
RSep
// RTail specifies a rune which is a lower-case tail in a word in the input.
RTail
// RUCTail specifies a rune which is an upper-case tail in a word in the input.
RUCTail
// RHead specifies a rune which is the first character in a word in the input.
RHead
)

// RuneRoles computes the role of every byte of str and returns the roles in a slice of length
// len(str). Which punctuation bytes count as segment separators depends on the input type.
//
// The input is processed byte by byte: any byte above unicode.MaxASCII is treated as a
// lower-case letter. If reuse has capacity for len(str) roles its backing array is used for the
// result; otherwise (including when reuse is nil) a new slice is allocated.
func RuneRoles(str string, input Input, reuse []RuneRole) []RuneRole {
	roles := reuse[:0]
	if cap(reuse) < len(str) {
		roles = make([]RuneRole, 0, len(str))
	}

	// Classes of the previous byte and of the one before it.
	last, beforeLast := rtNone, rtNone
	n := len(str)
	for i := 0; i < n; i++ {
		b := str[i]

		// Non-ASCII bytes have no entry in the table and count as lower case.
		kind := rtLower
		if b <= unicode.MaxASCII {
			kind = runeType(rt[b] - '0')
		}

		role := RNone
		switch kind {
		case rtLower:
			// A lower-case byte starts a word only right after a non-letter.
			role = RTail
			if last == rtNone || last == rtPunct {
				role = RHead
			}
		case rtUpper:
			role = RHead
			// A final upper-case byte that follows another upper-case byte ends an
			// all-caps run, i.e. the C in aBC / BC / ABC.
			if last == rtUpper && i == n-1 {
				role = RUCTail
			}
		case rtPunct:
			isSep := (input == Filename && b == '/') ||
				(input == Symbol && (b == '.' || b == ':'))
			if isSep {
				role = RSep
			}
		}

		// The two preceding bytes were upper case and this one is not lower case, so the
		// previous byte cannot start a new word: demote it from head to upper-case tail,
		// i.e. the B when reaching the last byte of ABC / ZABC / AB.
		if kind != rtLower && i > 1 && roles[i-1] == RHead && beforeLast == rtUpper {
			if twoBack := roles[i-2]; twoBack == RHead || twoBack == RUCTail {
				roles[i-1] = RUCTail
			}
		}

		roles = append(roles, role)
		beforeLast, last = last, kind
	}
	return roles
}

// runeType is the coarse character class of a single input byte, used by RuneRoles to decide
// where words and segments start.
type runeType byte

const (
// rtNone is a byte that is neither a letter, a digit, nor recognized punctuation.
rtNone runeType = iota
// rtPunct is a punctuation byte that may act as a segment separator.
rtPunct
// rtLower is a lower-case byte. The table below also assigns this class to digits.
rtLower
// rtUpper is an upper-case letter.
rtUpper
)

// rt is a lookup table indexed by ASCII byte value. Each character of the string is the decimal
// digit of the runeType for that byte, so the class is obtained as runeType(rt[b] - '0'):
// '.', '/' and ':' map to rtPunct, 'A'-'Z' to rtUpper, 'a'-'z' and '0'-'9' to rtLower, and every
// other ASCII byte to rtNone.
const rt = "00000000000000000000000000000000000000000000001122222222221000000333333333333333333333333330000002222222222222222222222222200000"

// LastSegment returns the final segment of input, ignoring any trailing separators. roles must
// hold the RuneRole of each byte of input. The result is empty when input is empty or consists
// only of separators. This is only meaningful for Symbol or Filename inputs, since other inputs
// contain no separators.
func LastSegment(input string, roles []RuneRole) string {
	// stop is the exclusive end of the segment: back up over trailing separators.
	stop := len(input)
	for stop > 0 && roles[stop-1] == RSep {
		stop--
	}
	if stop == 0 {
		return ""
	}

	// begin is the inclusive start: walk left until just after the previous separator.
	begin := stop - 1
	for begin > 0 && roles[begin-1] != RSep {
		begin--
	}

	return input[begin:stop]
}

// ToLower returns a copy of input in which the ASCII letters 'A'-'Z' are replaced by their
// lower-case counterparts. Only ASCII is considered: every other byte, including the bytes of
// multi-byte UTF-8 sequences, is copied unmodified.
//
// If reuse has capacity for len(input) bytes, its backing array is used for the result,
// regardless of its current length; otherwise (including when reuse is nil) a new slice is
// allocated. The returned slice always has length len(input).
func ToLower(input string, reuse []byte) []byte {
	var output []byte
	if cap(reuse) < len(input) {
		output = make([]byte, len(input))
	} else {
		// Reslice to the required length: reuse may be shorter than input (e.g. buf[:0])
		// while still having enough capacity, and indexing past its length would panic.
		output = reuse[:len(input)]
	}

	for i := 0; i < len(input); i++ {
		c := input[i]
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A'
		}
		output[i] = c
	}
	return output
}

// WordConsumer is a callback that receives one word of an input, identified by the half-open
// byte offset range [start,end) into that input (start is inclusive, end is exclusive).
type WordConsumer func(start, end int)

// Words splits an input into words using the rune roles of its bytes and reports each non-empty
// word to consume as a [start,end) offset pair, in order of appearance. A word starts at an
// RHead byte and extends over the tail bytes that follow it; RNone and RSep bytes end the
// current word and belong to no word.
func Words(roles []RuneRole, consume WordConsumer) {
	begin := 0
	for pos, role := range roles {
		if role != RHead && role != RNone && role != RSep {
			// Tail bytes simply extend the current word.
			continue
		}
		if begin != pos {
			consume(begin, pos)
		}
		if role == RHead {
			// The head byte is the first byte of the next word.
			begin = pos
		} else {
			// Skip this character.
			begin = pos + 1
		}
	}
	// Flush the word that runs to the end of the input, if any.
	if n := len(roles); begin != n {
		consume(begin, n)
	}
}
186 changes: 186 additions & 0 deletions internal/lsp/fuzzy/input_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fuzzy_test

import (
"bytes"
"sort"
"testing"

"golang.org/x/tools/internal/lsp/fuzzy"
)

// rolesTests lists inputs for fuzzy.RuneRoles together with the expected roles. Each byte of
// want encodes the role of the byte at the same offset in str, using the alphabet produced by
// rolesString: ' ' = RNone, '/' = RSep, 'c' = RTail, 'u' = RUCTail, 'C' = RHead. Consequently
// want must always have exactly len(str) bytes.
var rolesTests = []struct {
	str   string
	input fuzzy.Input
	want  string
}{
	{str: "abc", want: "Ccc", input: fuzzy.Text},
	{str: ".abc", want: " Ccc", input: fuzzy.Text},
	{str: "abc def", want: "Ccc Ccc", input: fuzzy.Text},
	{str: "SWT MyID", want: "Cuu CcCu", input: fuzzy.Text},
	{str: "ID", want: "Cu", input: fuzzy.Text},
	{str: "IDD", want: "Cuu", input: fuzzy.Text},
	{str: " ID ", want: " Cu ", input: fuzzy.Text},
	{str: "IDSome", want: "CuCccc", input: fuzzy.Text},
	{str: "0123456789", want: "Cccccccccc", input: fuzzy.Text},
	{str: "abcdefghigklmnopqrstuvwxyz", want: "Cccccccccccccccccccccccccc", input: fuzzy.Text},
	{str: "ABCDEFGHIGKLMNOPQRSTUVWXYZ", want: "Cuuuuuuuuuuuuuuuuuuuuuuuuu", input: fuzzy.Text},
	{str: "こんにちは", want: "Ccccccccccccccc", input: fuzzy.Text}, // We don't parse unicode
	// Text input has no separators: one RNone per punctuation byte.
	{str: ":/.", want: "   ", input: fuzzy.Text},

	// Filenames
	{str: "abc/def", want: "Ccc/Ccc", input: fuzzy.Filename},
	{str: " abc_def", want: " Ccc Ccc", input: fuzzy.Filename},
	{str: " abc_DDf", want: " Ccc CCc", input: fuzzy.Filename},
	// Neither ':' nor '.' separates filename segments: one RNone per byte.
	{str: ":.", want: "  ", input: fuzzy.Filename},

	// Symbols
	{str: "abc::def::goo", want: "Ccc//Ccc//Ccc", input: fuzzy.Symbol},
	{str: "proto::Message", want: "Ccccc//Ccccccc", input: fuzzy.Symbol},
	{str: "AbstractSWTFactory", want: "CcccccccCuuCcccccc", input: fuzzy.Symbol},
	{str: "Abs012", want: "Cccccc", input: fuzzy.Symbol},
	{str: "/", want: " ", input: fuzzy.Symbol},
	{str: "fOO", want: "CCu", input: fuzzy.Symbol},
	{str: "fo_oo.o_oo", want: "Cc Cc/C Cc", input: fuzzy.Symbol},
}

// rolesString renders roles as a compact string with one character per role:
// ' ' for RNone, '/' for RSep, 'c' for RTail, 'u' for RUCTail and 'C' for RHead.
func rolesString(roles []fuzzy.RuneRole) string {
	// Indexed by the numeric value of the role.
	const glyphs = " /cuC"

	var sb bytes.Buffer
	sb.Grow(len(roles))
	for i := range roles {
		sb.WriteByte(glyphs[roles[i]])
	}
	return sb.String()
}

// TestRoles checks that fuzzy.RuneRoles assigns the expected role to every byte of each input
// in rolesTests.
func TestRoles(t *testing.T) {
	for _, tc := range rolesTests {
		buf := make([]fuzzy.RuneRole, len(tc.str))
		// Inspect the slice RuneRoles returns rather than buf itself: the function only
		// promises to use buf's backing array when its capacity is sufficient.
		got := rolesString(fuzzy.RuneRoles(tc.str, tc.input, buf))
		if got != tc.want {
			// %q keeps leading/trailing blanks (RNone roles) visible in the failure output.
			t.Errorf("roles(%q) = %q; want %q", tc.str, got, tc.want)
		}
	}
}

// words converts each of the given strings to a byte slice, preserving order. It returns nil
// when called without arguments.
func words(strWords ...string) [][]byte {
	if len(strWords) == 0 {
		return nil
	}
	out := make([][]byte, len(strWords))
	for i, w := range strWords {
		out[i] = []byte(w)
	}
	return out
}

// wordSplitTests pairs an input with the words that fuzzy.Words is expected to report for it
// when the roles are computed by TestWordSplit with the fuzzy.Symbol input type.
var wordSplitTests = []struct {
input string
want []string
}{
{
// Whitespace separates words.
input: "foo bar baz",
want: []string{"foo", "bar", "baz"},
},
{
// A lower-to-upper case change starts a new word.
input: "fooBarBaz",
want: []string{"foo", "Bar", "Baz"},
},
{
// Runs of upper-case letters stay together as one word.
input: "FOOBarBAZ",
want: []string{"FOO", "Bar", "BAZ"},
},
{
// Digits stay attached to the preceding word; '_' separates words.
input: "foo123_bar2Baz3",
want: []string{"foo123", "bar2", "Baz3"},
},
}

// TestWordSplit checks that fuzzy.Words, fed with the Symbol roles of each input in
// wordSplitTests, reports the expected set of words.
func TestWordSplit(t *testing.T) {
	for _, tc := range wordSplitTests {
		var got []string
		// Collect every reported [start,end) range as the substring it denotes.
		fuzzy.Words(fuzzy.RuneRoles(tc.input, fuzzy.Symbol, nil), func(start, end int) {
			got = append(got, tc.input[start:end])
		})

		if !diffStringLists(tc.want, got) {
			t.Errorf("input %v: (want %v -> got %v)", tc.input, tc.want, got)
		}
	}
}

// diffStringLists reports whether a and b contain the same strings, ignoring order (despite
// its name, it returns true when the lists are equal). The arguments are not modified: the
// comparison sorts private copies so that callers can still print the lists in their original
// order afterwards.
func diffStringLists(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	sortedA := append([]string(nil), a...)
	sortedB := append([]string(nil), b...)
	sort.Strings(sortedA)
	sort.Strings(sortedB)
	for i := range sortedA {
		if sortedA[i] != sortedB[i] {
			return false
		}
	}
	return true
}

// lastSegmentSplitTests pairs an input string and its input type with the substring that
// fuzzy.LastSegment is expected to return for it.
var lastSegmentSplitTests = []struct {
str string
input fuzzy.Input
want string
}{
{
str: "identifier",
input: fuzzy.Symbol,
want: "identifier",
},
{
// '_' is not a segment separator.
str: "two_words",
input: fuzzy.Symbol,
want: "two_words",
},
{
str: "first::second",
input: fuzzy.Symbol,
want: "second",
},
{
str: "foo.bar.FOOBar_buz123_test",
input: fuzzy.Symbol,
want: "FOOBar_buz123_test",
},
{
// For filenames only '/' separates segments, so the '.' in the file name is kept.
str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
input: fuzzy.Filename,
want: "fuzzy_matcher.go",
},
{
// Text input is never segmented, so the whole string is the last segment.
str: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
input: fuzzy.Text,
want: "golang.org/x/tools/internal/lsp/fuzzy_matcher.go",
},
}

// TestLastSegment checks that fuzzy.LastSegment extracts the expected trailing segment for
// every case in lastSegmentSplitTests, using roles computed for the case's input type.
func TestLastSegment(t *testing.T) {
	for i := range lastSegmentSplitTests {
		tc := lastSegmentSplitTests[i]

		got := fuzzy.LastSegment(tc.str, fuzzy.RuneRoles(tc.str, tc.input, nil))
		if got == tc.want {
			continue
		}
		t.Errorf("str %v: want %v; got %v", tc.str, tc.want, got)
	}
}

// BenchmarkRoles measures fuzzy.RuneRoles on a typical mixed-case symbol, reusing one output
// buffer across iterations and reporting throughput in bytes of input processed.
func BenchmarkRoles(b *testing.B) {
	const symbol = "AbstractSWTFactory"
	scratch := make([]fuzzy.RuneRole, len(symbol))

	// Bytes processed per iteration, used for the MB/s figure.
	b.SetBytes(int64(len(symbol)))
	for remaining := b.N; remaining > 0; remaining-- {
		fuzzy.RuneRoles(symbol, fuzzy.Symbol, scratch)
	}
}
Loading

0 comments on commit 2214986

Please sign in to comment.