-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Search for issues/pulls #530
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,5 +41,6 @@ coverage.out | |
/dist | ||
/custom | ||
/data | ||
/indexers | ||
/log | ||
/public/img/avatar |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
// Copyright 2017 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package models | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"strconv" | ||
"strings" | ||
|
||
"code.gitea.io/gitea/modules/log" | ||
"code.gitea.io/gitea/modules/setting" | ||
"code.gitea.io/gitea/modules/util" | ||
"github.com/blevesearch/bleve" | ||
"github.com/blevesearch/bleve/analysis/analyzer/simple" | ||
"github.com/blevesearch/bleve/search/query" | ||
) | ||
|
||
// issueIndexerUpdateQueue queue of issues that need to be updated in the issues | ||
// indexer | ||
var issueIndexerUpdateQueue chan *Issue | ||
|
||
// issueIndexer (thread-safe) index for searching issues | ||
var issueIndexer bleve.Index | ||
|
||
// issueIndexerData is the document stored in the issue indexer for a single
// issue.
type issueIndexerData struct {
	ID     int64
	RepoID int64

	Title   string
	Content string
}

// Type returns the bleve document type of an issue document. bleve consults
// this method to pick the document mapping registered under the same name
// ("issues") when the index is created; without it the documents fall back to
// the index's default mapping and the per-field settings are never applied.
func (issueIndexerData) Type() string {
	return "issues"
}
|
||
// numericQuery an numeric-equality query for the given value and field | ||
func numericQuery(value int64, field string) *query.NumericRangeQuery { | ||
f := float64(value) | ||
tru := true | ||
q := bleve.NewNumericRangeInclusiveQuery(&f, &f, &tru, &tru) | ||
q.SetField(field) | ||
return q | ||
} | ||
|
||
// SearchIssuesByKeyword searches for issues by given conditions. | ||
// Returns the matching issue IDs | ||
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) { | ||
fields := strings.Fields(strings.ToLower(keyword)) | ||
indexerQuery := bleve.NewConjunctionQuery( | ||
numericQuery(repoID, "RepoID"), | ||
bleve.NewDisjunctionQuery( | ||
bleve.NewPhraseQuery(fields, "Title"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At least we should have a Match Phrase Query, if not a Match Query or best a Query String Query. For both fields "Title" and "Content". |
||
bleve.NewPhraseQuery(fields, "Content"), | ||
)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Comments should be indexed, too, and queried here. |
||
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false) | ||
search.Fields = []string{"ID"} | ||
|
||
result, err := issueIndexer.Search(search) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
issueIDs := make([]int64, len(result.Hits)) | ||
for i, hit := range result.Hits { | ||
issueIDs[i] = int64(hit.Fields["ID"].(float64)) | ||
} | ||
return issueIDs, nil | ||
} | ||
|
||
// InitIssueIndexer initialize issue indexer | ||
func InitIssueIndexer() { | ||
_, err := os.Stat(setting.Indexer.IssuePath) | ||
if err != nil { | ||
if os.IsNotExist(err) { | ||
if err = createIssueIndexer(); err != nil { | ||
log.Fatal(4, "CreateIssuesIndexer: %v", err) | ||
} | ||
if err = populateIssueIndexer(); err != nil { | ||
log.Fatal(4, "PopulateIssuesIndex: %v", err) | ||
} | ||
} else { | ||
log.Fatal(4, "InitIssuesIndexer: %v", err) | ||
} | ||
} else { | ||
issueIndexer, err = bleve.Open(setting.Indexer.IssuePath) | ||
if err != nil { | ||
log.Fatal(4, "InitIssuesIndexer, open index: %v", err) | ||
} | ||
} | ||
issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength) | ||
go processIssueIndexerUpdateQueue() | ||
// TODO close issueIndexer when Gitea closes | ||
} | ||
|
||
// createIssueIndexer create an issue indexer if one does not already exist | ||
func createIssueIndexer() error { | ||
mapping := bleve.NewIndexMapping() | ||
docMapping := bleve.NewDocumentMapping() | ||
|
||
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping()) | ||
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping()) | ||
|
||
textFieldMapping := bleve.NewTextFieldMapping() | ||
textFieldMapping.Analyzer = simple.Name | ||
docMapping.AddFieldMappingsAt("Title", textFieldMapping) | ||
docMapping.AddFieldMappingsAt("Content", textFieldMapping) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Issue comments should also be indexed here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More token filters should be added (http://www.blevesearch.com/docs/Token-Filters/), to allow partial matches. My suggestion:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @sha-red exactly. Would you have time to send some PRs to help improve it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lunny Basically I'd love to help with actual code, with pleasure, but because I'd have to set up a whole Go development environment and lot's of other work to do, this will take some time to get everything running. Please also have a look at our minimalistic issue search path for gogs: gogs/gogs#4015 IMHO whole phrase search is better than nothing, but at least comment indexing should be added before releasing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can improve the search in v1.2, comment index should be added in that release cycle and of course the UI should be changed for keyword highlight. |
||
mapping.AddDocumentMapping("issues", docMapping) | ||
|
||
var err error | ||
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping) | ||
return err | ||
} | ||
|
||
// populateIssueIndexer populate the issue indexer with issue data | ||
func populateIssueIndexer() error { | ||
for page := 1; ; page++ { | ||
repos, err := Repositories(&SearchRepoOptions{ | ||
Page: page, | ||
PageSize: 10, | ||
}) | ||
if err != nil { | ||
return fmt.Errorf("Repositories: %v", err) | ||
} | ||
if len(repos) == 0 { | ||
return nil | ||
} | ||
batch := issueIndexer.NewBatch() | ||
for _, repo := range repos { | ||
issues, err := Issues(&IssuesOptions{ | ||
RepoID: repo.ID, | ||
IsClosed: util.OptionalBoolNone, | ||
IsPull: util.OptionalBoolNone, | ||
Page: -1, // do not page | ||
}) | ||
if err != nil { | ||
return fmt.Errorf("Issues: %v", err) | ||
} | ||
for _, issue := range issues { | ||
err = batch.Index(issue.indexUID(), issue.issueData()) | ||
if err != nil { | ||
return fmt.Errorf("batch.Index: %v", err) | ||
} | ||
} | ||
} | ||
if err = issueIndexer.Batch(batch); err != nil { | ||
return fmt.Errorf("index.Batch: %v", err) | ||
} | ||
} | ||
} | ||
|
||
func processIssueIndexerUpdateQueue() { | ||
for { | ||
select { | ||
case issue := <-issueIndexerUpdateQueue: | ||
if err := issueIndexer.Index(issue.indexUID(), issue.issueData()); err != nil { | ||
log.Error(4, "issuesIndexer.Index: %v", err) | ||
} | ||
} | ||
} | ||
} | ||
|
||
// indexUID a unique identifier for an issue used in full-text indices | ||
func (issue *Issue) indexUID() string { | ||
return strconv.FormatInt(issue.ID, 36) | ||
} | ||
|
||
func (issue *Issue) issueData() *issueIndexerData { | ||
return &issueIndexerData{ | ||
ID: issue.ID, | ||
RepoID: issue.RepoID, | ||
Title: issue.Title, | ||
Content: issue.Content, | ||
} | ||
} | ||
|
||
// UpdateIssueIndexer add/update an issue to the issue indexer | ||
func UpdateIssueIndexer(issue *Issue) { | ||
go func() { | ||
issueIndexerUpdateQueue <- issue | ||
}() | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"fields" should actually be "terms", API: NewPhraseQuery(terms []string, field string)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The variable name has since been updated to `terms` (#1031).