From 662cbc1fbdc55fd51c8331567e80e8188cde8d20 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Mon, 7 Jun 2021 20:58:27 +0100 Subject: [PATCH 1/7] More efficiently parse shas for shaPostProcessor The shaPostProcessor currently repeatedly calls git rev-parse --verify on both backends which is fine if there is only one thing that matches a sha - however if there are multiple things then this becomes wildly inefficient. This PR provides functions for both backends which are much faster to use. Fix #16092 Signed-off-by: Andrew Thornton --- modules/git/repo_branch_gogit.go | 24 ++++++++++++++++++++++++ modules/git/repo_branch_nogogit.go | 18 ++++++++++++++++++ modules/markup/html.go | 18 ++++++++++++++---- modules/markup/renderer.go | 27 +++++++++++++++++++++++++++ routers/org/home.go | 1 + routers/repo/issue.go | 5 +++++ routers/repo/milestone.go | 2 ++ routers/repo/projects.go | 2 ++ routers/repo/release.go | 2 ++ routers/repo/view.go | 3 +++ routers/user/home.go | 1 + routers/user/profile.go | 1 + 12 files changed, 100 insertions(+), 4 deletions(-) diff --git a/modules/git/repo_branch_gogit.go b/modules/git/repo_branch_gogit.go index b00253f6ffd66..b5ac079f3ebe0 100644 --- a/modules/git/repo_branch_gogit.go +++ b/modules/git/repo_branch_gogit.go @@ -13,6 +13,30 @@ import ( "github.com/go-git/go-git/v5/plumbing" ) +// IsReferenceExist returns true if given reference exists in the repository. +func (repo *Repository) IsObjectExist(name string) bool { + if name == "" { + return false + } + + _, err := repo.gogitRepo.ResolveRevision(plumbing.Revision(name)) + + return err == nil +} + +// IsReferenceExist returns true if given reference exists in the repository. +func (repo *Repository) IsReferenceExist(name string) bool { + if name == "" { + return false + } + + reference, err := repo.gogitRepo.Reference(plumbing.ReferenceName(name), true) + if err != nil { + return false + } + return reference.Type() != plumbing.InvalidReference +} + // IsBranchExist returns true if given branch exists in current repository. func (repo *Repository) IsBranchExist(name string) bool { if name == "" { diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 13ddcf06cf65e..8c93b54ab8bbb 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -9,10 +9,28 @@ package git import ( "bufio" + "bytes" "io" "strings" ) +// IsReferenceExist returns true if given reference exists in the repository. +func (repo *Repository) IsObjectExist(name string) bool { + if name == "" { + return false + } + + wr, rd, cancel := repo.CatFileBatchCheck() + defer cancel() + _, err := wr.Write([]byte(name + "\n")) + if err != nil { + log("Error writing to CatFileBatchCheck %v", err) + return false + } + sha, _, _, err := ReadBatchLine(rd) + return err == nil && bytes.HasPrefix(sha, []byte(strings.TrimSpace(name))) +} + // IsReferenceExist returns true if given reference exists in the repository. func (repo *Repository) IsReferenceExist(name string) bool { if name == "" { diff --git a/modules/markup/html.go b/modules/markup/html.go index e5e622068d1b8..f8c4a8f5cca4d 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -285,6 +285,7 @@ var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM var nulCleaner = strings.NewReplacer("\000", "") func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error { + defer ctx.Cancel() // FIXME: don't read all content to memory rawHTML, err := ioutil.ReadAll(input) if err != nil { @@ -947,6 +948,17 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { return } hash := node.Data[m[2]:m[3]] + + if ctx.GitRepo == nil { + var err error + ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repo"]) + if err != nil { + log.Error("unable to open repository: %s Error: %v", ctx.Metas["repo"], err) + return + } + ctx.AddCancel(ctx.GitRepo.Close) + } + // The regex does not lie, it matches the hash pattern. // However, a regex cannot know if a hash actually exists or not. // We could assume that a SHA1 hash should probably contain alphas AND numerics @@ -955,10 +967,8 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { // as used by git and github for linking and thus we have to do similar. // Because of this, we check to make sure that a matched hash is actually // a commit in the repository before making it a link. - if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil { - if !strings.Contains(err.Error(), "fatal: Needed a single revision") { - log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err) - } + + if !ctx.GitRepo.IsObjectExist(hash) { return } diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index 5d35bd5a67715..a2ab08ffc56b1 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -13,6 +13,7 @@ import ( "strings" "sync" + "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" ) @@ -42,6 +43,32 @@ type RenderContext struct { URLPrefix string Metas map[string]string DefaultLink string + GitRepo *git.Repository + cancelFn func() +} + +// Cancel runs any cleanup functions that have been registered for this Ctx +func (ctx *RenderContext) Cancel() { + if ctx == nil || ctx.cancelFn == nil { + return + } + ctx.cancelFn() +} + +// AddCancel adds the provided fn as a Cleanup for this Ctx +func (ctx *RenderContext) AddCancel(fn func()) { + if ctx == nil { + return + } + oldCancelFn := ctx.cancelFn + if oldCancelFn == nil { + ctx.cancelFn = fn + return + } + ctx.cancelFn = func() { + defer oldCancelFn() + fn() + } } // Renderer defines an interface for rendering markup file to HTML diff --git a/routers/org/home.go b/routers/org/home.go index d84ae870ab6db..ad14f18454447 100644 --- a/routers/org/home.go +++ b/routers/org/home.go @@ -41,6 +41,7 @@ func Home(ctx *context.Context) { desc, err := markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: map[string]string{"mode": "document"}, + GitRepo: ctx.Repo.GitRepo, }, org.Description) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/repo/issue.go b/routers/repo/issue.go index fd2877e7069d6..a7951b6bce180 100644 --- a/routers/repo/issue.go +++ b/routers/repo/issue.go @@ -1137,6 +1137,7 @@ func ViewIssue(ctx *context.Context) { issue.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, issue.Content) if err != nil { ctx.ServerError("RenderString", err) @@ -1301,6 +1302,7 @@ func ViewIssue(ctx *context.Context) { comment.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, comment.Content) if err != nil { ctx.ServerError("RenderString", err) @@ -1376,6 +1378,7 @@ func ViewIssue(ctx *context.Context) { comment.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, comment.Content) if err != nil { ctx.ServerError("RenderString", err) @@ -1734,6 +1737,7 @@ func UpdateIssueContent(ctx *context.Context) { content, err := markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Query("context"), Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, issue.Content) if err != nil { ctx.ServerError("RenderString", err) @@ -2161,6 +2165,7 @@ func UpdateCommentContent(ctx *context.Context) { content, err := markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Query("context"), Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, comment.Content) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/repo/milestone.go b/routers/repo/milestone.go index bb6b310cbe8d4..4cdca38dd02b6 100644 --- a/routers/repo/milestone.go +++ b/routers/repo/milestone.go @@ -88,6 +88,7 @@ func Milestones(ctx *context.Context) { m.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, m.Content) if err != nil { ctx.ServerError("RenderString", err) @@ -280,6 +281,7 @@ func MilestoneIssuesAndPulls(ctx *context.Context) { milestone.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, milestone.Content) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/repo/projects.go b/routers/repo/projects.go index eb0719995cb55..c7490893d5fe6 100644 --- a/routers/repo/projects.go +++ b/routers/repo/projects.go @@ -81,6 +81,7 @@ func Projects(ctx *context.Context) { projects[i].RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, projects[i].Description) if err != nil { ctx.ServerError("RenderString", err) @@ -322,6 +323,7 @@ func ViewProject(ctx *context.Context) { project.RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, project.Description) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/repo/release.go b/routers/repo/release.go index b7730e4ee25e9..3b700e80160c5 100644 --- a/routers/repo/release.go +++ b/routers/repo/release.go @@ -145,6 +145,7 @@ func releasesOrTags(ctx *context.Context, isTagList bool) { r.Note, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, r.Note) if err != nil { ctx.ServerError("RenderString", err) @@ -213,6 +214,7 @@ func SingleRelease(ctx *context.Context) { release.Note, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: ctx.Repo.Repository.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, release.Note) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/repo/view.go b/routers/repo/view.go index cd5b0f43edbc7..74e2a29597724 100644 --- a/routers/repo/view.go +++ b/routers/repo/view.go @@ -338,6 +338,7 @@ func renderDirectory(ctx *context.Context, treeLink string) { Filename: readmeFile.name, URLPrefix: readmeTreelink, Metas: ctx.Repo.Repository.ComposeDocumentMetas(), + GitRepo: ctx.Repo.GitRepo, }, rd, &result) if err != nil { log.Error("Render failed: %v then fallback", err) @@ -512,6 +513,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st Filename: blob.Name(), URLPrefix: path.Dir(treeLink), Metas: ctx.Repo.Repository.ComposeDocumentMetas(), + GitRepo: ctx.Repo.GitRepo, }, rd, &result) if err != nil { ctx.ServerError("Render", err) @@ -570,6 +572,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st Filename: blob.Name(), URLPrefix: path.Dir(treeLink), Metas: ctx.Repo.Repository.ComposeDocumentMetas(), + GitRepo: ctx.Repo.GitRepo, }, rd, &result) if err != nil { ctx.ServerError("Render", err) diff --git a/routers/user/home.go b/routers/user/home.go index acf73f82fe362..402743bcdf9b5 100644 --- a/routers/user/home.go +++ b/routers/user/home.go @@ -271,6 +271,7 @@ func Milestones(ctx *context.Context) { milestones[i].RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: milestones[i].Repo.Link(), Metas: milestones[i].Repo.ComposeMetas(), + GitRepo: ctx.Repo.GitRepo, }, milestones[i].Content) if err != nil { ctx.ServerError("RenderString", err) diff --git a/routers/user/profile.go b/routers/user/profile.go index 8ff1ee24adc80..cc04f3cd9a709 100644 --- a/routers/user/profile.go +++ b/routers/user/profile.go @@ -117,6 +117,7 @@ func Profile(ctx *context.Context) { content, err := markdown.RenderString(&markup.RenderContext{ URLPrefix: ctx.Repo.RepoLink, Metas: map[string]string{"mode": "document"}, + GitRepo: ctx.Repo.GitRepo, }, ctxUser.Description) if err != nil { ctx.ServerError("RenderString", err) From 6bbd5376ec40ed7a98130f286cf4be856c35ed79 Mon Sep 17 00:00:00 2001 From: zeripath Date: Mon, 7 Jun 2021 22:37:26 +0100 Subject: [PATCH 2/7] Apply suggestions from code review --- modules/git/repo_branch_gogit.go | 2 +- modules/git/repo_branch_nogogit.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/git/repo_branch_gogit.go b/modules/git/repo_branch_gogit.go index b5ac079f3ebe0..e8386b2dbd98f 100644 --- a/modules/git/repo_branch_gogit.go +++ b/modules/git/repo_branch_gogit.go @@ -13,7 +13,7 @@ import ( "github.com/go-git/go-git/v5/plumbing" ) -// IsReferenceExist returns true if given reference exists in the repository. +// IsObjectExist returns true if given reference exists in the repository. func (repo *Repository) IsObjectExist(name string) bool { if name == "" { return false diff --git a/modules/git/repo_branch_nogogit.go b/modules/git/repo_branch_nogogit.go index 8c93b54ab8bbb..dd34e48899034 100644 --- a/modules/git/repo_branch_nogogit.go +++ b/modules/git/repo_branch_nogogit.go @@ -14,7 +14,7 @@ import ( "strings" ) -// IsReferenceExist returns true if given reference exists in the repository. +// IsObjectExist returns true if given reference exists in the repository. func (repo *Repository) IsObjectExist(name string) bool { if name == "" { return false From a06d2c08f850d11da6c130b1e1ceacd4dc715427 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 8 Jun 2021 17:46:20 +0100 Subject: [PATCH 3/7] helps if you try to open the repository path! Signed-off-by: Andrew Thornton --- modules/markup/html.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index f8c4a8f5cca4d..f58363b2db225 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -951,9 +951,9 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { if ctx.GitRepo == nil { var err error - ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repo"]) + ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"]) if err != nil { - log.Error("unable to open repository: %s Error: %v", ctx.Metas["repo"], err) + log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) return } ctx.AddCancel(ctx.GitRepo.Close) From cafcee70b134349e14b4432ec3112c2d28138c05 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Tue, 8 Jun 2021 20:59:06 +0100 Subject: [PATCH 4/7] agh Signed-off-by: Andrew Thornton --- routers/user/home.go | 1 - 1 file changed, 1 deletion(-) diff --git a/routers/user/home.go b/routers/user/home.go index 402743bcdf9b5..acf73f82fe362 100644 --- a/routers/user/home.go +++ b/routers/user/home.go @@ -271,7 +271,6 @@ func Milestones(ctx *context.Context) { milestones[i].RenderedContent, err = markdown.RenderString(&markup.RenderContext{ URLPrefix: milestones[i].Repo.Link(), Metas: milestones[i].Repo.ComposeMetas(), - GitRepo: ctx.Repo.GitRepo, }, milestones[i].Content) if err != nil { ctx.ServerError("RenderString", err) From 5236139d7ff4105ea9f2d9df9de9225f25de9b37 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Wed, 9 Jun 2021 08:06:33 +0100 Subject: [PATCH 5/7] as per lunny Signed-off-by: Andrew Thornton --- modules/markup/html.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index f58363b2db225..d0e0517db0c55 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -956,7 +956,10 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) return } - ctx.AddCancel(ctx.GitRepo.Close) + ctx.AddCancel(func() { + ctx.GitRepo.Close() + ctx.GitRepo = nil + }) } // The regex does not lie, it matches the hash pattern. From 797f707d395925c296dda13595c49723831b0e20 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 18 Jun 2021 21:41:01 +0200 Subject: [PATCH 6/7] add cache --- modules/markup/html.go | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index bbaff17544bef..5eeea48edef20 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -997,6 +997,7 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { start := 0 next := node.NextSibling + cache := make(map[string]bool) for node != nil && node != next && start < len(node.Data) { m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:]) if m == nil { @@ -1015,20 +1016,27 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { // Because of this, we check to make sure that a matched hash is actually // a commit in the repository before making it a link. - if ctx.GitRepo == nil { - var err error - ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"]) - if err != nil { - log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) - return + // check cache first + exist, inCache := cache[hash] + if !inCache { + if ctx.GitRepo == nil { + var err error + ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"]) + if err != nil { + log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err) + return + } + ctx.AddCancel(func() { + ctx.GitRepo.Close() + ctx.GitRepo = nil + }) } - ctx.AddCancel(func() { - ctx.GitRepo.Close() - ctx.GitRepo = nil - }) + + exist = ctx.GitRepo.IsObjectExist(hash) + cache[hash] = exist } - if !ctx.GitRepo.IsObjectExist(hash) { + if !exist { start = m[3] continue } From 76632392b1ecd3e825fe61dd42c0180e45fe2f76 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 18 Jun 2021 21:45:28 +0200 Subject: [PATCH 7/7] Add ShaExistCache to RenderContext --- modules/markup/html.go | 8 +++++--- modules/markup/renderer.go | 25 +++++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index 5eeea48edef20..edf860da4510d 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -997,7 +997,9 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { start := 0 next := node.NextSibling - cache := make(map[string]bool) + if ctx.ShaExistCache == nil { + ctx.ShaExistCache = make(map[string]bool) + } for node != nil && node != next && start < len(node.Data) { m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:]) if m == nil { @@ -1017,7 +1019,7 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { // a commit in the repository before making it a link. // check cache first - exist, inCache := cache[hash] + exist, inCache := ctx.ShaExistCache[hash] if !inCache { if ctx.GitRepo == nil { var err error @@ -1033,7 +1035,7 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) { } exist = ctx.GitRepo.IsObjectExist(hash) - cache[hash] = exist + ctx.ShaExistCache[hash] = exist } if !exist { diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index a2ab08ffc56b1..d60c8ad71066b 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -36,20 +36,25 @@ func Init() { // RenderContext represents a render context type RenderContext struct { - Ctx context.Context - Filename string - Type string - IsWiki bool - URLPrefix string - Metas map[string]string - DefaultLink string - GitRepo *git.Repository - cancelFn func() + Ctx context.Context + Filename string + Type string + IsWiki bool + URLPrefix string + Metas map[string]string + DefaultLink string + GitRepo *git.Repository + ShaExistCache map[string]bool + cancelFn func() } // Cancel runs any cleanup functions that have been registered for this Ctx func (ctx *RenderContext) Cancel() { - if ctx == nil || ctx.cancelFn == nil { + if ctx == nil { + return + } + ctx.ShaExistCache = map[string]bool{} + if ctx.cancelFn == nil { return } ctx.cancelFn()