Skip to content

Commit

Permalink
Fix a couple of CommentAsPatch issues. (go-gitea#14804)
Browse files Browse the repository at this point in the history
Backport go-gitea#14804

* CutDiffAroundLine makes the incorrect assumption that `---` and `+++` always represent part of the header of a diff.

This PR adds a flag to its parsing to prevent this problem and adds a streaming parsing technique to CutDiffAroundLine using an io.pipe instead of just sending data to an unbounded buffer.

Fix go-gitea#14711

Signed-off-by: Andrew Thornton <[email protected]>

* Handle unquoted comment patch files

When making comment patches unfortunately the patch does not always quote the filename
This makes the diff --git header ambiguous again.

This PR finally adds handling for ambiguity in to parse patch

Fix go-gitea#14812

Signed-off-by: Andrew Thornton <[email protected]>

* Add in testing for no error

There is no way currently for CutDiffAroundLine in this test to cause an
error however, it should still be tested.

Signed-off-by: Andrew Thornton <[email protected]>
  • Loading branch information
zeripath committed Feb 27, 2021
1 parent 90bf1e7 commit 6a38de8
Show file tree
Hide file tree
Showing 6 changed files with 270 additions and 41 deletions.
30 changes: 22 additions & 8 deletions modules/git/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,30 +125,39 @@ var hunkRegex = regexp.MustCompile(`^@@ -(?P<beginOld>[0-9]+)(,(?P<endOld>[0-9]+

const cmdDiffHead = "diff --git "

func isHeader(lof string) bool {
return strings.HasPrefix(lof, cmdDiffHead) || strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++")
func isHeader(lof string, inHunk bool) bool {
return strings.HasPrefix(lof, cmdDiffHead) || (!inHunk && (strings.HasPrefix(lof, "---") || strings.HasPrefix(lof, "+++")))
}

// CutDiffAroundLine cuts a diff of a file in way that only the given line + numberOfLine above it will be shown
// it also recalculates hunks and adds the appropriate headers to the new diff.
// Warning: Only one-file diffs are allowed.
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) string {
func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLine int) (string, error) {
if line == 0 || numbersOfLine == 0 {
// no line or num of lines => no diff
return ""
return "", nil
}

scanner := bufio.NewScanner(originalDiff)
hunk := make([]string, 0)

// begin is the start of the hunk containing searched line
// end is the end of the hunk ...
// currentLine is the line number on the side of the searched line (differentiated by old)
// otherLine is the line number on the opposite side of the searched line (differentiated by old)
var begin, end, currentLine, otherLine int64
var headerLines int

inHunk := false

for scanner.Scan() {
lof := scanner.Text()
// Add header to enable parsing
if isHeader(lof) {

if isHeader(lof, inHunk) {
if strings.HasPrefix(lof, cmdDiffHead) {
inHunk = false
}
hunk = append(hunk, lof)
headerLines++
}
Expand All @@ -157,6 +166,7 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi
}
// Detect "hunk" with contains commented lof
if strings.HasPrefix(lof, "@@") {
inHunk = true
// Already got our hunk. End of hunk detected!
if len(hunk) > headerLines {
break
Expand Down Expand Up @@ -213,15 +223,19 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi
}
}
}
err := scanner.Err()
if err != nil {
return "", err
}

// No hunk found
if currentLine == 0 {
return ""
return "", nil
}
// headerLines + hunkLine (1) = totalNonCodeLines
if len(hunk)-headerLines-1 <= numbersOfLine {
// No need to cut the hunk => return existing hunk
return strings.Join(hunk, "\n")
return strings.Join(hunk, "\n"), nil
}
var oldBegin, oldNumOfLines, newBegin, newNumOfLines int64
if old {
Expand Down Expand Up @@ -256,5 +270,5 @@ func CutDiffAroundLine(originalDiff io.Reader, line int64, old bool, numbersOfLi
// construct the new hunk header
newHunk[headerLines] = fmt.Sprintf("@@ -%d,%d +%d,%d @@",
oldBegin, oldNumOfLines, newBegin, newNumOfLines)
return strings.Join(newHunk, "\n")
return strings.Join(newHunk, "\n"), nil
}
64 changes: 58 additions & 6 deletions modules/git/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,28 @@ const exampleDiff = `diff --git a/README.md b/README.md
+ cut off
+ cut off`

const breakingDiff = `diff --git a/aaa.sql b/aaa.sql
index d8e4c92..19dc8ad 100644
--- a/aaa.sql
+++ b/aaa.sql
@@ -1,9 +1,10 @@
--some comment
--- some comment 5
+--some coment 2
+-- some comment 3
create or replace procedure test(p1 varchar2)
is
begin
---new comment
dbms_output.put_line(p1);
+--some other comment
end;
/
`

func TestCutDiffAroundLine(t *testing.T) {
result := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3)
result, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 4, false, 3)
assert.NoError(t, err)
resultByLine := strings.Split(result, "\n")
assert.Len(t, resultByLine, 7)
// Check if headers got transferred
Expand All @@ -37,18 +57,50 @@ func TestCutDiffAroundLine(t *testing.T) {
assert.Equal(t, "+ Build Status", resultByLine[4])

// Must be same result as before since old line 3 == new line 5
newResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3)
newResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 3, true, 3)
assert.NoError(t, err)
assert.Equal(t, result, newResult, "Must be same result as before since old line 3 == new line 5")

newResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300)
newResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 300)
assert.NoError(t, err)
assert.Equal(t, exampleDiff, newResult)

emptyResult := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0)
emptyResult, err := CutDiffAroundLine(strings.NewReader(exampleDiff), 6, false, 0)
assert.NoError(t, err)
assert.Empty(t, emptyResult)

// Line is out of scope
emptyResult = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0)
emptyResult, err = CutDiffAroundLine(strings.NewReader(exampleDiff), 434, false, 0)
assert.NoError(t, err)
assert.Empty(t, emptyResult)

// Handle minus diffs properly
minusDiff, err := CutDiffAroundLine(strings.NewReader(breakingDiff), 2, false, 4)
assert.NoError(t, err)

expected := `diff --git a/aaa.sql b/aaa.sql
--- a/aaa.sql
+++ b/aaa.sql
@@ -1,9 +1,10 @@
--some comment
--- some comment 5
+--some coment 2`
assert.Equal(t, expected, minusDiff)

// Handle minus diffs properly
minusDiff, err = CutDiffAroundLine(strings.NewReader(breakingDiff), 3, false, 4)
assert.NoError(t, err)

expected = `diff --git a/aaa.sql b/aaa.sql
--- a/aaa.sql
+++ b/aaa.sql
@@ -1,9 +1,10 @@
--some comment
--- some comment 5
+--some coment 2
+-- some comment 3`

assert.Equal(t, expected, minusDiff)
}

func BenchmarkCutDiffAroundLine(b *testing.B) {
Expand All @@ -69,7 +121,7 @@ func ExampleCutDiffAroundLine() {
Docker Pulls
+ cut off
+ cut off`
result := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3)
result, _ := CutDiffAroundLine(strings.NewReader(diff), 4, false, 3)
println(result)
}

Expand Down
22 changes: 14 additions & 8 deletions modules/migrations/gitea_uploader.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package migrations

import (
"bytes"
"context"
"fmt"
"io"
Expand Down Expand Up @@ -811,13 +810,20 @@ func (g *GiteaLocalUploader) CreateReviews(reviews ...*base.Review) error {
}

var patch string
patchBuf := new(bytes.Buffer)
if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, patchBuf); err != nil {
// We should ignore the error since the commit maybe removed when force push to the pull request
log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err)
} else {
patch = git.CutDiffAroundLine(patchBuf, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines)
}
reader, writer := io.Pipe()
defer func() {
_ = reader.Close()
_ = writer.Close()
}()
go func() {
if err := git.GetRepoRawDiffForFile(g.gitRepo, pr.MergeBase, headCommitID, git.RawDiffNormal, comment.TreePath, writer); err != nil {
// We should ignore the error since the commit maybe removed when force push to the pull request
log.Warn("GetRepoRawDiffForFile failed when migrating [%s, %s, %s, %s]: %v", g.gitRepo.Path, pr.MergeBase, headCommitID, comment.TreePath, err)
}
_ = writer.Close()
}()

patch, _ = git.CutDiffAroundLine(reader, int64((&models.Comment{Line: int64(line + comment.Position - 1)}).UnsignedLine()), line < 0, setting.UI.CodeCommentLines)

var c = models.Comment{
Type: models.CommentTypeCode,
Expand Down
95 changes: 89 additions & 6 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -583,6 +583,7 @@ type DiffFile struct {
IsBin bool
IsLFSFile bool
IsRenamed bool
IsAmbiguous bool
IsSubmodule bool
Sections []*DiffSection
IsIncomplete bool
Expand Down Expand Up @@ -776,12 +777,32 @@ parsingLoop:
if strings.HasSuffix(line, " 160000\n") {
curFile.IsSubmodule = true
}
case strings.HasPrefix(line, "rename from "):
curFile.IsRenamed = true
curFile.Type = DiffFileRename
if curFile.IsAmbiguous {
curFile.OldName = line[len("rename from ") : len(line)-1]
}
case strings.HasPrefix(line, "rename to "):
curFile.IsRenamed = true
curFile.Type = DiffFileRename
if curFile.IsAmbiguous {
curFile.Name = line[len("rename to ") : len(line)-1]
curFile.IsAmbiguous = false
}
case strings.HasPrefix(line, "copy from "):
curFile.IsRenamed = true
curFile.Type = DiffFileCopy
if curFile.IsAmbiguous {
curFile.OldName = line[len("copy from ") : len(line)-1]
}
case strings.HasPrefix(line, "copy to "):
curFile.IsRenamed = true
curFile.Type = DiffFileCopy
if curFile.IsAmbiguous {
curFile.Name = line[len("copy to ") : len(line)-1]
curFile.IsAmbiguous = false
}
case strings.HasPrefix(line, "new file"):
curFile.Type = DiffFileAdd
curFile.IsCreated = true
Expand All @@ -803,9 +824,35 @@ parsingLoop:
case strings.HasPrefix(line, "Binary"):
curFile.IsBin = true
case strings.HasPrefix(line, "--- "):
// Do nothing with this line
// Handle ambiguous filenames
if curFile.IsAmbiguous {
if len(line) > 6 && line[4] == 'a' {
curFile.OldName = line[6 : len(line)-1]
if line[len(line)-2] == '\t' {
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
}
} else {
curFile.OldName = ""
}
}
// Otherwise do nothing with this line
case strings.HasPrefix(line, "+++ "):
// Do nothing with this line
// Handle ambiguous filenames
if curFile.IsAmbiguous {
if len(line) > 6 && line[4] == 'b' {
curFile.Name = line[6 : len(line)-1]
if line[len(line)-2] == '\t' {
curFile.Name = curFile.Name[:len(curFile.Name)-1]
}
if curFile.OldName == "" {
curFile.OldName = curFile.Name
}
} else {
curFile.Name = curFile.OldName
}
curFile.IsAmbiguous = false
}
// Otherwise do nothing with this line, but now switch to parsing hunks
lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input)
diff.TotalAddition += curFile.Addition
diff.TotalDeletion += curFile.Deletion
Expand Down Expand Up @@ -1056,13 +1103,33 @@ func createDiffFile(diff *Diff, line string) *DiffFile {

rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
curFile.Type = DiffFileChange
curFile.OldName = readFileName(rd)
curFile.Name = readFileName(rd)
oldNameAmbiguity := false
newNameAmbiguity := false

curFile.OldName, oldNameAmbiguity = readFileName(rd)
curFile.Name, newNameAmbiguity = readFileName(rd)
if oldNameAmbiguity && newNameAmbiguity {
curFile.IsAmbiguous = true
// OK we should bet that the oldName and the newName are the same if they can be made to be same
// So we need to start again ...
if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
// diff --git a/b b/b b/b b/b b/b b/b
//
midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] {
curFile.OldName = old[2:]
curFile.Name = old[2:]
}
}
}

curFile.IsRenamed = curFile.Name != curFile.OldName
return curFile
}

func readFileName(rd *strings.Reader) string {
func readFileName(rd *strings.Reader) (string, bool) {
ambiguity := false
var name string
char, _ := rd.ReadByte()
_ = rd.UnreadByte()
Expand All @@ -1072,9 +1139,24 @@ func readFileName(rd *strings.Reader) string {
name = name[1:]
}
} else {
// This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
ambiguity = true
fmt.Fscanf(rd, "%s ", &name)
char, _ := rd.ReadByte()
_ = rd.UnreadByte()
for !(char == 0 || char == '"' || char == 'b') {
var suffix string
fmt.Fscanf(rd, "%s ", &suffix)
name += " " + suffix
char, _ = rd.ReadByte()
_ = rd.UnreadByte()
}
}
if len(name) < 2 {
log.Error("Unable to determine name from reader: %v", rd)
return "", true
}
return name[2:]
return name[2:], ambiguity
}

// GetDiffRange builds a Diff between two commits of a repository.
Expand Down Expand Up @@ -1185,6 +1267,7 @@ func CommentAsDiff(c *models.Comment) (*Diff, error) {
diff, err := ParsePatch(setting.Git.MaxGitDiffLines,
setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch))
if err != nil {
log.Error("Unable to parse patch: %v", err)
return nil, err
}
if len(diff.Files) == 0 {
Expand Down
Loading

0 comments on commit 6a38de8

Please sign in to comment.