-
Notifications
You must be signed in to change notification settings - Fork 371
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Add the stdlib_diff tool to compare gno and go standard libraries #1425
Changes from 12 commits
c9534c7
a7ac60e
e72346d
02c0d0b
7a15992
1c6b428
158b41d
22f286c
640f056
6824c34
5989e22
8b350ca
6d9af8f
7c7a7ab
ff6f98f
019016f
95df663
5ffff78
b6b3ad4
8895e1c
d125294
f420a8c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Stdlibs_diff | ||
|
||
Stdlibs_diff is a tool that generates an HTML report indicating differences between gno standard libraries and go standard libraries. | ||
|
||
## Usage | ||
|
||
Compare the `go` standard libraries to the `gno` standard libraries | ||
|
||
```shell | ||
./stdlibs_diff --src <path to go standard libraries> --dst <path to gno standard libraries> --out <output directory> | ||
``` | ||
|
||
Compare the `gno` standard libraries to the `go` standard libraries | ||
|
||
```shell | ||
./stdlibs_diff --src <path to gno standard libraries> --dst <path to go standard libraries> --out <output directory> --src_is_gno | ||
``` | ||
|
||
|
||
## Parameters | ||
|
||
| Flag | Description | Default value | | ||
| ---------- | ------------------------------------------------------------------ | ------------- | | ||
| src | Directory containing packages that will be compared to destination | None | | ||
| dst | Directory containing packages; used to compare src packages | None | | ||
| out | Directory where the report will be created | None | | ||
| src_is_gno | Indicates whether the src parameter is the gno standard library | false | | ||
|
||
## Tips | ||
|
||
An index.html is generated at the root of the report location. Utilize it to navigate easily through the report. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package main | ||
|
||
import "errors" | ||
|
||
const ( | ||
MYERS = "myers" | ||
) | ||
|
||
type Algorithm interface { | ||
Do() (srcDiff []LineDifferrence, dstDiff []LineDifferrence) | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
func AlgorithmFactory(src, dst []string, algoType string) (Algorithm, error) { | ||
switch algoType { | ||
case MYERS: | ||
return NewMyers(src, dst), nil | ||
default: | ||
return nil, errors.New("unknown algorithm type") | ||
} | ||
} | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
package main | ||
|
||
// diffStatus describes the outcome of comparing a file between the source
// and destination trees.
type diffStatus uint

// The numeric values start at 1 (MISSING_IN_SRC = 1 ... NO_DIFF = 4), so the
// zero value of diffStatus is not a valid status.
const (
	MISSING_IN_SRC diffStatus = iota + 1
	MISSING_IN_DST
	HAS_DIFF
	NO_DIFF
)

// String returns a human readable description of the status, or "Unknown"
// for any value that is not one of the declared constants.
func (status diffStatus) String() string {
	// Index 0 is intentionally left empty: no status has the value 0.
	labels := [...]string{
		MISSING_IN_SRC: "missing in src",
		MISSING_IN_DST: "missing in dst",
		HAS_DIFF:       "files differ",
		NO_DIFF:        "files are equal",
	}

	if status < MISSING_IN_SRC || status > NO_DIFF {
		return "Unknown"
	}
	return labels[status]
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"os" | ||
) | ||
|
||
// FileDiff is a struct for comparing differences between two files. | ||
type FileDiff struct { | ||
Src []string // Lines of the source file. | ||
Dst []string // Lines of the destination file. | ||
DiffAlgorithm Algorithm // Algorithm used for comparison. | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// LineDifferrence is one row of a diff: a line of text together with the
// operation that was applied to it.
type LineDifferrence struct {
	// Line is the text of the line as it should be displayed.
	Line string
	// Operation is the textual name of the operation attached to the line
	// (the string form of an operation value).
	Operation string
}
|
||
// NewFileDiff creates a new FileDiff instance for comparing differences between | ||
// the specified source and destination files. It initializes the source and | ||
// destination file lines and the specified diff algorithm. | ||
func NewFileDiff(srcPath, dstPath, algoType string) (*FileDiff, error) { | ||
src := getFileLines(srcPath) | ||
dst := getFileLines(dstPath) | ||
|
||
diffAlgorithm, err := AlgorithmFactory(src, dst, algoType) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
return &FileDiff{ | ||
Src: src, | ||
Dst: dst, | ||
DiffAlgorithm: diffAlgorithm, | ||
}, nil | ||
} | ||
|
||
// Differences returns the differences in lines between the source and | ||
// destination files using the configured diff algorithm. | ||
func (f *FileDiff) Differences() ([]LineDifferrence, []LineDifferrence) { | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
srcDiff, dstDiff := f.DiffAlgorithm.Do() | ||
return srcDiff, dstDiff | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// getFileLines reads the file at path p and returns its content as a slice
// of lines, without line terminators.
//
// The function is best-effort and never reports an error: when the file
// cannot be opened it returns an empty, non-nil slice, and when reading
// fails part-way through it returns the lines read up to that point.
func getFileLines(p string) []string {
	// Upper bound for a single line. bufio.Scanner's default limit
	// (bufio.MaxScanTokenSize, 64 KiB) makes Scan stop at the first longer
	// line, which would silently drop that line and the rest of the file.
	const maxLineSize = 16 * 1024 * 1024

	lines := make([]string, 0)

	f, err := os.Open(p)
	if err != nil {
		return lines
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	// Start with the default buffer size and let it grow up to maxLineSize.
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineSize)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}

	// A scanner error is deliberately not surfaced: the signature has no
	// error result, so the lines collected so far are returned.
	return lines
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
package main | ||
|
||
import ( | ||
"flag" | ||
"log" | ||
) | ||
|
||
func main() { | ||
var srcPath string | ||
var dstPath string | ||
var outDirectory string | ||
var srcIsGno bool | ||
|
||
flag.StringVar(&srcPath, "src", "", "Directory containing packages that will be compared to destination") | ||
flag.StringVar(&dstPath, "dst", "", "Directory containing packages; used to compare src packages") | ||
flag.StringVar(&outDirectory, "out", "", "Directory where the report will be created") | ||
flag.BoolVar(&srcIsGno, "src_is_gno", false, "If true, indicates that the src parameter corresponds to the gno standard libraries") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd avoid this in favour of just matching go and gno files ignoring their extensions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @thehowl I need a clarification on this. Do you want to compare all files ? I mean even files that are not .go or .gno ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean the following:
Ie. this is an example of which files should match:
|
||
flag.Parse() | ||
|
||
reportBuilder, err := NewReportBuilder(srcPath, dstPath, outDirectory, srcIsGno) | ||
if err != nil { | ||
log.Fatal("can't build report builder: ", err.Error()) | ||
} | ||
|
||
log.Println("Building report...") | ||
if err := reportBuilder.Build(); err != nil { | ||
log.Fatalln("can't build report: ", err.Error()) | ||
} | ||
log.Println("Report generation done!") | ||
} |
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was kind of a NIH moment, considering a good library already exists :) I don't have too much trouble keeping this file if you want, though, since this is just in an external tool and the code is not that large, but try to use good external dependencies for common problems if they exist, instead of just reimplementing the algorithm from wikipedia. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I missed this library ! I will integrate it in the tool. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've checked and tested the library (v1.3.1), it seems that the current version is giving wrong results. I've tested with an older version (v.1.1.0) and it seems to work but I'm not convinced about the stability. I think that it is better to keep the actual myers implementation for now and if when the lib is fixed we switch to it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Can you specify what you mean? My concern is about performance. It doesn't need to be spectacular but I'd want the tool to be able to make a full, recursive analysis of the stdlibs directories in < 2min on the machines we test on. If you're able to do it with your code, it doesn't matter and I'm fine either way |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
package main | ||
|
||
import ( | ||
"slices" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. seeing as you are importing slices, please add a |
||
) | ||
|
||
var _ Algorithm = (*Myers)(nil) | ||
|
||
// Myers implements a line-based diff using the Myers algorithm.
type Myers struct {
	// src holds the lines of the source file.
	src []string
	// dst holds the lines of the destination file.
	dst []string
}

// NewMyers returns a Myers instance that compares the given source lines
// with the given destination lines. The slices are stored as provided.
func NewMyers(src, dst []string) *Myers {
	m := new(Myers)
	m.src = src
	m.dst = dst
	return m
}
|
||
// Do performs the Myers algorithm to find the differences between source and destination files. | ||
// It returns the differences as two slices of LineDifferrence representing source and destination changes. | ||
func (m *Myers) Do() ([]LineDifferrence, []LineDifferrence) { | ||
operations := m.doMyers() | ||
|
||
srcIndex, dstIndex, insertCount, deleteCount := 0, 0, 0, 0 | ||
dstDiff := make([]LineDifferrence, 0) | ||
srcDiff := make([]LineDifferrence, 0) | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
for _, op := range operations { | ||
switch op { | ||
case INSERT: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: "+" + m.dst[dstIndex], Operation: op.String()}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: "", Operation: MOVE.String()}) | ||
dstIndex += 1 | ||
thehowl marked this conversation as resolved.
Show resolved
Hide resolved
|
||
insertCount++ | ||
continue | ||
|
||
case MOVE: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: m.src[srcIndex], Operation: op.String()}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: m.src[srcIndex], Operation: op.String()}) | ||
srcIndex += 1 | ||
dstIndex += 1 | ||
continue | ||
|
||
case DELETE: | ||
dstDiff = append(dstDiff, LineDifferrence{Line: "", Operation: MOVE.String()}) | ||
srcDiff = append(srcDiff, LineDifferrence{Line: "-" + m.src[srcIndex], Operation: op.String()}) | ||
srcIndex += 1 | ||
deleteCount++ | ||
continue | ||
} | ||
} | ||
|
||
// Means that src file is empty. | ||
if insertCount == len(srcDiff) { | ||
srcDiff = make([]LineDifferrence, 0) | ||
} | ||
// Means that dst file is empty. | ||
if deleteCount == len(dstDiff) { | ||
dstDiff = make([]LineDifferrence, 0) | ||
} | ||
return srcDiff, dstDiff | ||
} | ||
|
||
// doMyers performs the Myers algorithm and returns the list of operations. | ||
func (m *Myers) doMyers() []operation { | ||
var tree []map[int]int | ||
var x, y int | ||
|
||
srcLen := len(m.src) | ||
dstLen := len(m.dst) | ||
max := srcLen + dstLen | ||
|
||
for pathLen := 0; pathLen <= max; pathLen++ { | ||
optimalCoordinates := make(map[int]int, pathLen+2) | ||
tree = append(tree, optimalCoordinates) | ||
|
||
if pathLen == 0 { | ||
commonPrefixLen := 0 | ||
for srcLen > commonPrefixLen && dstLen > commonPrefixLen && m.src[commonPrefixLen] == m.dst[commonPrefixLen] { | ||
commonPrefixLen++ | ||
} | ||
optimalCoordinates[0] = commonPrefixLen | ||
|
||
if commonPrefixLen == srcLen && commonPrefixLen == dstLen { | ||
return m.getAllOperations(tree) | ||
} | ||
continue | ||
} | ||
|
||
lastV := tree[pathLen-1] | ||
|
||
for k := -pathLen; k <= pathLen; k += 2 { | ||
if k == -pathLen || (k != pathLen && lastV[k-1] < lastV[k+1]) { | ||
x = lastV[k+1] | ||
} else { | ||
x = lastV[k-1] + 1 | ||
} | ||
|
||
y = x - k | ||
|
||
for x < srcLen && y < dstLen && m.src[x] == m.dst[y] { | ||
x, y = x+1, y+1 | ||
} | ||
|
||
optimalCoordinates[k] = x | ||
|
||
if x == srcLen && y == dstLen { | ||
return m.getAllOperations(tree) | ||
} | ||
} | ||
} | ||
|
||
return m.getAllOperations(tree) | ||
} | ||
|
||
// getAllOperations retrieves the list of operations from the calculated tree. | ||
func (m *Myers) getAllOperations(tree []map[int]int) []operation { | ||
var operations []operation | ||
var k, prevK, prevX, prevY int | ||
|
||
x := len(m.src) | ||
y := len(m.dst) | ||
|
||
for pathLen := len(tree) - 1; pathLen > 0; pathLen-- { | ||
k = x - y | ||
lastV := tree[pathLen-1] | ||
|
||
if k == -pathLen || (k != pathLen && lastV[k-1] < lastV[k+1]) { | ||
prevK = k + 1 | ||
} else { | ||
prevK = k - 1 | ||
} | ||
|
||
prevX = lastV[prevK] | ||
prevY = prevX - prevK | ||
|
||
for x > prevX && y > prevY { | ||
operations = append(operations, MOVE) | ||
x -= 1 | ||
y -= 1 | ||
} | ||
|
||
if x == prevX { | ||
operations = append(operations, INSERT) | ||
} else { | ||
operations = append(operations, DELETE) | ||
} | ||
|
||
x, y = prevX, prevY | ||
} | ||
|
||
if tree[0][0] != 0 { | ||
for i := 0; i < tree[0][0]; i++ { | ||
operations = append(operations, MOVE) | ||
} | ||
} | ||
|
||
slices.Reverse(operations) | ||
return operations | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
package main | ||
|
||
// operation enumerates the kinds of step a diff algorithm can emit to
// describe how two files differ.
type operation uint

// The numeric values start at 1 (INSERT = 1, DELETE = 2, MOVE = 3), so the
// zero value of operation is not a valid operation.
const (
	// INSERT represents an insertion operation.
	INSERT operation = iota + 1
	// DELETE represents a deletion operation.
	DELETE
	// MOVE represents a move operation.
	MOVE
)

// String returns the short name of the operation ("INS", "DEL" or "MOV"),
// or "UNKNOWN" for any value that is not one of the declared constants.
func (op operation) String() string {
	// Index 0 is intentionally left empty: no operation has the value 0.
	names := [...]string{
		INSERT: "INS",
		DELETE: "DEL",
		MOVE:   "MOV",
	}

	if op < INSERT || op > MOVE {
		return "UNKNOWN"
	}
	return names[op]
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pretty sure with package flag it's a single dash, ie.
-src
not--src