Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a more general (and faster) method to sanitize URLs with credentials #19239

Merged
merged 16 commits into from
Mar 31, 2022
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion models/migrations/v180.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func removeCredentials(payload string) (string, error) {

opts.AuthPassword = ""
opts.AuthToken = ""
opts.CloneAddr = util.NewStringURLSanitizer(opts.CloneAddr, true).Replace(opts.CloneAddr)
opts.CloneAddr = util.SanitizeCredentialURLs(opts.CloneAddr)

confBytes, err := json.Marshal(opts)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion models/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ func FinishMigrateTask(task *Task) error {
}
conf.AuthPassword = ""
conf.AuthToken = ""
conf.CloneAddr = util.NewStringURLSanitizer(conf.CloneAddr, true).Replace(conf.CloneAddr)
conf.CloneAddr = util.SanitizeCredentialURLs(conf.CloneAddr)
conf.AuthPasswordEncrypted = ""
conf.AuthTokenEncrypted = ""
conf.CloneAddrEncrypted = ""
Expand Down
2 changes: 1 addition & 1 deletion modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func (c *Command) RunWithContext(rc *RunContext) error {
args = make([]string, len(c.args))
copy(args, c.args)
for _, urlArgIndex := range argSensitiveURLIndexes {
args[urlArgIndex] = util.NewStringURLSanitizer(args[urlArgIndex], true).Replace(args[urlArgIndex])
args[urlArgIndex] = util.SanitizeCredentialURLs(args[urlArgIndex])
}
}
desc = fmt.Sprintf("%s %s [repo_path: %s]", c.name, strings.Join(args, " "), rc.Dir)
Expand Down
4 changes: 2 additions & 2 deletions modules/git/repo.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func CloneWithArgs(ctx context.Context, from, to string, args []string, opts Clo
cmd.AddArguments("--", from, to)

if strings.Contains(from, "://") && strings.Contains(from, "@") {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.NewStringURLSanitizer(from, true).Replace(from), to, opts.Shared, opts.Mirror, opts.Depth))
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.SanitizeCredentialURLs(from), to, opts.Shared, opts.Mirror, opts.Depth))
} else {
cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, from, to, opts.Shared, opts.Mirror, opts.Depth))
}
Expand Down Expand Up @@ -209,7 +209,7 @@ func Push(ctx context.Context, repoPath string, opts PushOptions) error {
cmd.AddArguments(opts.Branch)
}
if strings.Contains(opts.Remote, "://") && strings.Contains(opts.Remote, "@") {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.NewStringURLSanitizer(opts.Remote, true).Replace(opts.Remote), opts.Force, opts.Mirror))
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.SanitizeCredentialURLs(opts.Remote), opts.Force, opts.Mirror))
} else {
cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, opts.Remote, opts.Force, opts.Mirror))
}
Expand Down
88 changes: 50 additions & 38 deletions modules/util/sanitize.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,59 +5,71 @@
package util

import (
"net/url"
"strings"
)
"bytes"
"unicode"

const (
userPlaceholder = "sanitized-credential"
unparsableURL = "(unparsable url)"
"github.com/yuin/goldmark/util"
)

type sanitizedError struct {
err error
replacer *strings.Replacer
err error
}

func (err sanitizedError) Error() string {
return err.replacer.Replace(err.err.Error())
return SanitizeCredentialURLs(err.err.Error())
}

// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
func NewSanitizedError(err error, oldnew ...string) error {
return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
func (err sanitizedError) Unwrap() error {
return err.err
}

// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
func SanitizeErrorCredentialURLs(err error) error {
return sanitizedError{err: err}
}

// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
// If the url can't get parsed it gets replaced with a placeholder string.
func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
}
const userPlaceholder = "sanitized-credential"

// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
old := u.String()
var schemeSep = []byte("://")

if u.User != nil && usePlaceholder {
u.User = url.User(userPlaceholder)
} else {
u.User = nil
// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:[email protected]" => "https://[email protected]"
func SanitizeCredentialURLs(s string) string {
bs := util.StringToReadOnlyBytes(s)
schemeSepPos := bytes.Index(bs, schemeSep)
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
return s // fast return if there is no URL scheme or no userinfo
}
return strings.NewReplacer(old, u.String())
}

// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
// If the url can't get parsed it gets replaced with a placeholder string
func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
u, err := url.Parse(unsanitizedURL)
if err != nil {
// don't log the error, since it might contain unsanitized URL.
return strings.NewReplacer(unsanitizedURL, unparsableURL)
out := make([]byte, 0, len(bs)+len(userPlaceholder))
for schemeSepPos != -1 {
schemeSepPos += 3 // skip the "://"
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host"
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
sepLoop:
for ; sepEndPos < len(bs); sepEndPos++ {
c := bs[sepEndPos]
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
continue
}
switch c {
case '@':
sepAtPos = sepEndPos
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
default:
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
}
}
// if there is '@', and the string is like "s://u@h", then hide the "u" part
if sepAtPos != -1 && (schemeSepPos >= 4 && unicode.IsLetter(rune(bs[schemeSepPos-4]))) && sepAtPos-schemeSepPos > 0 && sepEndPos-sepAtPos > 0 {
out = append(out, bs[:schemeSepPos]...)
out = append(out, userPlaceholder...)
out = append(out, bs[sepAtPos:sepEndPos]...)
} else {
out = append(out, bs[:sepEndPos]...)
}
bs = bs[sepEndPos:]
schemeSepPos = bytes.Index(bs, schemeSep)
}
return NewURLSanitizer(u, usePlaceholder)
out = append(out, bs...)
return util.BytesToReadOnlyString(out)
}
139 changes: 25 additions & 114 deletions modules/util/sanitize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,154 +11,65 @@ import (
"github.com/stretchr/testify/assert"
)

func TestNewSanitizedError(t *testing.T) {
err := errors.New("error while secret on test")
err2 := NewSanitizedError(err)
assert.Equal(t, err.Error(), err2.Error())

cases := []struct {
input error
oldnew []string
expected string
}{
// case 0
{
errors.New("error while secret on test"),
[]string{"secret", "replaced"},
"error while replaced on test",
},
// case 1
{
errors.New("error while sec-ret on test"),
[]string{"secret", "replaced"},
"error while sec-ret on test",
},
}

for n, c := range cases {
err := NewSanitizedError(c.input, c.oldnew...)

assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
}
func TestSanitizeErrorCredentialURLs(t *testing.T) {
err := errors.New("error with https://[email protected]")
se := SanitizeErrorCredentialURLs(err)
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
}

func TestNewStringURLSanitizer(t *testing.T) {
func TestSanitizeCredentialURLs(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
input string
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 2
{
"https://[email protected]/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need this normal cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which case? I think I have covered most.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean why we delete these original test cases?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All code are rewritten, all new cases cover old ones. If you feel which is missing, please just point out and add it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And many cases are indeed the old cases, for example, these github.com cases.

{
"https://[email protected]/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
},
// case 4
{
"https://user:[email protected]/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
},
// case 5
{
"https://user:[email protected]/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"ftp://x@",
"ftp://" + userPlaceholder + "@",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
"ftp://x/@",
"ftp://x/@",
},
}

for n, c := range cases {
// uses NewURLSanitizer internally
result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)

assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}

func TestNewStringURLSanitizedError(t *testing.T) {
cases := []struct {
input string
placeholder bool
expected string
}{
// case 0
{
"https://github.com/go-gitea/test_repo.git",
true,
"https://github.com/go-gitea/test_repo.git",
},
// case 1
{
"https://github.com/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"ftp://u@x/@", // test multiple @ chars
"ftp://" + userPlaceholder + "@x/@",
},
// case 2
{
"https://[email protected]/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"😊ftp://u@x😊", // test unicode
"😊ftp://" + userPlaceholder + "@x😊",
},
// case 3
{
"https://[email protected]/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"://@",
"://@",
},
// case 4
{
"https://user:[email protected]/go-gitea/test_repo.git",
true,
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
"//u:p@h", // do not process URLs without explicit scheme, they are not treated as "valid" URLs because there is no scheme context in string
"//u:p@h",
},
// case 5
{
"https://user:[email protected]/go-gitea/test_repo.git",
false,
"https://github.com/go-gitea/test_repo.git",
"s://u@h", // the minimal pattern to be sanitized
"s://" + userPlaceholder + "@h",
},
// case 6
{
"https://gi\nthub.com/go-gitea/test_repo.git",
false,
unparsableURL,
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and [email protected]",
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and [email protected]",
},
}

encloseText := func(input string) string {
return "test " + input + " test"
}

for n, c := range cases {
err := errors.New(encloseText(c.input))

result := NewStringURLSanitizedError(err, c.input, c.placeholder)

assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
result := SanitizeCredentialURLs(c.input)
assert.Equal(t, c.expected, result, "case %d: error should match", n)
}
}
2 changes: 1 addition & 1 deletion routers/api/v1/repo/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ func handleMigrateError(ctx *context.APIContext, repoOwner *user_model.User, rem
case base.IsErrNotSupported(err):
ctx.Error(http.StatusUnprocessableEntity, "", err)
default:
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {
Expand Down
3 changes: 1 addition & 2 deletions routers/web/repo/migrate.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,7 @@ func handleMigrateError(ctx *context.Context, owner *user_model.User, err error,
ctx.Data["Err_RepoName"] = true
ctx.RenderWithErr(ctx.Tr("repo.form.name_pattern_not_allowed", err.(db.ErrNamePatternNotAllowed).Pattern), tpl, form)
default:
remoteAddr, _ := forms.ParseRemoteAddr(form.CloneAddr, form.AuthUsername, form.AuthPassword)
err = util.NewStringURLSanitizedError(err, remoteAddr, true)
err = util.SanitizeErrorCredentialURLs(err)
if strings.Contains(err.Error(), "Authentication failed") ||
strings.Contains(err.Error(), "Bad credentials") ||
strings.Contains(err.Error(), "could not read Username") {
Expand Down
Loading