-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use a more general (and faster) method to sanitize URLs with credentials #19239
Changes from 2 commits
3d3f223
26cf692
7c7116c
ded6969
dafa3f0
9580607
80719d7
ca05837
fe5e461
9f5fa58
26f05b5
fc167f3
70bc63d
59046a5
0252a7b
74ea0d5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,59 +5,69 @@ | |
package util | ||
|
||
import ( | ||
"net/url" | ||
"strings" | ||
) | ||
"bytes" | ||
|
||
const ( | ||
userPlaceholder = "sanitized-credential" | ||
unparsableURL = "(unparsable url)" | ||
"github.com/yuin/goldmark/util" | ||
) | ||
|
||
type sanitizedError struct { | ||
err error | ||
replacer *strings.Replacer | ||
err error | ||
} | ||
|
||
func (err sanitizedError) Error() string { | ||
return err.replacer.Replace(err.err.Error()) | ||
return SanitizeCredentialURLs(err.err.Error()) | ||
} | ||
|
||
// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text. | ||
func NewSanitizedError(err error, oldnew ...string) error { | ||
return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)} | ||
func (err sanitizedError) Unwrap() error { | ||
return err.err | ||
} | ||
|
||
// NewURLSanitizedError wraps an error and replaces the url credential or removes them. | ||
func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error { | ||
return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)} | ||
// SanitizeErrorCredentialURLs wraps the error and makes sure the returned error message doesn't contain sensitive credentials in URLs. | ||
func SanitizeErrorCredentialURLs(err error) error { | ||
return sanitizedError{err: err} | ||
} | ||
|
||
// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them. | ||
// If the url can't get parsed it gets replaced with a placeholder string. | ||
func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error { | ||
return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)} | ||
} | ||
const userPlaceholder = "sanitized-credential" | ||
|
||
// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed. | ||
func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer { | ||
old := u.String() | ||
var schemeSep = []byte("://") | ||
|
||
if u.User != nil && usePlaceholder { | ||
u.User = url.User(userPlaceholder) | ||
} else { | ||
u.User = nil | ||
// SanitizeCredentialURLs removes all credentials in URLs for the input string: "https://user:pass@domain.com" => "https://sanitized-credential@domain.com" | ||
func SanitizeCredentialURLs(s string) string { | ||
bs := util.StringToReadOnlyBytes(s) | ||
schemeSepPos := bytes.Index(bs, schemeSep) | ||
if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 { | ||
return s // fast return if there is no URL scheme or no userinfo | ||
} | ||
return strings.NewReplacer(old, u.String()) | ||
} | ||
|
||
// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed. | ||
// If the url can't get parsed it gets replaced with a placeholder string | ||
func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer { | ||
u, err := url.Parse(unsanitizedURL) | ||
if err != nil { | ||
// don't log the error, since it might contain unsanitized URL. | ||
return strings.NewReplacer(unsanitizedURL, unparsableURL) | ||
out := make([]byte, 0, len(bs)+len(userPlaceholder)) | ||
for schemeSepPos != -1 { | ||
schemeSepPos += 3 // skip the "://" | ||
sepAtPos := -1 // the possible '@' position: "https://foo@[^here]host" | ||
sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test" | ||
sepLoop: | ||
for ; sepEndPos < len(bs); sepEndPos++ { | ||
c := bs[sepEndPos] | ||
if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') { | ||
continue | ||
} | ||
switch c { | ||
case '@': | ||
sepAtPos = sepEndPos | ||
case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%': | ||
continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars | ||
default: | ||
break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop | ||
} | ||
} | ||
if sepAtPos != -1 { | ||
out = append(out, bs[:schemeSepPos]...) | ||
out = append(out, userPlaceholder...) | ||
out = append(out, bs[sepAtPos:sepEndPos]...) | ||
} else { | ||
out = append(out, bs[:sepEndPos]...) | ||
} | ||
bs = bs[sepEndPos:] | ||
schemeSepPos = bytes.Index(bs, schemeSep) | ||
} | ||
return NewURLSanitizer(u, usePlaceholder) | ||
out = append(out, bs...) | ||
return util.BytesToReadOnlyString(out) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,154 +11,53 @@ import ( | |
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestNewSanitizedError(t *testing.T) { | ||
err := errors.New("error while secret on test") | ||
err2 := NewSanitizedError(err) | ||
assert.Equal(t, err.Error(), err2.Error()) | ||
|
||
cases := []struct { | ||
input error | ||
oldnew []string | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
errors.New("error while secret on test"), | ||
[]string{"secret", "replaced"}, | ||
"error while replaced on test", | ||
}, | ||
// case 1 | ||
{ | ||
errors.New("error while sec-ret on test"), | ||
[]string{"secret", "replaced"}, | ||
"error while sec-ret on test", | ||
}, | ||
} | ||
|
||
for n, c := range cases { | ||
err := NewSanitizedError(c.input, c.oldnew...) | ||
|
||
assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n) | ||
} | ||
func TestSanitizeErrorCredentialURLs(t *testing.T) { | ||
err := errors.New("error with https://[email protected]") | ||
se := SanitizeErrorCredentialURLs(err) | ||
assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error()) | ||
} | ||
|
||
func TestNewStringURLSanitizer(t *testing.T) { | ||
func TestSanitizeCredentialURLs(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
placeholder bool | ||
expected string | ||
input string | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
true, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 1 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 2 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 3 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We still need these normal cases. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Which cases? I think I have covered most of them. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I mean, why did we delete these original test cases? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All the code has been rewritten, and the new cases cover the old ones. If you feel something is missing, please just point it out and add it. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And many cases are indeed the old cases, for example, these |
||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 4 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 5 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"ftp://x@", | ||
"ftp://" + userPlaceholder + "@", | ||
}, | ||
// case 6 | ||
{ | ||
"https://gi\nthub.com/go-gitea/test_repo.git", | ||
false, | ||
unparsableURL, | ||
"ftp://x/@", | ||
"ftp://x/@", | ||
}, | ||
} | ||
|
||
for n, c := range cases { | ||
// uses NewURLSanitizer internally | ||
result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input) | ||
|
||
assert.Equal(t, c.expected, result, "case %d: error should match", n) | ||
} | ||
} | ||
|
||
func TestNewStringURLSanitizedError(t *testing.T) { | ||
cases := []struct { | ||
input string | ||
placeholder bool | ||
expected string | ||
}{ | ||
// case 0 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
true, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"ftp://@x/@", // test multiple @ chars | ||
"ftp://" + userPlaceholder + "@x/@", | ||
}, | ||
// case 1 | ||
{ | ||
"https://github.com/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
"😊ftp://@x😊", // test unicode | ||
"😊ftp://" + userPlaceholder + "@x😊", | ||
}, | ||
// case 2 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and [email protected]", | ||
"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and [email protected]", | ||
}, | ||
// case 3 | ||
{ | ||
"https://[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 4 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
true, | ||
"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 5 | ||
{ | ||
"https://user:[email protected]/go-gitea/test_repo.git", | ||
false, | ||
"https://github.com/go-gitea/test_repo.git", | ||
}, | ||
// case 6 | ||
{ | ||
"https://gi\nthub.com/go-gitea/test_repo.git", | ||
false, | ||
unparsableURL, | ||
}, | ||
} | ||
|
||
encloseText := func(input string) string { | ||
return "test " + input + " test" | ||
} | ||
|
||
for n, c := range cases { | ||
err := errors.New(encloseText(c.input)) | ||
|
||
result := NewStringURLSanitizedError(err, c.input, c.placeholder) | ||
|
||
assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n) | ||
result := SanitizeCredentialURLs(c.input) | ||
assert.Equal(t, c.expected, result, "case %d: error should match", n) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code only removes the credential from the first URL it finds in the string.
If we want to strip out credentials from all URLs in the string, we're going to need to do something else. (Possibly iterate across all "://" occurrences in the string; however, see below...)
One other issue is that there are URLs which do not have the ":", for example
//username:password@domain.com
which says to use the current protocol. I think /\username:password@domain.com
also works. IMO I don't think we necessarily need to sanitize these - we should probably just make it clear that we're not going to do this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, I will add it to the comment. IMO "//xxxx" can't be treated as a valid URL in plain strings. It's only valid in a context that already has a scheme (say, inside a browser).
And /\ is not standard; it's just a browser's dirty hack. Golang's url.Parse doesn't support /\ either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"This code only removes the credential from the first URL it finds in the string"
No, it removes all of them. See the test case.