go-gitea · wxiaoguang · Mar 31, 2022 · Mar 28, 2022 · Mar 28, 2022 · Mar 28, 2022
diff --git a/models/migrations/v180.go b/models/migrations/v180.go
@@ -112,7 +112,7 @@ func removeCredentials(payload string) (string, error) {
 
 	opts.AuthPassword = ""
 	opts.AuthToken = ""
-	opts.CloneAddr = util.NewStringURLSanitizer(opts.CloneAddr, true).Replace(opts.CloneAddr)
+	opts.CloneAddr = util.SanitizeCredentialURLs(opts.CloneAddr)
 
 	confBytes, err := json.Marshal(opts)
 	if err != nil {

diff --git a/models/task.go b/models/task.go
@@ -245,7 +245,7 @@ func FinishMigrateTask(task *Task) error {
 	}
 	conf.AuthPassword = ""
 	conf.AuthToken = ""
-	conf.CloneAddr = util.NewStringURLSanitizer(conf.CloneAddr, true).Replace(conf.CloneAddr)
+	conf.CloneAddr = util.SanitizeCredentialURLs(conf.CloneAddr)
 	conf.AuthPasswordEncrypted = ""
 	conf.AuthTokenEncrypted = ""
 	conf.CloneAddrEncrypted = ""

diff --git a/modules/git/command.go b/modules/git/command.go
@@ -154,7 +154,7 @@ func (c *Command) RunWithContext(rc *RunContext) error {
 			args = make([]string, len(c.args))
 			copy(args, c.args)
 			for _, urlArgIndex := range argSensitiveURLIndexes {
-				args[urlArgIndex] = util.NewStringURLSanitizer(args[urlArgIndex], true).Replace(args[urlArgIndex])
+				args[urlArgIndex] = util.SanitizeCredentialURLs(args[urlArgIndex])
 			}
 		}
 		desc = fmt.Sprintf("%s %s [repo_path: %s]", c.name, strings.Join(args, " "), rc.Dir)

diff --git a/modules/git/repo.go b/modules/git/repo.go
@@ -156,7 +156,7 @@ func CloneWithArgs(ctx context.Context, from, to string, args []string, opts Clo
 	cmd.AddArguments("--", from, to)
 
 	if strings.Contains(from, "://") && strings.Contains(from, "@") {
-		cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.NewStringURLSanitizer(from, true).Replace(from), to, opts.Shared, opts.Mirror, opts.Depth))
+		cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, util.SanitizeCredentialURLs(from), to, opts.Shared, opts.Mirror, opts.Depth))
 	} else {
 		cmd.SetDescription(fmt.Sprintf("clone branch %s from %s to %s (shared: %t, mirror: %t, depth: %d)", opts.Branch, from, to, opts.Shared, opts.Mirror, opts.Depth))
 	}
@@ -209,7 +209,7 @@ func Push(ctx context.Context, repoPath string, opts PushOptions) error {
 		cmd.AddArguments(opts.Branch)
 	}
 	if strings.Contains(opts.Remote, "://") && strings.Contains(opts.Remote, "@") {
-		cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.NewStringURLSanitizer(opts.Remote, true).Replace(opts.Remote), opts.Force, opts.Mirror))
+		cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, util.SanitizeCredentialURLs(opts.Remote), opts.Force, opts.Mirror))
 	} else {
 		cmd.SetDescription(fmt.Sprintf("push branch %s to %s (force: %t, mirror: %t)", opts.Branch, opts.Remote, opts.Force, opts.Mirror))
 	}

diff --git a/modules/util/sanitize.go b/modules/util/sanitize.go
@@ -5,59 +5,69 @@
 package util
 
 import (
-	"net/url"
-	"strings"
-)
+	"bytes"
 
-const (
-	userPlaceholder = "sanitized-credential"
-	unparsableURL   = "(unparsable url)"
+	"github.com/yuin/goldmark/util"
 )
 
 type sanitizedError struct {
-	err      error
-	replacer *strings.Replacer
+	err error
 }
 
 func (err sanitizedError) Error() string {
-	return err.replacer.Replace(err.err.Error())
+	return SanitizeCredentialURLs(err.err.Error())
 }
 
-// NewSanitizedError wraps an error and replaces all old, new string pairs in the message text.
-func NewSanitizedError(err error, oldnew ...string) error {
-	return sanitizedError{err: err, replacer: strings.NewReplacer(oldnew...)}
+func (err sanitizedError) Unwrap() error {
+	return err.err
 }
 
-// NewURLSanitizedError wraps an error and replaces the url credential or removes them.
-func NewURLSanitizedError(err error, u *url.URL, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewURLSanitizer(u, usePlaceholder)}
+// SanitizeErrorCredentialURLs wraps the error and make sure the returned error message doesn't contain sensitive credentials in URLs
+func SanitizeErrorCredentialURLs(err error) error {
+	return sanitizedError{err: err}
 }
 
-// NewStringURLSanitizedError wraps an error and replaces the url credential or removes them.
-// If the url can't get parsed it gets replaced with a placeholder string.
-func NewStringURLSanitizedError(err error, unsanitizedURL string, usePlaceholder bool) error {
-	return sanitizedError{err: err, replacer: NewStringURLSanitizer(unsanitizedURL, usePlaceholder)}
-}
+const userPlaceholder = "sanitized-credential"
 
-// NewURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-func NewURLSanitizer(u *url.URL, usePlaceholder bool) *strings.Replacer {
-	old := u.String()
+var schemeSep = []byte("://")
 
-	if u.User != nil && usePlaceholder {
-		u.User = url.User(userPlaceholder)
-	} else {
-		u.User = nil
+// SanitizeCredentialURLs remove all credentials in URLs for the input string: "https://user:[email protected]" => "https://[email protected]"
-// SanitizeCredentialURLs remove all credentials in URLs for the input string: "https://user:[email protected]" => "https://[email protected]"
+// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:[email protected]" => "https://[email protected]"
-// SanitizeCredentialURLs remove all credentials in URLs for the input string: "https://user:[email protected]" => "https://[email protected]"
+// SanitizeCredentialURLs remove all credentials in URLs (starting with "scheme://") for the input string: "https://user:[email protected]" => "https://[email protected]"
+func SanitizeCredentialURLs(s string) string {
+	bs := util.StringToReadOnlyBytes(s)
+	schemeSepPos := bytes.Index(bs, schemeSep)
+	if schemeSepPos == -1 || bytes.IndexByte(bs[schemeSepPos:], '@') == -1 {
+		return s // fast return if there is no URL scheme or no userinfo
 	}
-	return strings.NewReplacer(old, u.String())
-}
-
-// NewStringURLSanitizer creates a replacer for the url with the credential sanitized or removed.
-// If the url can't get parsed it gets replaced with a placeholder string
-func NewStringURLSanitizer(unsanitizedURL string, usePlaceholder bool) *strings.Replacer {
-	u, err := url.Parse(unsanitizedURL)
-	if err != nil {
-		// don't log the error, since it might contain unsanitized URL.
-		return strings.NewReplacer(unsanitizedURL, unparsableURL)
+	out := make([]byte, 0, len(bs)+len(userPlaceholder))
+	for schemeSepPos != -1 {
+		schemeSepPos += 3         // skip the "://"
+		sepAtPos := -1            // the possible '@' position: "https://foo@[^here]host"
+		sepEndPos := schemeSepPos // the possible end position: "The https://host[^here] in log for test"
+	sepLoop:
+		for ; sepEndPos < len(bs); sepEndPos++ {
+			c := bs[sepEndPos]
+			if ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
+				continue
+			}
+			switch c {
+			case '@':
+				sepAtPos = sepEndPos
+			case '-', '.', '_', '~', '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '%':
+				continue // due to RFC 3986, userinfo can contain - . _ ~ ! $ & ' ( ) * + , ; = : and any percent-encoded chars
+			default:
+				break sepLoop // if it is an invalid char for URL (eg: space, '/', and others), stop the loop
+			}
+		}
+		if sepAtPos != -1 {
+			out = append(out, bs[:schemeSepPos]...)
+			out = append(out, userPlaceholder...)
+			out = append(out, bs[sepAtPos:sepEndPos]...)
+		} else {
+			out = append(out, bs[:sepEndPos]...)
+		}
+		bs = bs[sepEndPos:]
+		schemeSepPos = bytes.Index(bs, schemeSep)
 	}
-	return NewURLSanitizer(u, usePlaceholder)
+	out = append(out, bs...)
+	return util.BytesToReadOnlyString(out)
 }
diff --git a/modules/util/sanitize_test.go b/modules/util/sanitize_test.go
@@ -11,154 +11,53 @@ import (
 	"github.com/stretchr/testify/assert"
 )
 
-func TestNewSanitizedError(t *testing.T) {
-	err := errors.New("error while secret on test")
-	err2 := NewSanitizedError(err)
-	assert.Equal(t, err.Error(), err2.Error())
-
-	cases := []struct {
-		input    error
-		oldnew   []string
-		expected string
-	}{
-		// case 0
-		{
-			errors.New("error while secret on test"),
-			[]string{"secret", "replaced"},
-			"error while replaced on test",
-		},
-		// case 1
-		{
-			errors.New("error while sec-ret on test"),
-			[]string{"secret", "replaced"},
-			"error while sec-ret on test",
-		},
-	}
-
-	for n, c := range cases {
-		err := NewSanitizedError(c.input, c.oldnew...)
-
-		assert.Equal(t, c.expected, err.Error(), "case %d: error should match", n)
-	}
+func TestSanitizeErrorCredentialURLs(t *testing.T) {
+	err := errors.New("error with https://[email protected]")
+	se := SanitizeErrorCredentialURLs(err)
+	assert.Equal(t, "error with https://"+userPlaceholder+"@b.com", se.Error())
 }
 
-func TestNewStringURLSanitizer(t *testing.T) {
+func TestSanitizeCredentialURLs(t *testing.T) {
 	cases := []struct {
-		input       string
-		placeholder bool
-		expected    string
+		input    string
+		expected string
 	}{
-		// case 0
-		{
-			"https://github.com/go-gitea/test_repo.git",
-			true,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 1
 		{
 			"https://github.com/go-gitea/test_repo.git",
-			false,
 			"https://github.com/go-gitea/test_repo.git",
 		},
-		// case 2
 		{
 			"https://[email protected]/go-gitea/test_repo.git",
-			true,
 			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
 		},
-		// case 3
-		{
-			"https://[email protected]/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 4
 		{
 			"https://user:[email protected]/go-gitea/test_repo.git",
-			true,
 			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
 		},
-		// case 5
 		{
-			"https://user:[email protected]/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
+			"ftp://x@",
+			"ftp://" + userPlaceholder + "@",
 		},
-		// case 6
 		{
-			"https://gi\nthub.com/go-gitea/test_repo.git",
-			false,
-			unparsableURL,
+			"ftp://x/@",
+			"ftp://x/@",
 		},
-	}
-
-	for n, c := range cases {
-		// uses NewURLSanitizer internally
-		result := NewStringURLSanitizer(c.input, c.placeholder).Replace(c.input)
-
-		assert.Equal(t, c.expected, result, "case %d: error should match", n)
-	}
-}
-
-func TestNewStringURLSanitizedError(t *testing.T) {
-	cases := []struct {
-		input       string
-		placeholder bool
-		expected    string
-	}{
-		// case 0
 		{
-			"https://github.com/go-gitea/test_repo.git",
-			true,
-			"https://github.com/go-gitea/test_repo.git",
+			"ftp://@x/@", // test multiple @ chars
+			"ftp://" + userPlaceholder + "@x/@",
 		},
-		// case 1
 		{
-			"https://github.com/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
+			"😊ftp://@x😊", // test unicode
+			"😊ftp://" + userPlaceholder + "@x😊",
 		},
-		// case 2
 		{
-			"https://[email protected]/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
+			"URLs in log https://u:b@h and https://u:b@h:80/, with https://h.com and [email protected]",
+			"URLs in log https://" + userPlaceholder + "@h and https://" + userPlaceholder + "@h:80/, with https://h.com and [email protected]",
 		},
-		// case 3
-		{
-			"https://[email protected]/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 4
-		{
-			"https://user:[email protected]/go-gitea/test_repo.git",
-			true,
-			"https://" + userPlaceholder + "@github.com/go-gitea/test_repo.git",
-		},
-		// case 5
-		{
-			"https://user:[email protected]/go-gitea/test_repo.git",
-			false,
-			"https://github.com/go-gitea/test_repo.git",
-		},
-		// case 6
-		{
-			"https://gi\nthub.com/go-gitea/test_repo.git",
-			false,
-			unparsableURL,
-		},
-	}
-
-	encloseText := func(input string) string {
-		return "test " + input + " test"
 	}
 
 	for n, c := range cases {
-		err := errors.New(encloseText(c.input))
-
-		result := NewStringURLSanitizedError(err, c.input, c.placeholder)
-
-		assert.Equal(t, encloseText(c.expected), result.Error(), "case %d: error should match", n)
+		result := SanitizeCredentialURLs(c.input)
+		assert.Equal(t, c.expected, result, "case %d: error should match", n)
 	}
 }
diff --git a/routers/api/v1/repo/migrate.go b/routers/api/v1/repo/migrate.go
@@ -235,7 +235,7 @@ func handleMigrateError(ctx *context.APIContext, repoOwner *user_model.User, rem
 	case base.IsErrNotSupported(err):
 		ctx.Error(http.StatusUnprocessableEntity, "", err)
 	default:
-		err = util.NewStringURLSanitizedError(err, remoteAddr, true)
+		err = util.SanitizeErrorCredentialURLs(err)
 		if strings.Contains(err.Error(), "Authentication failed") ||
 			strings.Contains(err.Error(), "Bad credentials") ||
 			strings.Contains(err.Error(), "could not read Username") {

diff --git a/routers/web/repo/migrate.go b/routers/web/repo/migrate.go
@@ -106,8 +106,7 @@ func handleMigrateError(ctx *context.Context, owner *user_model.User, err error,
 		ctx.Data["Err_RepoName"] = true
 		ctx.RenderWithErr(ctx.Tr("repo.form.name_pattern_not_allowed", err.(db.ErrNamePatternNotAllowed).Pattern), tpl, form)
 	default:
-		remoteAddr, _ := forms.ParseRemoteAddr(form.CloneAddr, form.AuthUsername, form.AuthPassword)
-		err = util.NewStringURLSanitizedError(err, remoteAddr, true)
+		err = util.SanitizeErrorCredentialURLs(err)
 		if strings.Contains(err.Error(), "Authentication failed") ||
 			strings.Contains(err.Error(), "Bad credentials") ||
 			strings.Contains(err.Error(), "could not read Username") {