Skip to content

Commit

Permalink
Improve ref validity checking in fetchGit
Browse files Browse the repository at this point in the history
The previous regex was too strict and did not match what git was allowing. It
could lead to `fetchGit` not accepting valid branch names, even though they
exist in a repository (for example, branch names containing `/`, which are
pretty standard, like `release/1.0` branches).

The new regex defines what a branch name should **NOT** contain. It takes the
definitions from `refs.c` in https://github.com/git/git and `git help
check-ref-format` pages.

This change also introduces a test for ref name validity checking, which
compares the result from Nix with the result of `git check-ref-format --branch`.
  • Loading branch information
knl committed May 30, 2020
1 parent f60ce4f commit 77007d4
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/libfetchers/git.cc
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ struct GitInputScheme : InputScheme

auto input = std::make_unique<GitInput>(parseURL(getStrAttr(attrs, "url")));
if (auto ref = maybeGetStrAttr(attrs, "ref")) {
if (!std::regex_match(*ref, refRegex))
if (std::regex_search(*ref, badGitRefRegex))
throw BadURL("invalid Git branch/tag name '%s'", *ref);
input->ref = *ref;
}
Expand Down
1 change: 1 addition & 0 deletions src/libutil/url.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
namespace nix {

std::regex refRegex(refRegexS, std::regex::ECMAScript);
std::regex badGitRefRegex(badGitRefRegexS, std::regex::ECMAScript);
std::regex revRegex(revRegexS, std::regex::ECMAScript);
std::regex flakeIdRegex(flakeIdRegexS, std::regex::ECMAScript);

Expand Down
6 changes: 6 additions & 0 deletions src/libutil/url.hh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ const static std::string pathRegex = "(?:" + segmentRegex + "(?:/" + segmentRege
const static std::string refRegexS = "[a-zA-Z0-9][a-zA-Z0-9_.-]*"; // FIXME: check
extern std::regex refRegex;

// Instead of defining what a good Git Ref is, we define what a bad Git Ref is
// This is because of the definition of a ref in refs.c in https://github.com/git/git
// See tests/fetchGitRefs.sh for the full definition
const static std::string badGitRefRegexS = "//|^[./]|/\\.|\\.\\.|[[:cntrl:][:space:]:?^~\[]|\\\\|\\*|\\.lock$|\\.lock/|@\\{|[/.]$|^@$|^$";
extern std::regex badGitRefRegex;

// A Git revision (a SHA-1 commit hash).
const static std::string revRegexS = "[0-9a-fA-F]{40}";
extern std::regex revRegex;
Expand Down
111 changes: 111 additions & 0 deletions tests/fetchGitRefs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
source common.sh

if [[ -z $(type -p git) ]]; then
echo "Git not installed; skipping Git tests"
exit 99
fi

clearStore

repo="$TEST_ROOT/git"

rm -rf "$repo" "${repo}-tmp" "$TEST_HOME/.cache/nix"

git init "$repo"
git -C "$repo" config user.email "[email protected]"
git -C "$repo" config user.name "Foobar"

echo utrecht > "$repo"/hello
git -C "$repo" add hello
git -C "$repo" commit -m 'Bla1'

path=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = \"master\"; }).outPath")

# Test various combinations of ref names
# (taken from the git project)

# git help check-ref-format
# Git imposes the following rules on how references are named:
#
# 1. They can include slash / for hierarchical (directory) grouping, but no slash-separated component can begin with a dot . or end with the sequence .lock.
# 2. They must contain at least one /. This enforces the presence of a category like heads/, tags/ etc. but the actual names are not restricted. If the --allow-onelevel option is used, this rule is waived.
# 3. They cannot have two consecutive dots .. anywhere.
# 4. They cannot have ASCII control characters (i.e. bytes whose values are lower than \040, or \177 DEL), space, tilde ~, caret ^, or colon : anywhere.
# 5. They cannot have question-mark ?, asterisk *, or open bracket [ anywhere. See the --refspec-pattern option below for an exception to this rule.
# 6. They cannot begin or end with a slash / or contain multiple consecutive slashes (see the --normalize option below for an exception to this rule)
# 7. They cannot end with a dot ..
# 8. They cannot contain a sequence @{.
# 9. They cannot be the single character @.
# 10. They cannot contain a \.

valid_ref() {
{ set +x; printf >&2 '\n>>>>>>>>>> valid_ref %s\b <<<<<<<<<<\n' $(printf %s "$1" | sed -n -e l); set -x; }
git check-ref-format --branch "$1" >/dev/null
git -C "$repo" branch "$1" master >/dev/null
path1=$(nix eval --raw "(builtins.fetchGit { url = $repo; ref = ''$1''; }).outPath")
[[ $path1 = $path ]]
git -C "$repo" branch -D "$1" >/dev/null
}

invalid_ref() {
{ set +x; printf >&2 '\n>>>>>>>>>> invalid_ref %s\b <<<<<<<<<<\n' $(printf %s "$1" | sed -n -e l); set -x; }
# special case for a sole @:
# --branch @ will try to interpret @ as a branch reference and not fail. Thus we need --allow-onelevel
if [ "$1" = "@" ]; then
(! git check-ref-format --allow-onelevel "$1" >/dev/null 2>&1)
else
(! git check-ref-format --branch "$1" >/dev/null 2>&1)
fi
nix --debug eval --raw "(builtins.fetchGit { url = $repo; ref = ''$1''; }).outPath" 2>&1 | grep 'error: invalid Git branch/tag name' >/dev/null
}


valid_ref 'foox'
valid_ref '1337'
valid_ref 'foo.baz'
valid_ref 'foo/bar/baz'
valid_ref 'foo./bar'
valid_ref 'heads/foo@bar'
valid_ref "$(printf 'heads/fu\303\237')"
valid_ref 'foo-bar-baz'
valid_ref '$1'
valid_ref 'foo.locke'

invalid_ref 'refs///heads/foo'
invalid_ref 'heads/foo/'
invalid_ref '///heads/foo'
invalid_ref '.foo'
invalid_ref './foo'
invalid_ref './foo/bar'
invalid_ref 'foo/./bar'
invalid_ref 'foo/bar/.'
invalid_ref 'foo bar'
invalid_ref 'foo?bar'
invalid_ref 'foo^bar'
invalid_ref 'foo~bar'
invalid_ref 'foo:bar'
invalid_ref 'foo[bar'
invalid_ref 'foo/bar/.'
invalid_ref '.refs/foo'
invalid_ref 'refs/heads/foo.'
invalid_ref 'heads/foo..bar'
invalid_ref 'heads/foo?bar'
invalid_ref 'heads/foo.lock'
invalid_ref 'heads///foo.lock'
invalid_ref 'foo.lock/bar'
invalid_ref 'foo.lock///bar'
invalid_ref 'heads/v@{ation'
invalid_ref 'heads/foo\.ar' # should fail due to \
invalid_ref 'heads/foo\bar' # should fail due to \
invalid_ref "$(printf 'heads/foo\t')" # should fail because it has a TAB
invalid_ref "$(printf 'heads/foo\177')"
invalid_ref '@'

invalid_ref 'foo/*'
invalid_ref '*/foo'
invalid_ref 'foo/*/bar'
invalid_ref '*'
invalid_ref 'foo/*/*'
invalid_ref '*/foo/*'
invalid_ref '/foo'
invalid_ref ''
1 change: 1 addition & 0 deletions tests/local.mk
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ nix_tests = \
nar-access.sh \
structured-attrs.sh \
fetchGit.sh \
fetchGitRefs.sh \
fetchGitSubmodules.sh \
fetchMercurial.sh \
signing.sh \
Expand Down

0 comments on commit 77007d4

Please sign in to comment.