Skip to content

Commit

Permalink
Handle tarballs that don't consist of a single top-level directory
Browse files Browse the repository at this point in the history
Fixes #4785 (top-level directories are no longer merged into one).

Fixes #10983 (top-level non-directories are no longer discarded).
  • Loading branch information
edolstra committed Jul 26, 2024
1 parent d9ba2a1 commit 06b686b
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 20 deletions.
77 changes: 57 additions & 20 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ Object lookupObject(git_repository * repo, const git_oid & oid, git_object_t typ
}

template<typename T>
T peelObject(git_repository * repo, git_object * obj, git_object_t type)
T peelObject(git_object * obj, git_object_t type)
{
T obj2;
if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) {
Expand All @@ -136,6 +136,29 @@ T peelObject(git_repository * repo, git_object * obj, git_object_t type)
return obj2;
}

template<typename T>
T dupObject(typename T::pointer obj)
{
T obj2;
if (git_object_dup((git_object * *) (typename T::pointer *) Setter(obj2), (git_object *) obj))
throw Error("duplicating object '%s': %s", *git_object_id((git_object *) obj), git_error_last()->message);
return obj2;
}

/**
* Peel the specified object (i.e. follow tag and commit objects) to
* either a blob or a tree.
*/
static Object peelToTreeOrBlob(git_object * obj)
{
/* git_object_peel() doesn't handle blob objects, so handle those
specially. */
if (git_object_type(obj) == GIT_OBJECT_BLOB)
return dupObject<Object>(obj);
else
return peelObject<Object>(obj, GIT_OBJECT_TREE);
}

struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
{
/** Location of the repository on disk. */
Expand Down Expand Up @@ -166,7 +189,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
std::unordered_set<git_oid> done;
std::queue<Commit> todo;

todo.push(peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));
todo.push(peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT));

while (auto commit = pop(todo)) {
if (!done.insert(*git_commit_id(commit->get())).second) continue;
Expand All @@ -184,7 +207,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>

uint64_t getLastModified(const Hash & rev) override
{
auto commit = peelObject<Commit>(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);
auto commit = peelObject<Commit>(lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT);

return git_commit_time(commit.get());
}
Expand Down Expand Up @@ -476,11 +499,11 @@ ref<GitRepo> GitRepo::openRepo(const std::filesystem::path & path, bool create,
struct GitSourceAccessor : SourceAccessor
{
ref<GitRepoImpl> repo;
Tree root;
Object root;

GitSourceAccessor(ref<GitRepoImpl> repo_, const Hash & rev)
: repo(repo_)
, root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE))
, root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get()))
{
}

Expand All @@ -506,7 +529,7 @@ struct GitSourceAccessor : SourceAccessor
std::optional<Stat> maybeLstat(const CanonPath & path) override
{
if (path.isRoot())
return Stat { .type = tDirectory };
return Stat { .type = git_object_type(root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular };

auto entry = lookup(path);
if (!entry)
Expand Down Expand Up @@ -616,10 +639,10 @@ struct GitSourceAccessor : SourceAccessor
std::optional<Tree> lookupTree(const CanonPath & path)
{
if (path.isRoot()) {
Tree tree;
if (git_tree_dup(Setter(tree), root.get()))
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
return tree;
if (git_object_type(root.get()) == GIT_OBJECT_TREE)
return dupObject<Tree>((git_tree *) &*root);
else
return std::nullopt;
}

auto entry = lookup(path);
Expand All @@ -646,10 +669,10 @@ struct GitSourceAccessor : SourceAccessor
std::variant<Tree, Submodule> getTree(const CanonPath & path)
{
if (path.isRoot()) {
Tree tree;
if (git_tree_dup(Setter(tree), root.get()))
throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message);
return tree;
if (git_object_type(root.get()) == GIT_OBJECT_TREE)
return dupObject<Tree>((git_tree *) &*root);
else
throw Error("Git root object '%s' is not a directory", *git_object_id(root.get()));
}

auto entry = need(path);
Expand All @@ -669,6 +692,9 @@ struct GitSourceAccessor : SourceAccessor

Blob getBlob(const CanonPath & path, bool expectSymlink)
{
if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB)
return dupObject<Blob>((git_blob *) &*root);

auto notExpected = [&]()
{
throw Error(
Expand Down Expand Up @@ -782,8 +808,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink

std::vector<PendingDir> pendingDirs;

size_t componentsToStrip = 1;

void pushBuilder(std::string name)
{
git_treebuilder * b;
Expand Down Expand Up @@ -839,9 +863,6 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
{
std::span<const std::string> pathComponents2{pathComponents};

if (pathComponents2.size() <= componentsToStrip) return false;
pathComponents2 = pathComponents2.subspan(componentsToStrip);

updateBuilders(
isDir
? pathComponents2
Expand Down Expand Up @@ -964,11 +985,27 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
git_tree_entry_filemode(entry));
}

Hash sync() override {
Hash sync() override
{
updateBuilders({});

auto [oid, _name] = popBuilder();

/* If the root directory contains a single entry that is a
directory or a non-executable regular file, return that as
the top-level object. We don't do this for executables
because they don't have a tree hash in the Git object
model. */
auto _tree = lookupObject(*repo, oid, GIT_OBJECT_TREE);
auto tree = (const git_tree *) &*_tree;

if (git_tree_entrycount(tree) == 1) {
auto entry = git_tree_entry_byindex(tree, 0);
auto mode = git_tree_entry_filemode(entry);
if (mode == GIT_FILEMODE_BLOB || mode == GIT_FILEMODE_TREE)
oid = *git_tree_entry_id(entry);
}

return toHash(oid);
}
};
Expand Down
25 changes: 25 additions & 0 deletions tests/functional/tarball.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,28 @@ path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .s
[[ $(cat "$path/a/zzz") = bar ]]
[[ $(cat "$path/c/aap") = bar ]]
[[ $(cat "$path/fnord") = bar ]]

# Test a tarball that has multiple top-level directories.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root" "$TEST_ROOT/tar_root/foo" "$TEST_ROOT/tar_root/bar"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ -d "$path/foo" ]]
[[ -d "$path/bar" ]]

# Test a tarball that has a single non-executable regular file.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root"
echo bar > "$TEST_ROOT/tar_root/foo"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ $(cat "$path") = bar ]]

# Test a tarball that has a single executable regular file.
rm -rf "$TEST_ROOT/tar_root"
mkdir -p "$TEST_ROOT/tar_root"
echo bar > "$TEST_ROOT/tar_root/foo"
chmod +x "$TEST_ROOT/tar_root/foo"
tar cvf "$TEST_ROOT/tar.tar" -C "$TEST_ROOT/tar_root" .
path="$(nix flake prefetch --refresh --json "tarball+file://$TEST_ROOT/tar.tar" | jq -r .storePath)"
[[ $(cat "$path/foo") = bar ]]

0 comments on commit 06b686b

Please sign in to comment.