Skip to content

Commit

Permalink
Finish separating concerns with tarball cache
Browse files Browse the repository at this point in the history
There is no longer an `importTarball` method. Instead, there is a
`unpackTarfileToSink` function (back in libutil). The caller can use
this with the `getParseSink` method we added in the last commit easily
enough.

In addition, tarball cache functionality is separated from `git-utils`
and moved into `tarball-cache`. This ensures we are separating mechanism
and policy.
  • Loading branch information
Ericson2314 committed Jan 3, 2024
1 parent 8505346 commit 6854550
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 99 deletions.
86 changes: 0 additions & 86 deletions src/libfetchers/git-utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return std::nullopt;
}

TarballInfo importTarball(Source & source) override;

std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override;

std::string resolveSubmoduleUrl(
Expand Down Expand Up @@ -847,88 +845,4 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
return result;
}

/**
 * Return a handle to the Git repository that serves as the tarball cache.
 *
 * The repository path is `<cache dir>/nix/tarball-cache`; it is computed
 * on the first call and reused by every later call.
 */
ref<GitRepo> getTarballCache()
{
    static CanonPath tarballCachePath(getCacheDir() + "/nix/tarball-cache");

    // NOTE(review): the two `true` flags are presumably "create" and
    // "bare" -- confirm against the GitRepoImpl constructor.
    return make_ref<GitRepoImpl>(tarballCachePath, true, true);
}

}

#include "tarfile.hh"
#include <archive_entry.h>

namespace nix {

/**
 * Read a tarball from `source` and feed every member into the sink
 * returned by `getParseSink()`.
 *
 * Directories, regular files (marked executable when the owner-execute
 * bit is set) and symlinks are forwarded to the sink; any other file
 * type is rejected with an `Error`.
 *
 * @return The hash produced by the sink's `sync()` together with the
 * largest member modification time seen. The running maximum starts at
 * 0, so `lastModified` is never below 0.
 */
GitRepo::TarballInfo GitRepoImpl::importTarball(Source & source)
{
    TarArchive archive { source };

    auto parseSink = getParseSink();

    time_t lastModified = 0;

    // A single read buffer shared by all members. Allocating it inside
    // the read loop would re-allocate and zero-fill 128 KiB per chunk.
    std::vector<unsigned char> buf(128 * 1024);

    for (;;) {
        // FIXME: merge with extract_archive
        struct archive_entry * entry;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF) break;
        auto path = archive_entry_pathname(entry);
        if (!path)
            throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
        // Warnings are reported but do not abort the import.
        if (r == ARCHIVE_WARN)
            warn(archive_error_string(archive.archive));
        else
            archive.check(r);

        lastModified = std::max(lastModified, archive_entry_mtime(entry));

        switch (archive_entry_filetype(entry)) {

        case AE_IFDIR:
            parseSink->createDirectory(path);
            break;

        case AE_IFREG: {
            parseSink->createRegularFile(path, [&](auto & crf) {
                if (archive_entry_mode(entry) & S_IXUSR)
                    crf.isExecutable();

                // Stream the member's contents chunk by chunk.
                while (true) {
                    auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                    if (n < 0)
                        throw Error("cannot read file '%s' from tarball", path);
                    if (n == 0) break;
                    crf(std::string_view {
                        (const char *) buf.data(),
                        (size_t) n,
                    });
                }
            });

            break;
        }

        case AE_IFLNK: {
            auto target = archive_entry_symlink(entry);
            // A missing target would otherwise reach the sink as a
            // null `const char *`.
            if (!target)
                throw Error("cannot get symlink target of '%s' in tarball", path);

            parseSink->createSymlink(path, target);

            break;
        }

        default:
            throw Error("file '%s' in tarball has unsupported file type", path);
        }
    }

    return TarballInfo {
        .treeHash = parseSink->sync(),
        .lastModified = lastModified
    };
}


}
10 changes: 0 additions & 10 deletions src/libfetchers/git-utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,6 @@ struct GitRepo
const std::string & url,
const std::string & base) = 0;

/**
 * Result of importing a tarball via `importTarball`.
 */
struct TarballInfo
{
/** Hash returned by the parse sink's `sync()` once the import finished. */
Hash treeHash;
/** Largest modification time found among the tarball's members. */
time_t lastModified;
};

virtual bool hasObject(const Hash & oid) = 0;

virtual ref<InputAccessor> getAccessor(const Hash & rev) = 0;
Expand All @@ -102,10 +96,6 @@ struct GitRepo
* serialisation. This is memoised on-disk.
*/
virtual Hash treeHashToNarHash(const Hash & treeHash) = 0;

virtual TarballInfo importTarball(Source & source) = 0;
};

ref<GitRepo> getTarballCache();

}
15 changes: 12 additions & 3 deletions src/libfetchers/github.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
#include "fetchers.hh"
#include "fetch-settings.hh"
#include "tarball.hh"
#include "tarfile.hh"
#include "git-utils.hh"
#include "tarball-cache.hh"

#include <optional>
#include <nlohmann/json.hpp>
Expand Down Expand Up @@ -191,7 +193,7 @@ struct GitArchiveInputScheme : InputScheme

virtual DownloadUrl getDownloadUrl(const Input & input) const = 0;

std::pair<Input, GitRepo::TarballInfo> downloadArchive(ref<Store> store, Input input) const
std::pair<Input, TarballInfo> downloadArchive(ref<Store> store, Input input) const
{
if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD");

Expand All @@ -218,7 +220,7 @@ struct GitArchiveInputScheme : InputScheme
auto treeHash = getRevAttr(*treeHashAttrs, "treeHash");
auto lastModified = getIntAttr(*lastModifiedAttrs, "lastModified");
if (getTarballCache()->hasObject(treeHash))
return {std::move(input), GitRepo::TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }};
return {std::move(input), TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }};
else
debug("Git tree with hash '%s' has disappeared from the cache, refetching...", treeHash.gitRev());
}
Expand All @@ -233,7 +235,14 @@ struct GitArchiveInputScheme : InputScheme
getFileTransfer()->download(std::move(req), sink);
});

auto tarballInfo = getTarballCache()->importTarball(*source);
TarArchive archive { *source };
auto parseSink = getTarballCache()->getParseSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);

TarballInfo tarballInfo {
.treeHash = parseSink->sync(),
.lastModified = lastModified
};

cache->upsert(treeHashKey, Attrs{{"treeHash", tarballInfo.treeHash.gitRev()}});
cache->upsert(lastModifiedKey, Attrs{{"lastModified", (uint64_t) tarballInfo.lastModified}});
Expand Down
13 changes: 13 additions & 0 deletions src/libfetchers/tarball-cache.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "tarball-cache.hh"
#include "users.hh"

namespace nix::fetchers {

/**
 * Open the Git repository that serves as the tarball cache.
 *
 * The repository path is `<cache dir>/nix/tarball-cache`; it is computed
 * on the first call and reused by every later call.
 */
ref<GitRepo> getTarballCache()
{
    static CanonPath tarballCachePath(getCacheDir() + "/nix/tarball-cache");

    // NOTE(review): the two `true` flags are presumably "create" and
    // "bare" -- confirm against the declaration of GitRepo::openRepo.
    return GitRepo::openRepo(tarballCachePath, true, true);
}

}
17 changes: 17 additions & 0 deletions src/libfetchers/tarball-cache.hh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once
///@file

#include "ref.hh"
#include "git-utils.hh"

namespace nix::fetchers {

/**
 * Summary of a tarball that has been unpacked into the tarball cache.
 */
struct TarballInfo
{
/** Hash returned by the parse sink's `sync()` after unpacking. */
Hash treeHash;
/** Largest modification time found among the tarball's members. */
time_t lastModified;
};

ref<GitRepo> getTarballCache();

}
62 changes: 62 additions & 0 deletions src/libutil/tarfile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,66 @@ void unpackTarfile(const Path & tarFile, const Path & destDir)
extract_archive(archive, destDir);
}

/**
 * Stream every member of `archive` into `parseSink`.
 *
 * Directories, regular files (marked executable when the owner-execute
 * bit is set) and symlinks are forwarded to the sink; any other file
 * type is rejected with an `Error`.
 *
 * @return The largest member modification time seen. The running
 * maximum starts at 0, so the result is never below 0 and is 0 for an
 * archive without members.
 */
time_t unpackTarfileToSink(TarArchive & archive, ParseSink & parseSink)
{
    time_t lastModified = 0;

    // A single read buffer shared by all members. Allocating it inside
    // the read loop would re-allocate and zero-fill 128 KiB per chunk.
    std::vector<unsigned char> buf(128 * 1024);

    for (;;) {
        // FIXME: merge with extract_archive
        struct archive_entry * entry;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF) break;
        auto path = archive_entry_pathname(entry);
        if (!path)
            throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
        // Warnings are reported but do not abort the unpacking.
        if (r == ARCHIVE_WARN)
            warn(archive_error_string(archive.archive));
        else
            archive.check(r);

        lastModified = std::max(lastModified, archive_entry_mtime(entry));

        switch (archive_entry_filetype(entry)) {

        case AE_IFDIR:
            parseSink.createDirectory(path);
            break;

        case AE_IFREG: {
            parseSink.createRegularFile(path, [&](auto & crf) {
                if (archive_entry_mode(entry) & S_IXUSR)
                    crf.isExecutable();

                // Stream the member's contents chunk by chunk.
                while (true) {
                    auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                    if (n < 0)
                        throw Error("cannot read file '%s' from tarball", path);
                    if (n == 0) break;
                    crf(std::string_view {
                        (const char *) buf.data(),
                        (size_t) n,
                    });
                }
            });

            break;
        }

        case AE_IFLNK: {
            auto target = archive_entry_symlink(entry);
            // A missing target would otherwise reach the sink as a
            // null `const char *`.
            if (!target)
                throw Error("cannot get symlink target of '%s' in tarball", path);

            parseSink.createSymlink(path, target);

            break;
        }

        default:
            throw Error("file '%s' in tarball has unsupported file type", path);
        }
    }

    return lastModified;
}

}
3 changes: 3 additions & 0 deletions src/libutil/tarfile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
///@file

#include "serialise.hh"
#include "fs-sink.hh"
#include <archive.h>

namespace nix {
Expand Down Expand Up @@ -29,4 +30,6 @@ void unpackTarfile(Source & source, const Path & destDir);

void unpackTarfile(const Path & tarFile, const Path & destDir);

time_t unpackTarfileToSink(TarArchive & archive, ParseSink & parseSink);

}

0 comments on commit 6854550

Please sign in to comment.