Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add zim files in the "new" format as testing data. #535

Merged
merged 14 commits into from
Apr 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/download_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import tarfile
import sys

# Version of the zim-testing-suite release to fetch.
TEST_DATA_VERSION = "0.2"
# URL template of the release archive; it carries `{version}` placeholders
# (presumably filled in with TEST_DATA_VERSION by the code below - confirm).
ARCHIVE_URL_TEMPL = "https://github.com/openzim/zim-testing-suite/releases/download/v{version}/zim-testing-suite-{version}.tar.gz"

if __name__ == "__main__":
Expand Down
9 changes: 8 additions & 1 deletion src/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,14 @@ namespace zim

/// Build the range of entries to visit when iterating in title order.
///
/// If the underlying file implementation reports a front articles index,
/// the range covers `[0, getFrontEntryCount())`. Otherwise it falls back to
/// the user entry range `[getStartUserEntry(), getEndUserEntry())`.
///
/// @return An `EntryRange<EntryOrder::titleOrder>` bound to this archive's
///         implementation object.
Archive::EntryRange<EntryOrder::titleOrder> Archive::iterByTitle() const
{
  if (m_impl->hasFrontArticlesIndex()) {
    // We have a front articles index. We can "simply" loop over all front entries.
    return EntryRange<EntryOrder::titleOrder>(m_impl, 0, m_impl->getFrontEntryCount().v);
  }

  // We don't have an index listing only front entries, so we have to loop
  // over the user entries instead
  // (`C` namespace in the new zim scheme, all namespaces in old ones).
  return EntryRange<EntryOrder::titleOrder>(m_impl, m_impl->getStartUserEntry().v, m_impl->getEndUserEntry().v);
}

Archive::EntryRange<EntryOrder::efficientOrder> Archive::iterEfficient() const
Expand Down
2 changes: 2 additions & 0 deletions src/fileimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ makeFileReader(std::shared_ptr<const FileCompound> zimFile, offset_t offset, zsi
direntReader(new DirentReader(zimReader)),
clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE)),
m_newNamespaceScheme(false),
m_hasFrontArticlesIndex(true),
m_startUserEntry(0),
m_endUserEntry(0)
{
Expand Down Expand Up @@ -150,6 +151,7 @@ makeFileReader(std::shared_ptr<const FileCompound> zimFile, offset_t offset, zsi
offset_t titleOffset(header.getTitleIdxPos());
zsize_t titleSize(sizeof(entry_index_type)*header.getArticleCount());
mp_titleDirentAccessor = getTitleAccessor(titleOffset, titleSize, "Title index table");
const_cast<bool&>(m_hasFrontArticlesIndex) = false;
}

readMimeTypes();
Expand Down
2 changes: 2 additions & 0 deletions src/fileimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ namespace zim
ConcurrentCache<cluster_index_type, ClusterHandle> clusterCache;

const bool m_newNamespaceScheme;
const bool m_hasFrontArticlesIndex;
const entry_index_t m_startUserEntry;
const entry_index_t m_endUserEntry;

Expand Down Expand Up @@ -89,6 +90,7 @@ namespace zim
/// Read-only access to the `header` member.
const Fileheader& getFileheader() const
{
  return header;
}
zsize_t getFilesize() const;
/// Value of the `m_newNamespaceScheme` flag.
bool hasNewNamespaceScheme() const
{
  return m_newNamespaceScheme;
}
/// Value of the `m_hasFrontArticlesIndex` flag.
/// NOTE(review): presumably true when the file provides a dedicated listing
/// of front articles - confirm against the constructor.
bool hasFrontArticlesIndex() const
{
  return m_hasFrontArticlesIndex;
}

FileCompound::PartRange getFileParts(offset_t offset, zsize_t size);
std::shared_ptr<const Dirent> getDirent(entry_index_t idx);
Expand Down
Loading