Skip to content

Commit

Permalink
Allow user to specify memory limit for dictionary training
Browse files (browse the repository at this point in the history)
  • Loading branch information
embg committed Dec 14, 2021
1 parent 57383d2 commit 5cd8292
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 54 deletions.
7 changes: 6 additions & 1 deletion programs/dibio.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -309,7 +309,7 @@ static fileStats DiB_fileStats(const char** fileNamesTable, int nbFiles, size_t
int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
const char** fileNamesTable, int nbFiles, size_t chunkSize,
ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
- ZDICT_fastCover_params_t* fastCoverParams, int optimize)
+ ZDICT_fastCover_params_t* fastCoverParams, int optimize, unsigned memLimit)
{
fileStats fs;
size_t* sampleSizes; /* vector of sample sizes. Each sample can be up to SAMPLESIZE_MAX */
Expand Down Expand Up @@ -341,6 +341,11 @@ int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
/* Limit the size of the training data to 2GB */
/* TODO: there is opportunity to stop DiB_fileStats() early when the data limit is reached */
loadedSize = (size_t)MIN( MIN((S64)maxMem, fs.totalSizeToLoad), MAX_SAMPLES_SIZE );
+ if (memLimit != 0) {
+ DISPLAYLEVEL(2, "! Warning : setting manual memory limit for dictionary training data at %u MB \n",
+ (unsigned)(memLimit / (1 MB)));
+ loadedSize = (size_t)MIN(loadedSize, memLimit);
+ }
srcBuffer = malloc(loadedSize+NOISELENGTH);
sampleSizes = (size_t*)malloc(fs.nbSamples * sizeof(size_t));
}
Expand Down
2 changes: 1 addition & 1 deletion programs/dibio.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -34,6 +34,6 @@
int DiB_trainFromFiles(const char* dictFileName, size_t maxDictSize,
const char** fileNamesTable, int nbFiles, size_t chunkSize,
ZDICT_legacy_params_t* params, ZDICT_cover_params_t* coverParams,
- ZDICT_fastCover_params_t* fastCoverParams, int optimize);
+ ZDICT_fastCover_params_t* fastCoverParams, int optimize, unsigned memLimit);

#endif
Loading

0 comments on commit 5cd8292

Please sign in to comment.