Skip to content

Commit

Permalink
Refactor string pool merging in repo_solv
Browse files Browse the repository at this point in the history
Move merging code into strpool.c where it belongs. Also clean up
hashmask handling a bit.
  • Loading branch information
mlschroe committed Jul 22, 2024
1 parent 27aa6a7 commit 8e390d8
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 90 deletions.
2 changes: 1 addition & 1 deletion src/poolid.c
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ pool_rel2id(Pool *pool, Id name, Id evr, int flags, int create)

/* extend hashtable if needed */
hashmask = pool->relhashmask;
if ((Hashval)pool->nrels * 2 > hashmask)
if ((Hashval)pool->nrels * 2 >= hashmask)
{
pool_resize_rels_hash(pool, REL_BLOCK);
hashmask = pool->relhashmask;
Expand Down
88 changes: 14 additions & 74 deletions src/repo_solv.c
Original file line number Diff line number Diff line change
Expand Up @@ -546,9 +546,7 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
int numkeys, numschemata;

Offset sizeid;
Offset *str; /* map Id -> Offset into string space */
char *strsp; /* repo string space */
char *sp; /* pointer into string space */
Id *idmap; /* map of repo Ids to pool Ids */
Id id, type;
Hashval hashmask, h, hh;
Expand Down Expand Up @@ -668,6 +666,8 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
/******* Part 1: string IDs *****************************************/

sizeid = read_u32(&data); /* size of string space */
if (sizeid >= 0xf0000000)
return pool_error(pool, SOLV_ERROR_CORRUPT, "bad string size");

/*
* read strings and Ids
Expand All @@ -682,22 +682,16 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
if (!(flags & REPO_LOCALPOOL))
{
spool = &pool->ss;
/* alloc max needed string buffer and string pointers, will shrink again later */
#if 0
spool->stringspace = solv_realloc(spool->stringspace, spool->sstrings + sizeid + 1);
spool->strings = solv_realloc2(spool->strings, spool->nstrings + numid, sizeof(Offset));
#else
spool->sstrings += sizeid + 1;
spool->nstrings += numid;
stringpool_shrink(spool); /* we misuse stringpool_shrink so that the correct BLOCK factor is used */
spool->sstrings -= sizeid + 1;
spool->nstrings -= numid;
#endif
/* reserve max needed string buffer and offsets, will shrink again later */
/* we use sizeid + 1 because we add a terminating zero */
stringpool_reserve(spool, numid, sizeid + 1);
}
else
{
data.localpool = 1;
spool = &data.spool;
/* the local pool is unlikely to get changed, so we do not use blocking and
* do not create a hash (see comment in stringpool_strn2id()) */
spool->stringspace = solv_malloc(7 + sizeid + 1);
spool->strings = solv_malloc2(numid < 2 ? 2 : numid, sizeof(Offset));
strcpy(spool->stringspace, "<NULL>");
Expand Down Expand Up @@ -761,12 +755,12 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
}
}
strsp[sizeid] = 0; /* make string space \0 terminated */
sp = strsp;

/* now merge */
str = spool->strings; /* array of offsets into strsp, indexed by Id */
if ((flags & REPO_LOCALPOOL) != 0)
{
Offset *str = spool->strings; /* array of offsets into strsp, indexed by Id */
char *sp = strsp;
/* no shared pool, thus no idmap and no unification needed */
idmap = 0;
spool->nstrings = numid < 2 ? 2 : numid; /* make sure we have at least id 0 and 1 */
Expand All @@ -790,69 +784,15 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
}
else
{
Offset oldsstrings = spool->sstrings;

/* alloc id map for name and rel Ids. this maps ids in the solv files
/* alloc id map for name and rel Ids. this maps ids in the solv file
* to the ids in our pool */
idmap = solv_calloc(numid + numrel, sizeof(Id));
stringpool_resize_hash(spool, numid);
hashtbl = spool->stringhashtbl;
hashmask = spool->stringhashmask;
#if 0
POOL_DEBUG(SOLV_DEBUG_STATS, "read %d strings\n", numid);
POOL_DEBUG(SOLV_DEBUG_STATS, "string hash buckets: %d\n", hashmask + 1);
#endif
/*
* run over strings and merge with pool.
* we could use stringpool_str2id, but this is faster.
* also populate id map (maps solv Id -> pool Id)
*/
for (i = 1; i < numid; i++)
if (!stringpool_integrate(spool, numid, sizeid, idmap))
{
if (sp >= strsp + sizeid)
{
solv_free(idmap);
spool->nstrings = oldnstrings;
spool->sstrings = oldsstrings;
stringpool_freehash(spool);
repodata_freedata(&data);
return pool_error(pool, SOLV_ERROR_OVERFLOW, "not enough strings %d %d", i, numid);
}
if (!*sp) /* empty string */
{
idmap[i] = ID_EMPTY;
sp++;
continue;
}

/* find hash slot */
h = strhash(sp) & hashmask;
hh = HASHCHAIN_START;
for (;;)
{
id = hashtbl[h];
if (!id)
break;
if (!strcmp(spool->stringspace + spool->strings[id], sp))
break; /* already in pool */
h = HASHCHAIN_NEXT(h, hh, hashmask);
}

/* length == offset to next string */
l = strlen(sp) + 1;
if (!id) /* end of hash chain -> new string */
{
id = spool->nstrings++;
hashtbl[h] = id;
str[id] = spool->sstrings; /* save offset */
if (sp != spool->stringspace + spool->sstrings)
memmove(spool->stringspace + spool->sstrings, sp, l);
spool->sstrings += l;
}
idmap[i] = id; /* repo relative -> pool relative */
sp += l; /* next string */
solv_free(idmap);
repodata_freedata(&data);
return pool_error(pool, SOLV_ERROR_OVERFLOW, "not enough strings");
}
stringpool_shrink(spool); /* vacuum */
}


Expand Down
104 changes: 90 additions & 14 deletions src/strpool.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ stringpool_clone(Stringpool *ss, Stringpool *from)
ss->sstrings = from->sstrings;
}

void
static void
stringpool_resize_hash(Stringpool *ss, int numnew)
{
Hashval h, hh, hashmask;
Expand Down Expand Up @@ -104,7 +104,7 @@ stringpool_resize_hash(Stringpool *ss, int numnew)
Id
stringpool_strn2id(Stringpool *ss, const char *str, unsigned int len, int create)
{
Hashval h, hh, hashmask, oldhashmask;
Hashval h, hh, hashmask;
Id id;
Hashtable hashtbl;

Expand All @@ -113,10 +113,17 @@ stringpool_strn2id(Stringpool *ss, const char *str, unsigned int len, int create
if (!len)
return STRID_EMPTY;

hashmask = oldhashmask = ss->stringhashmask;
hashmask = ss->stringhashmask;

/* expand hashtable if needed */
if ((Hashval)ss->nstrings * 2 > hashmask)
if ((Hashval)ss->nstrings * 2 >= hashmask)
{
/* this should be a test for a flag that tells us if the
* correct blocking is used, but adding a flag would break
* the ABI. So we use the existance of the hash area as
* indication instead */
if (!hashmask)
stringpool_reserve(ss, 1, len + 1);
stringpool_resize_hash(ss, STRING_BLOCK);
hashmask = ss->stringhashmask;
}
Expand All @@ -135,16 +142,6 @@ stringpool_strn2id(Stringpool *ss, const char *str, unsigned int len, int create
if (id || !create) /* exit here if string found */
return id;

/* this should be a test for a flag that tells us if the
* correct blocking is used, but adding a flag would break
* the ABI. So we use the existance of the hash area as
* indication instead */
if (!oldhashmask)
{
ss->stringspace = solv_extend_resize(ss->stringspace, ss->sstrings + len + 1, 1, STRINGSPACE_BLOCK);
ss->strings = solv_extend_resize(ss->strings, ss->nstrings + 1, sizeof(Offset), STRING_BLOCK);
}

/* generate next id and save in table */
id = ss->nstrings++;
hashtbl[h] = id;
Expand Down Expand Up @@ -176,3 +173,82 @@ stringpool_shrink(Stringpool *ss)
ss->stringspace = solv_extend_resize(ss->stringspace, ss->sstrings, 1, STRINGSPACE_BLOCK);
ss->strings = solv_extend_resize(ss->strings, ss->nstrings, sizeof(Offset), STRING_BLOCK);
}

void
stringpool_reserve(Stringpool *ss, int numid, Offset sizeid)
{
ss->stringspace = solv_extend_resize(ss->stringspace, ss->sstrings + sizeid, 1, STRINGSPACE_BLOCK);
ss->strings = solv_extend_resize(ss->strings, ss->nstrings + numid, sizeof(Offset), STRING_BLOCK);
}

int
stringpool_integrate(Stringpool *ss, int numid, Offset sizeid, Id *idmap)
{
int oldnstrings = ss->nstrings;
Offset oldsstrings = ss->sstrings;
Offset *str;
Id id;
int i, l;
char *strsp, *sp;
Hashval hashmask, h, hh;
Hashtable hashtbl;

stringpool_resize_hash(ss, numid);
hashtbl = ss->stringhashtbl;
hashmask = ss->stringhashmask;

/*
* run over new strings and merge with pool.
* we could use stringpool_str2id, but this is faster.
* also populate id map (maps solv Id -> pool Id)
*/
str = ss->strings;
sp = strsp = ss->stringspace + ss->sstrings;
for (i = 1; i < numid; i++)
{
if (sp >= strsp + sizeid)
{
ss->nstrings = oldnstrings;
ss->sstrings = oldsstrings;
stringpool_freehash(ss);
stringpool_shrink(ss); /* vacuum */
return 0;
}
if (!*sp) /* shortcut for empty strings */
{
idmap[i] = STRID_EMPTY;
sp++;
continue;
}

/* find hash slot */
h = strhash(sp) & hashmask;
hh = HASHCHAIN_START;
for (;;)
{
id = hashtbl[h];
if (!id)
break;
if (!strcmp(ss->stringspace + ss->strings[id], sp))
break; /* already in pool */
h = HASHCHAIN_NEXT(h, hh, hashmask);
}

/* length == offset to next string */
l = strlen(sp) + 1;
if (!id) /* end of hash chain -> new string */
{
id = ss->nstrings++;
hashtbl[h] = id;
str[id] = ss->sstrings; /* save offset */
if (sp != ss->stringspace + ss->sstrings)
memmove(ss->stringspace + ss->sstrings, sp, l);
ss->sstrings += l;
}
idmap[i] = id; /* repo relative -> pool relative */
sp += l; /* next string */
}
stringpool_shrink(ss); /* vacuum */
return 1;
}

4 changes: 3 additions & 1 deletion src/strpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,15 @@ void stringpool_init_empty(Stringpool *ss);
void stringpool_clone(Stringpool *ss, Stringpool *from);
void stringpool_free(Stringpool *ss);
void stringpool_freehash(Stringpool *ss);
void stringpool_resize_hash(Stringpool *ss, int numnew);

Id stringpool_str2id(Stringpool *ss, const char *str, int create);
Id stringpool_strn2id(Stringpool *ss, const char *str, unsigned int len, int create);

void stringpool_shrink(Stringpool *ss);

void stringpool_reserve(Stringpool *ss, int numid, Offset sizeid);
int stringpool_integrate(Stringpool *ss, int numid, Offset sizeid, Id *idmap);


static inline const char *
stringpool_id2str(Stringpool *ss, Id id)
Expand Down

0 comments on commit 8e390d8

Please sign in to comment.