Skip to content

Commit

Permalink
[NARS1] [estess] Improve performance of quicksort (and in turn string…
Browse files Browse the repository at this point in the history
…pool garbage collection in YottaDB) by choosing a better pivot

The primary enhancement is to stpg_sort.c. Previously, the pivot was chosen as the median of the elements at the left, right and (left+right)/2 indices in the array. This is the normally recommended pivot rule for quicksort. But it was observed to give us a pivot that ended up almost in one corner of the array (i.e. too close to left or too close to right). This in turn caused quicksort to degenerate into an O(n^2) algorithm, which showed its colors when a garbage collection had to run with thousands of items. The pivot is now chosen as the median of 9 equally-spaced elements spanning the [left,right] indices of the array. This was observed to give us a pivot that ended up almost at the midpoint of the array (at around the 45% mark most of the time), thus enabling quicksort to run in O(n log n). With these changes, a garbage collection that used to take 83 seconds took 0.5 seconds.

In addition the following changes were done.

a) Enhance the stringpool to contain > 4Gi items ("stp_array_size" variable)
b) stp_expand_array.c : Expand the "stp_array" array (that holds the items for garbage collection) exponentially instead of linearly.
c) lv_getslot.c : Handle an edge case (numElems == MAXINT4) introduced in a prior commit for YottaDB#80
  • Loading branch information
nars1 committed Nov 9, 2017
1 parent 3b71315 commit 42fcf2d
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 46 deletions.
4 changes: 2 additions & 2 deletions sr_port/gbldefs.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,8 @@ GBLDEF int4 backup_close_errno,
exit_state,
restore_read_errno;
GBLDEF volatile int4 outofband, crit_count;
GBLDEF int mumps_status = SS_NORMAL,
stp_array_size;
GBLDEF int mumps_status = SS_NORMAL;
GBLDEF gtm_uint64_t stp_array_size;
GBLDEF gvzwrite_datablk *gvzwrite_block;
GBLDEF lvzwrite_datablk *lvzwrite_block;
GBLDEF io_log_name *io_root_log_name;
Expand Down
2 changes: 1 addition & 1 deletion sr_port/lv_getslot.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ lvTreeNode *lvtreenode_getslot(symval *sym)
numElems = p->numAlloc;
else
numElems = LV_NEWBLOCK_INIT_ALLOC;
lvtreenode_newblock(sym, (numElems > MAXINT4) ? MAXINT4 : (numElems * 2));
lvtreenode_newblock(sym, (numElems < MAXINT4) ? (numElems * 2) : MAXINT4);
p = sym->lvtreenode_first_block;
assert(NULL != p);
}
Expand Down
17 changes: 10 additions & 7 deletions sr_port/stp_expand_array.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/****************************************************************
* *
* Copyright 2001, 2009 Fidelity Information Services, Inc *
* Copyright 2001, 2009 Fidelity Information Services, Inc *
* *
* Copyright (c) 2017 YottaDB LLC. and/or its subsidiaries. *
* All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
Expand All @@ -15,18 +18,18 @@
#include "stp_parms.h"
#include "longcpy.h"

GBLREF mstr **stp_array;
GBLREF int stp_array_size;
GBLREF mstr **stp_array;
GBLREF gtm_uint64_t stp_array_size;

void stp_expand_array(void)
{
mstr **a;
int n;
mstr **a;
gtm_uint64_t n;

n = stp_array_size;
stp_array_size += STP_MAXITEMS;
stp_array_size = ((MAXINT4 > n) ? (n * 2) : (n + MAXINT4));
a = stp_array;
stp_array = (mstr **) malloc(stp_array_size * SIZEOF(mstr *));
stp_array = (mstr **)malloc(stp_array_size * SIZEOF(mstr *));
longcpy((uchar_ptr_t)stp_array, (uchar_ptr_t)a, n * SIZEOF(mstr *));
free(a);
return;
Expand Down
2 changes: 1 addition & 1 deletion sr_port/stp_gcol_src.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ GBLREF int mlmax;
GBLREF int mvar_index;
GBLREF hash_table_objcode cache_table;
GBLREF unsigned char *msp, *stackbase, *stacktop, *stackwarn;
GBLREF int stp_array_size;
GBLREF gtm_uint64_t stp_array_size;
GBLREF io_log_name *io_root_log_name;
GBLREF lvzwrite_datablk *lvzwrite_block;
GBLREF mliteral literal_chain;
Expand Down
86 changes: 54 additions & 32 deletions sr_port/stpg_sort.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
/****************************************************************
* *
* Copyright 2001, 2007 Fidelity Information Services, Inc *
* Copyright 2001, 2007 Fidelity Information Services, Inc *
* *
* Copyright (c) 2017 YottaDB LLC. and/or its subsidiaries. *
* All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
Expand All @@ -10,60 +13,75 @@
****************************************************************/

#include "mdef.h"
#include "stpg_sort.h"

#define S_CUTOFF 15
#include "stpg_sort.h"

/* stpg_sort sorts an array of pointers to mstr's using the value
of each mstr's addr field as the key. The algorithm is a modified
QuickSort algorithm.
*/
#define NUM_ELEMS_FOR_MEDIAN 8
#define INSERTION_SORT_CUTOFF 15
#define MAX_QSORT_RECURSION_DEPTH 32

/* "stpg_sort" sorts an array of pointers to mstr's using the value of each mstr's addr field as the key.
* The algorithm used is a modified Quicksort (modification is in choice of Pivot).
*/
void stpg_sort (mstr **base, mstr **top)
{
mstr **stack[50], ***sp;
mstr **stack[2 * MAX_QSORT_RECURSION_DEPTH]; /* 2 entries are added to stack[] per recursion depth */
mstr ***sp;
mstr **median[NUM_ELEMS_FOR_MEDIAN + 1];
mstr *v, *t;
mstr **l, **r;
mstr **l, **r, **curl;
mstr **ix, **jx, **kx;
char *tval;
int cnt, cnti, cntj, max_depth_allowed, spacing;
# ifdef DEBUG
mstr ***sp_top;
# endif

sp = stack;
l = base;
r = top;
sp = stack;
DEBUG_ONLY(sp_top = stack + ARRAYSIZE(stack));
for (;;)
{
if (r - l < S_CUTOFF)
{
if (INSERTION_SORT_CUTOFF >= (r - l))
{ /* The # of elements is small enough so use Insertion sort (optimal for sorting small arrays) */
for (ix = l + 1; ix <= r; ix++)
{
for (jx = ix, t= *ix, tval = t->addr; jx > l && (*(jx - 1))->addr > tval; jx--)
{
*jx = *(jx - 1);
}
*jx = t;
}
/* Now that this subset of an array is sorted, check if there are any more array partitions that
* need sorting (stored in the "sp" stack).
*/
if (sp <= stack)
{
assert(sp == stack);
break;
}
else
} else
{
l = *--sp;
r = *--sp;
}
}
else
{
} else
{ /* Use Quicksort. Compute Pivot as median of 9 equally-spaced indices in the array [l,r] */
spacing = (int)(r - l) / NUM_ELEMS_FOR_MEDIAN;
assert(2 <= spacing);
for (curl = l, cnt = 0; cnt <= NUM_ELEMS_FOR_MEDIAN; curl += spacing)
median[cnt++] = curl;
for (cnti = 1; cnti <= NUM_ELEMS_FOR_MEDIAN; cnti++)
{
kx = median[cnti];
tval = (*kx)->addr;
for (cntj = cnti - 1; (0 <= cntj) && ((*median[cntj])->addr > tval); cntj--)
median[cntj + 1] = median[cntj];
median[cntj + 1] = kx;
}
/* Now that the median[] array is sorted, the median can be found from the array midpoint */
assert((NUM_ELEMS_FOR_MEDIAN + 1) == ARRAYSIZE(median));
kx = median[NUM_ELEMS_FOR_MEDIAN / 2]; /* This is the median and hence the pivot for quicksort */
ix = l;
jx = r;
kx = l + ((int)(r - l) / 2);
kx = ((*ix)->addr > (*jx)->addr) ?
(((*jx)->addr > (*kx)->addr) ?
jx :
(((*ix)->addr > (*kx)->addr) ? kx : ix)) :
(((*jx)->addr < (*kx)->addr) ?
jx :
(((*ix)->addr > (*kx)->addr) ? ix : kx));
v = *kx;
*kx = *jx;
*jx = v;
Expand All @@ -86,20 +104,24 @@ void stpg_sort (mstr **base, mstr **top)
*jx = *ix;
*ix = *r;
*r = t;
/* Done with partitioning the array [l,r] into 3 parts : [l,ix-1] [ix] [ix+1,r]
* where the pivot element is in [ix]. Now move on to sort the smaller of the two
* sub-arrays [l,ix-1] or [ix+1,r] in the next iteration of this for loop and
* store the other sub-array in the recursion stack "sp" for a later iteration.
*/
assert((sp + 1) < sp_top); /* Ensure no overflow in our recursion stack after below *sp++ operations */
if (ix - l > r - ix)
{
{ /* [ix+1,r] is the smaller sub-array so finish this first and store [l,ix-1] for later */
*sp++ = ix - 1;
*sp++ = l;
l = ix + 1;
}
else
{
} else
{ /* [l,ix-1] is the smaller sub-array so finish this first and store [ix+1,r] for later */
*sp++ = r;
*sp++ = ix + 1;
r = ix - 1;
}
}
}

return;
}
2 changes: 1 addition & 1 deletion sr_port/stpg_sort.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/****************************************************************
* *
* Copyright 2001 Sanchez Computer Associates, Inc. *
* Copyright 2001 Sanchez Computer Associates, Inc. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
Expand Down
7 changes: 5 additions & 2 deletions sr_port/stringpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
* Copyright (c) 2001-2017 Fidelity National Information *
* Services, Inc. and/or its subsidiaries. All rights reserved. *
* *
* Copyright (c) 2017 YottaDB LLC. and/or its subsidiaries. *
* All rights reserved. *
* *
* This source code contains the intellectual property *
* of its copyright holder(s), and is made available *
* under a license. If you do not know the terms of *
Expand Down Expand Up @@ -69,8 +72,8 @@ GBLREF boolean_t stringpool_unexpandable;

#define ADD_TO_STPARRAY(PTR, PTRARRAY, PTRARRAYCUR, PTRARRAYTOP, TYPE) \
{ \
GBLREF mstr **stp_array; \
GBLREF int stp_array_size; \
GBLREF mstr **stp_array; \
GBLREF gtm_uint64_t stp_array_size; \
\
if (NULL == PTRARRAY) \
{ \
Expand Down

0 comments on commit 42fcf2d

Please sign in to comment.