Replace the mp_to_decimal macro with a function...

The new function dispatches to a new internal routine, which uses Barrett reduction to speed up converting large integers to base-10 strings, when that routine is available and the number is above a cutoff size; otherwise it falls back to mp_to_radix.
libtom · Sep 7, 2019 · 3ee4c5e · 3ee4c5e
1 parent c63799c
commit 3ee4c5e
Show file tree

Hide file tree

Showing 13 changed files with 385 additions and 36 deletions.
diff --git a/bn_mp_to_decimal.c b/bn_mp_to_decimal.c
@@ -0,0 +1,23 @@
+#include "tommath_private.h"
+#ifdef BN_MP_TO_DECIMAL_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* Writes the base-10 ASCII representation of "a" into "str", handing
+ * "maxlen" on to the conversion routine as the size of the buffer.
+ *
+ * Operands with more than 10 used digits are converted by
+ * s_mp_to_decimal_fast() (the Barrett reduction based routine) when it is
+ * compiled in, i.e. when MP_HAS(S_MP_TO_DECIMAL_FAST) holds. Everything
+ * else is converted by mp_to_radix() with radix 10.
+ *
+ * The status of whichever routine ran is returned unchanged.
+ */
+mp_err mp_to_decimal(const mp_int *a, char *str, size_t maxlen)
+{
+   /* small operand, or fast path not built: use the generic converter */
+   if (!MP_HAS(S_MP_TO_DECIMAL_FAST) || (a->used <= 10)) {
+      return mp_to_radix(a, str, maxlen, 10);
+   }
+
+   return s_mp_to_decimal_fast(a, str, maxlen);
+}
+
+#endif
diff --git a/bn_s_mp_to_decimal_fast.c b/bn_s_mp_to_decimal_fast.c
@@ -0,0 +1,234 @@
+#include "tommath_private.h"
+#include <string.h>
+#ifdef BN_S_MP_TO_DECIMAL_FAST_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+/* Recursive worker for s_mp_to_decimal_fast().
+ *
+ * Splits "number" into a quotient and a remainder with respect to
+ * nL[precalc_array_index]. The quotient is estimated by multiplying with
+ * mL[precalc_array_index] and shifting right by shiftL[precalc_array_index]
+ * bits instead of dividing; a single downward correction is applied when
+ * the remainder comes out negative. Both parts are then converted with the
+ * next smaller table entry. Once precalc_array_index has dropped below zero
+ * the value is read with mp_get_i32() and written as at most three digits.
+ *
+ * left   - non-zero while converting the leftmost part of the output, where
+ *          leading zeros are suppressed; every other part is zero-padded to
+ *          three digits at the leaf level.
+ * result - in/out write cursor, advanced past the digits written.
+ * maxlen - in/out remaining room in the buffer; at least one byte is always
+ *          left over after a successful write.
+ *
+ * Returns MP_OKAY on success, MP_VAL if the buffer is too small (or a leaf
+ * value is outside 0..999), or the error of a failing bignum operation.
+ */
+mp_err s_mp_to_decimal_fast_rec(const mp_int *number, mp_int *nL, mp_int *shiftL, mp_int *mL, int precalc_array_index,
+                                int left,
+                                char **result,
+                                size_t *maxlen)
+{
+   mp_int q, nLq, r;
+   mp_err err;
+
+   if (precalc_array_index < 0) {
+      int n = mp_get_i32(number), n2 = n, t = 0, c;
+      char *out = *result;
+      char s[4] = "000";
+
+      /* s[] holds exactly three digits; anything outside 0..999 would
+       * index outside of s[] and mp_s_rmap[] below */
+      if ((n2 < 0) || (n2 > 999)) {
+         return MP_VAL;
+      }
+
+      /* fill s[] from the right; t counts the significant digits */
+      while (n) {
+         s[2 - t] = mp_s_rmap[n % 10];
+         t++;
+         n /= 10;
+      }
+
+      /* anything but the leftmost chunk is padded with zeros to width 3 */
+      if (!left && n2 < 100) {
+         t++;
+         if (n2 < 10) {
+            t++;
+         }
+         if (n2 == 0) {
+            t++;
+         }
+      }
+
+      /* need room for t characters and must leave at least one byte */
+      if (*maxlen < (size_t)t || (*maxlen -= (size_t)t) < 1) {
+         /* no more room */
+         return MP_VAL;
+      }
+
+      /* copy the last t characters of s[] */
+      for (c = 0; c < t; c++) {
+         out[c] = s[3 - t + c];
+      }
+
+      *result += t;
+
+      return MP_OKAY;
+   }
+
+   /* If this fails q, nLq and r are not (all) initialised, so they must not
+    * be handed to mp_clear_multi(): return directly instead of jumping to
+    * the cleanup label. */
+   if ((err = mp_init_multi(&q, &nLq, &r, NULL)) != MP_OKAY) {
+      return err;
+   }
+
+   /* q = (number * mL[i]) >> shiftL[i], the estimated quotient */
+   if ((err = mp_mul(number, &mL[precalc_array_index], &q)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   if ((err = mp_div_2d(&q, mp_get_i32(&shiftL[precalc_array_index]), &q, NULL)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+
+   /* r = number - nL[i] * q */
+   if ((err = mp_mul(&nL[precalc_array_index], &q, &nLq)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+
+   if ((err = mp_sub(number, &nLq, &r)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+
+   /* estimate was one too large: step q down and r up by nL[i] */
+   if (mp_isneg(&r)) {
+      if ((err = mp_sub_d(&q, 1, &q)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      if ((err = mp_add(&r, &nL[precalc_array_index], &r)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+   }
+
+   --precalc_array_index;
+   if (left && mp_iszero(&q)) {
+      /* nothing printed so far and the upper part is zero: the remainder
+       * is still the leftmost part */
+      if ((err = s_mp_to_decimal_fast_rec(&r, nL, shiftL, mL, precalc_array_index, 1, result, maxlen)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+   } else {
+      if ((err = s_mp_to_decimal_fast_rec(&q, nL, shiftL, mL, precalc_array_index, left, result, maxlen)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      if ((err = s_mp_to_decimal_fast_rec(&r, nL, shiftL, mL, precalc_array_index, 0, result, maxlen)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+   }
+
+   err = MP_OKAY;
+
+LBL_ERR:
+   mp_clear_multi(&q, &nLq, &r, NULL);
+   return err;
+}
+
+/* Stores the bignum "a" as a NUL-terminated decimal ASCII string in
+ * "result", which has room for "maxlen" bytes.
+ *
+ * Three parallel tables are built by repeated squaring, starting at 1000:
+ *    nL[i]     - 1000**(2**i)
+ *    shiftL[i] - the shift count belonging to nL[i] (20 * 2**i)
+ *    mL[i]     - the multiplier that replaces a division by nL[i]
+ * and are handed to s_mp_to_decimal_fast_rec(), which does the conversion.
+ * Table building stops as soon as the next square exceeds |a|.
+ *
+ * Should the number be too large for the fixed-size tables the conversion
+ * is done by mp_to_radix() instead.
+ *
+ * Returns MP_OKAY on success, MP_VAL if the buffer is too small, or the
+ * error of a failing bignum operation.
+ */
+mp_err s_mp_to_decimal_fast(const mp_int *a, char *result, size_t maxlen)
+{
+   mp_int number, n, shift, M, M2, M4;
+   mp_int nL[20], shiftL[20], mL[20];
+   mp_err err;
+   /* write cursor and remaining room; "result" and "maxlen" stay untouched
+    * so that the fallback below can start over */
+   char *cursor = result;
+   size_t room = maxlen;
+   /* number of initialised entries in nL[], shiftL[] and mL[] */
+   int precalc_array_index = 0, c;
+   const int table_size = (int)(sizeof(nL) / sizeof(nL[0]));
+
+   /* the recursion emits no digits at all for zero */
+   if (mp_iszero(a)) {
+      if (maxlen < 2u) {
+         return MP_VAL;
+      }
+      result[0] = '0';
+      result[1] = '\0';
+      return MP_OKAY;
+   }
+
+   /* Initialise every scalar in one go: on failure nothing is left to be
+    * cleared, on success the cleanup below never sees an uninitialised
+    * mp_int. */
+   if ((err = mp_init_multi(&number, &n, &shift, &M, &M2, &M4, NULL)) != MP_OKAY) {
+      return err;
+   }
+
+   if ((err = mp_copy(a, &number)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   if (mp_isneg(&number)) {
+      /* the sign needs a byte of its own; check before "room" can wrap */
+      if (room < 2u) {
+         err = MP_VAL;
+         goto LBL_ERR;
+      }
+      if ((err = mp_neg(&number, &number)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      *cursor++ = '-';
+      room--;
+   }
+
+   mp_set_u32(&n, 1000);
+   mp_set_u32(&shift, 20);
+
+   /* (8 * 2**$shift) / $n rounded up */
+   mp_set_u32(&M, 8389);
+
+   /* first table entry */
+   if ((err = mp_init_multi(&nL[0], &shiftL[0], &mL[0], NULL)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   precalc_array_index = 1;
+   if ((err = mp_copy(&n, &nL[0])) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   if ((err = mp_copy(&shift, &shiftL[0])) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   /* $M / 8, rounded up */
+   mp_set_u32(&mL[0], 1049);
+
+   while (1) {
+      if ((err = mp_sqr(&n, &n)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      if (mp_cmp(&n, &number) == MP_GT) {
+         break;
+      }
+
+      if (precalc_array_index >= table_size) {
+         /* The tables are full but the number needs another entry. Do not
+          * write past the arrays; let the generic converter produce the
+          * string from scratch (it overwrites a sign written above). */
+         err = mp_to_radix(a, result, maxlen, 10);
+         goto LBL_ERR;
+      }
+
+      if ((err = mp_mul_2(&shift, &shift)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+
+      /* The following is a Newton-Raphson step, to restore the invariant
+       * that $M is (8 * 2**$shift) / $n, rounded up. */
+      {
+         if ((err = mp_sqr(&M, &M2)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_sqr(&M2, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+
+         if ((err = mp_mul(&M4, &n, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_div_2d(&M4, mp_get_i32(&shift) + 6, &M4, NULL)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_mul_2(&M2, &M2)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_sub(&M4, &M2, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_add_d(&M4, 1, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_div_2d(&M4, 3, &M4, NULL)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_sub_d(&M4, 1, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_neg(&M4, &M)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+      }
+
+      /* Divide by 8, round up (M4 still holds the negated $M here) */
+      {
+         if ((err = mp_add_d(&M4, 1, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_div_2d(&M4, 3, &M4, NULL)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_sub_d(&M4, 1, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+         if ((err = mp_neg(&M4, &M4)) != MP_OKAY) {
+            goto LBL_ERR;
+         }
+      }
+
+      /* Append the new table entry. It is counted as soon as its three
+       * members are initialised, so the cleanup releases exactly the
+       * entries that exist, even if one of the copies fails. */
+      if ((err = mp_init_multi(&nL[precalc_array_index], &shiftL[precalc_array_index],
+                               &mL[precalc_array_index], NULL)) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      precalc_array_index++;
+      if ((err = mp_copy(&n, &nL[precalc_array_index - 1])) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      if ((err = mp_copy(&shift, &shiftL[precalc_array_index - 1])) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+      if ((err = mp_copy(&M4, &mL[precalc_array_index - 1])) != MP_OKAY) {
+         goto LBL_ERR;
+      }
+   }
+
+   if ((err = s_mp_to_decimal_fast_rec(&number, nL, shiftL, mL, precalc_array_index - 1, 1, &cursor,
+                                       &room)) != MP_OKAY) {
+      goto LBL_ERR;
+   }
+   /* the recursion always leaves at least one byte for the terminator */
+   *cursor = '\0';
+
+   err = MP_OKAY;
+
+LBL_ERR:
+   mp_clear_multi(&number, &n, &shift, &M, &M2, &M4, NULL);
+   for (c = 0; c < precalc_array_index; c++) {
+      mp_clear_multi(&nL[c], &shiftL[c], &mL[c], NULL);
+   }
+   return err;
+}
+
+#endif
diff --git a/demo/test.c b/demo/test.c
@@ -2214,6 +2214,54 @@ static int test_s_mp_toom_sqr(void)
    return EXIT_FAILURE;
 }
 
+/* Round-trip test for mp_to_decimal().
+ *
+ * For sizes 1, 11, 21, ... below 1000, five random numbers are generated
+ * with mp_rand() (every second one negated), converted to a decimal string
+ * in a buffer sized by mp_radix_size(), read back with mp_read_radix() and
+ * compared against the original value.
+ *
+ * Returns EXIT_SUCCESS if every round trip reproduces its input and
+ * EXIT_FAILURE on the first error or mismatch.
+ */
+static int test_mp_to_decimal(void)
+{
+   mp_int a, b;
+   int size, err, strlength;
+   /* NULL whenever no buffer is owned, so the error path can free it */
+   char *str = NULL;
+
+   if ((err = mp_init_multi(&a, &b, NULL)) != MP_OKAY) {
+      /* a and b are not (both) initialised, there is nothing to clear */
+      return EXIT_FAILURE;
+   }
+   for (size = 1; size < 1000; size += 10) {
+      int times;
+      printf("Testing mp_to_decimal: %5d bits    \r", size);
+      fflush(stdout);
+      for (times = 0; times < 5; times++) {
+         if ((err = mp_rand(&a, size)) != MP_OKAY) {
+            goto LTM_ERR;
+         }
+         if (times % 2) {
+            /* also test some negative numbers */
+            if ((err = mp_neg(&a, &a)) != MP_OKAY) {
+               goto LTM_ERR;
+            }
+         }
+         if ((err = mp_radix_size(&a, 10, &strlength)) != MP_OKAY) {
+            goto LTM_ERR;
+         }
+         str = (char *)malloc((size_t)strlength);
+         if (str == NULL) {
+            goto LTM_ERR;
+         }
+         if ((err = mp_to_decimal(&a, str, (size_t)strlength)) != MP_OKAY) {
+            goto LTM_ERR;
+         }
+         if ((err = mp_read_radix(&b, str, 10)) != MP_OKAY) {
+            goto LTM_ERR;
+         }
+         free(str);
+         str = NULL;
+         if (mp_cmp(&a, &b) != MP_EQ) {
+            fprintf(stderr, "s_mp_to_decimal_fast failed at size %d\n", size);
+            goto LTM_ERR;
+         }
+      }
+   }
+
+   mp_clear_multi(&a, &b, NULL);
+   return EXIT_SUCCESS;
+LTM_ERR:
+   /* releases the buffer of an iteration that failed half way through */
+   free(str);
+   mp_clear_multi(&a, &b, NULL);
+   return EXIT_FAILURE;
+}
+
 int unit_tests(int argc, char **argv)
 {
    static const struct {
@@ -2264,8 +2312,10 @@ int unit_tests(int argc, char **argv)
       T1(s_mp_karatsuba_sqr, S_MP_KARATSUBA_SQR),
       T1(s_mp_toom_mul, S_MP_TOOM_MUL),
       T1(s_mp_toom_sqr, S_MP_TOOM_SQR),
+      T1(mp_to_decimal, S_MP_TO_DECIMAL_FAST)
 #undef T2
 #undef T1
+#undef T
    };
    unsigned long i, ok, fail, nop;
    uint64_t t;

diff --git a/libtommath_VS2008.vcproj b/libtommath_VS2008.vcproj
@@ -816,6 +816,10 @@
 			RelativePath="bn_mp_submod.c"
 			>
 		</File>
+		<File
+			RelativePath="bn_mp_to_decimal.c"
+			>
+		</File>
 		<File
 			RelativePath="bn_mp_to_radix.c"
 			>
@@ -936,6 +940,10 @@
 			RelativePath="bn_s_mp_sub.c"
 			>
 		</File>
+		<File
+			RelativePath="bn_s_mp_to_decimal_fast.c"
+			>
+		</File>
 		<File
 			RelativePath="bn_s_mp_toom_mul.c"
 			>

diff --git a/makefile b/makefile
@@ -48,13 +48,14 @@ bn_mp_reduce_is_2k.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_setup.o bn_mp_root_u32.
 bn_mp_set.o bn_mp_set_double.o bn_mp_set_i32.o bn_mp_set_i64.o bn_mp_set_l.o bn_mp_set_ll.o \
 bn_mp_set_u32.o bn_mp_set_u64.o bn_mp_set_ul.o bn_mp_set_ull.o bn_mp_shrink.o bn_mp_signed_bin_size.o \
 bn_mp_signed_rsh.o bn_mp_sqr.o bn_mp_sqrmod.o bn_mp_sqrt.o bn_mp_sqrtmod_prime.o bn_mp_sub.o bn_mp_sub_d.o \
-bn_mp_submod.o bn_mp_to_radix.o bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin.o \
-bn_mp_to_unsigned_bin_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o bn_mp_zero.o bn_prime_tab.o bn_s_mp_add.o \
-bn_s_mp_balance_mul.o bn_s_mp_exptmod.o bn_s_mp_exptmod_fast.o bn_s_mp_get_bit.o bn_s_mp_invmod_fast.o \
-bn_s_mp_invmod_slow.o bn_s_mp_karatsuba_mul.o bn_s_mp_karatsuba_sqr.o bn_s_mp_montgomery_reduce_fast.o \
-bn_s_mp_mul_digs.o bn_s_mp_mul_digs_fast.o bn_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs_fast.o \
-bn_s_mp_prime_is_divisible.o bn_s_mp_rand_jenkins.o bn_s_mp_rand_platform.o bn_s_mp_reverse.o \
-bn_s_mp_sqr.o bn_s_mp_sqr_fast.o bn_s_mp_sub.o bn_s_mp_toom_mul.o bn_s_mp_toom_sqr.o
+bn_mp_submod.o bn_mp_to_decimal.o bn_mp_to_radix.o bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o \
+bn_mp_to_unsigned_bin.o bn_mp_to_unsigned_bin_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o bn_mp_zero.o \
+bn_prime_tab.o bn_s_mp_add.o bn_s_mp_balance_mul.o bn_s_mp_exptmod.o bn_s_mp_exptmod_fast.o \
+bn_s_mp_get_bit.o bn_s_mp_invmod_fast.o bn_s_mp_invmod_slow.o bn_s_mp_karatsuba_mul.o \
+bn_s_mp_karatsuba_sqr.o bn_s_mp_montgomery_reduce_fast.o bn_s_mp_mul_digs.o bn_s_mp_mul_digs_fast.o \
+bn_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs_fast.o bn_s_mp_prime_is_divisible.o \
+bn_s_mp_rand_jenkins.o bn_s_mp_rand_platform.o bn_s_mp_reverse.o bn_s_mp_sqr.o bn_s_mp_sqr_fast.o \
+bn_s_mp_sub.o bn_s_mp_to_decimal_fast.o bn_s_mp_toom_mul.o bn_s_mp_toom_sqr.o
 
 #END_INS
 

diff --git a/makefile.mingw b/makefile.mingw
@@ -51,13 +51,14 @@ bn_mp_reduce_is_2k.o bn_mp_reduce_is_2k_l.o bn_mp_reduce_setup.o bn_mp_root_u32.
 bn_mp_set.o bn_mp_set_double.o bn_mp_set_i32.o bn_mp_set_i64.o bn_mp_set_l.o bn_mp_set_ll.o \
 bn_mp_set_u32.o bn_mp_set_u64.o bn_mp_set_ul.o bn_mp_set_ull.o bn_mp_shrink.o bn_mp_signed_bin_size.o \
 bn_mp_signed_rsh.o bn_mp_sqr.o bn_mp_sqrmod.o bn_mp_sqrt.o bn_mp_sqrtmod_prime.o bn_mp_sub.o bn_mp_sub_d.o \
-bn_mp_submod.o bn_mp_to_radix.o bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o bn_mp_to_unsigned_bin.o \
-bn_mp_to_unsigned_bin_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o bn_mp_zero.o bn_prime_tab.o bn_s_mp_add.o \
-bn_s_mp_balance_mul.o bn_s_mp_exptmod.o bn_s_mp_exptmod_fast.o bn_s_mp_get_bit.o bn_s_mp_invmod_fast.o \
-bn_s_mp_invmod_slow.o bn_s_mp_karatsuba_mul.o bn_s_mp_karatsuba_sqr.o bn_s_mp_montgomery_reduce_fast.o \
-bn_s_mp_mul_digs.o bn_s_mp_mul_digs_fast.o bn_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs_fast.o \
-bn_s_mp_prime_is_divisible.o bn_s_mp_rand_jenkins.o bn_s_mp_rand_platform.o bn_s_mp_reverse.o \
-bn_s_mp_sqr.o bn_s_mp_sqr_fast.o bn_s_mp_sub.o bn_s_mp_toom_mul.o bn_s_mp_toom_sqr.o
+bn_mp_submod.o bn_mp_to_decimal.o bn_mp_to_radix.o bn_mp_to_signed_bin.o bn_mp_to_signed_bin_n.o \
+bn_mp_to_unsigned_bin.o bn_mp_to_unsigned_bin_n.o bn_mp_unsigned_bin_size.o bn_mp_xor.o bn_mp_zero.o \
+bn_prime_tab.o bn_s_mp_add.o bn_s_mp_balance_mul.o bn_s_mp_exptmod.o bn_s_mp_exptmod_fast.o \
+bn_s_mp_get_bit.o bn_s_mp_invmod_fast.o bn_s_mp_invmod_slow.o bn_s_mp_karatsuba_mul.o \
+bn_s_mp_karatsuba_sqr.o bn_s_mp_montgomery_reduce_fast.o bn_s_mp_mul_digs.o bn_s_mp_mul_digs_fast.o \
+bn_s_mp_mul_high_digs.o bn_s_mp_mul_high_digs_fast.o bn_s_mp_prime_is_divisible.o \
+bn_s_mp_rand_jenkins.o bn_s_mp_rand_platform.o bn_s_mp_reverse.o bn_s_mp_sqr.o bn_s_mp_sqr_fast.o \
+bn_s_mp_sub.o bn_s_mp_to_decimal_fast.o bn_s_mp_toom_mul.o bn_s_mp_toom_sqr.o
 
 HEADERS_PUB=tommath.h
 HEADERS=tommath_private.h tommath_class.h tommath_superclass.h $(HEADERS_PUB)