simplifications: add s_mp_zero_(digs|buf) and s_mp_copy_digs

minad · minad · commit 795cd2013ff9 · 2019-11-04T15:41:32.000+01:00
Originally I implemented these as macros. However, we have many
other small functions, such as mp_clamp and mp_exch, which are also not
implemented as macros right now.

If we were using C99, I would implement them as private static inline
functions, and mp_exch would be a public static inline function.

But since we are bound to C89, we simply use normal functions.
To achieve optimal performance, one should use either link-time
optimization or amalgamation.
diff --git a/.travis.yml b/.travis.yml
@@ -144,6 +144,7 @@ matrix:
     # clang for x86-64 architecture (64-bit longs and 64-bit pointers)
     - env: SANITIZER=1 CONV_WARNINGS=relaxed BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind'
     - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind'
+    - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --cflags=-DMP_USE_MEMOPS --with-m64 --with-travis-valgrind'
     - env: SANITIZER=1 CONV_WARNINGS=strict BUILDOPTIONS='--with-cc=clang-7 --c89 --with-m64 --with-travis-valgrind'
     - env: SANITIZER=1 BUILDOPTIONS='--with-cc=clang-7 --with-m64 --with-travis-valgrind --cflags=-DMP_PREC=MP_MIN_PREC'
     - env: SANITIZER=1 BUILDOPTIONS='--with-cc=clang-6.0 --with-m64 --with-travis-valgrind'
diff --git a/etc/tune.c b/etc/tune.c
@@ -292,7 +292,7 @@ int main(int argc, char **argv)
    s_number_of_test_loops = 64;
    s_stabilization_extra = 3;
 
-   MP_ZERO_BUFFER(&args, sizeof(args));
+   s_mp_zero_buf(&args, sizeof(args));
 
    args.testmode = 0;
    args.verbose = 0;
diff --git a/mp_add_d.c b/mp_add_d.c
@@ -80,7 +80,7 @@ mp_err mp_add_d(const mp_int *a, mp_digit b, mp_int *c)
    c->sign = MP_ZPOS;
 
    /* now zero to oldused */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
    mp_clamp(c);
 
    return MP_OKAY;
diff --git a/mp_clear.c b/mp_clear.c
@@ -9,7 +9,7 @@ void mp_clear(mp_int *a)
    /* only do anything if a hasn't been freed previously */
    if (a->dp != NULL) {
       /* free ram */
-      MP_FREE_DIGITS(a->dp, a->alloc);
+      MP_FREE_DIGS(a->dp, a->alloc);
 
       /* reset members to make debugging easier */
       a->dp    = NULL;
diff --git a/mp_copy.c b/mp_copy.c
@@ -6,8 +6,6 @@
 /* copy, b = a */
 mp_err mp_copy(const mp_int *a, mp_int *b)
 {
-   int n;
-
    /* if dst == src do nothing */
    if (a == b) {
       return MP_OKAY;
@@ -21,19 +19,12 @@ mp_err mp_copy(const mp_int *a, mp_int *b)
       }
    }
 
-   /* zero b and copy the parameters over */
-
-   /* copy all the digits */
-   for (n = 0; n < a->used; n++) {
-      b->dp[n] = a->dp[n];
-   }
-
-   /* clear high digits */
-   MP_ZERO_DIGITS(b->dp + a->used, b->used - a->used);
-
-   /* copy used count and sign */
+   /* copy everything over and zero high digits */
+   s_mp_copy_digs(b->dp, a->dp, a->used);
+   s_mp_zero_digs(b->dp + a->used, b->used - a->used);
    b->used = a->used;
    b->sign = a->sign;
+
    return MP_OKAY;
 }
 #endif
diff --git a/mp_div_2.c b/mp_div_2.c
@@ -33,7 +33,7 @@ mp_err mp_div_2(const mp_int *a, mp_int *b)
    }
 
    /* zero excess digits */
-   MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used);
+   s_mp_zero_digs(b->dp + b->used, oldused - b->used);
 
    b->sign = a->sign;
    mp_clamp(b);
diff --git a/mp_dr_reduce.c b/mp_dr_reduce.c
@@ -49,7 +49,7 @@ mp_err mp_dr_reduce(mp_int *x, const mp_int *n, mp_digit k)
       x->dp[i] = mu;
 
       /* zero words above m */
-      MP_ZERO_DIGITS(x->dp + m + 1, (x->used - m) - 1);
+      s_mp_zero_digs(x->dp + m + 1, (x->used - m) - 1);
 
       /* clamp, sub and return */
       mp_clamp(x);
diff --git a/mp_fwrite.c b/mp_fwrite.c
@@ -25,7 +25,7 @@ mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream)
       }
    }
 
-   MP_FREE_BUFFER(buf, size);
+   MP_FREE_BUF(buf, size);
    return err;
 }
 #endif
diff --git a/mp_grow.c b/mp_grow.c
@@ -26,7 +26,7 @@ mp_err mp_grow(mp_int *a, int size)
       a->dp = dp;
 
       /* zero excess digits */
-      MP_ZERO_DIGITS(a->dp + a->alloc, size - a->alloc);
+      s_mp_zero_digs(a->dp + a->alloc, size - a->alloc);
       a->alloc = size;
    }
    return MP_OKAY;
diff --git a/mp_lshd.c b/mp_lshd.c
@@ -37,7 +37,7 @@ mp_err mp_lshd(mp_int *a, int b)
    }
 
    /* zero the lower digits */
-   MP_ZERO_DIGITS(a->dp, b);
+   s_mp_zero_digs(a->dp, b);
 
    return MP_OKAY;
 }
diff --git a/mp_mod_2d.c b/mp_mod_2d.c
@@ -29,7 +29,7 @@ mp_err mp_mod_2d(const mp_int *a, int b, mp_int *c)
 
    /* zero digits above the last digit of the modulus */
    x = (b / MP_DIGIT_BIT) + (((b % MP_DIGIT_BIT) == 0) ? 0 : 1);
-   MP_ZERO_DIGITS(c->dp + x, c->used - x);
+   s_mp_zero_digs(c->dp + x, c->used - x);
 
    /* clear the digit that is not completely outside/inside the modulus */
    c->dp[b / MP_DIGIT_BIT] &=
diff --git a/mp_mul_2.c b/mp_mul_2.c
@@ -47,7 +47,7 @@ mp_err mp_mul_2(const mp_int *a, mp_int *b)
    /* now zero any excess digits on the destination
     * that we didn't write to
     */
-   MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used);
+   s_mp_zero_digs(b->dp + b->used, oldused - b->used);
 
    b->sign = a->sign;
    return MP_OKAY;
diff --git a/mp_mul_d.c b/mp_mul_d.c
@@ -45,7 +45,7 @@ mp_err mp_mul_d(const mp_int *a, mp_digit b, mp_int *c)
    c->used = a->used + 1;
 
    /* now zero digits above the top */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
 
diff --git a/mp_prime_rand.c b/mp_prime_rand.c
@@ -116,7 +116,7 @@ mp_err mp_prime_rand(mp_int *a, int t, int size, int flags)
 
    err = MP_OKAY;
 LBL_ERR:
-   MP_FREE_BUFFER(tmp, (size_t)bsize);
+   MP_FREE_BUF(tmp, (size_t)bsize);
    return err;
 }
 
diff --git a/mp_rshd.c b/mp_rshd.c
@@ -35,7 +35,7 @@ void mp_rshd(mp_int *a, int b)
    }
 
    /* zero the top digits */
-   MP_ZERO_DIGITS(a->dp + a->used - b, b);
+   s_mp_zero_digs(a->dp + a->used - b, b);
 
    /* remove excess digits */
    a->used -= b;
diff --git a/mp_set.c b/mp_set.c
@@ -10,6 +10,6 @@ void mp_set(mp_int *a, mp_digit b)
    a->dp[0] = b & MP_MASK;
    a->sign  = MP_ZPOS;
    a->used  = (a->dp[0] != 0u) ? 1 : 0;
-   MP_ZERO_DIGITS(a->dp + a->used, oldused - a->used);
+   s_mp_zero_digs(a->dp + a->used, oldused - a->used);
 }
 #endif
diff --git a/mp_sub_d.c b/mp_sub_d.c
@@ -72,7 +72,7 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c)
    }
 
    /* zero excess digits */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
    return MP_OKAY;
diff --git a/mp_zero.c b/mp_zero.c
@@ -7,7 +7,7 @@
 void mp_zero(mp_int *a)
 {
    a->sign = MP_ZPOS;
-   MP_ZERO_DIGITS(a->dp, a->used);
+   s_mp_zero_digs(a->dp, a->used);
    a->used = 0;
 }
 #endif
diff --git a/s_mp_add.c b/s_mp_add.c
@@ -64,7 +64,7 @@ mp_err s_mp_add(const mp_int *a, const mp_int *b, mp_int *c)
    c->dp[i] = u;
 
    /* clear digits above oldused */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
    return MP_OKAY;
diff --git a/s_mp_balance_mul.c b/s_mp_balance_mul.c
@@ -8,7 +8,7 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c)
 {
    mp_int a0, tmp, r;
    mp_err err;
-   int i, j, count,
+   int i, j,
        nblocks = MP_MAX(a->used, b->used) / MP_MIN(a->used, b->used),
        bsize = MP_MIN(a->used, b->used);
 
@@ -27,12 +27,11 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c)
 
    for (i = 0, j=0; i < nblocks; i++) {
       /* Cut a slice off of a */
-      a0.used = 0;
-      for (count = 0; count < bsize; count++) {
-         a0.dp[count] = a->dp[ j++ ];
-         a0.used++;
-      }
+      a0.used = bsize;
+      s_mp_copy_digs(a0.dp, a->dp + j, a0.used);
+      j += a0.used;
       mp_clamp(&a0);
+
       /* Multiply with b */
       if ((err = mp_mul(&a0, b, &tmp)) != MP_OKAY) {
          goto LBL_ERR;
@@ -48,12 +47,11 @@ mp_err s_mp_balance_mul(const mp_int *a, const mp_int *b, mp_int *c)
    }
    /* The left-overs; there are always left-overs */
    if (j < a->used) {
-      a0.used = 0;
-      for (count = 0; j < a->used; count++) {
-         a0.dp[count] = a->dp[ j++ ];
-         a0.used++;
-      }
+      a0.used = a->used - j;
+      s_mp_copy_digs(a0.dp, a->dp + j, a0.used);
+      j += a0.used;
       mp_clamp(&a0);
+
       if ((err = mp_mul(&a0, b, &tmp)) != MP_OKAY) {
          goto LBL_ERR;
       }
diff --git a/s_mp_copy_digs.c b/s_mp_copy_digs.c
@@ -0,0 +1,23 @@
+#include "tommath_private.h"
+#ifdef S_MP_COPY_DIGS_C
+/* LibTomMath, multiple-precision integer library -- Tom St Denis */
+/* SPDX-License-Identifier: Unlicense */
+
+#ifdef MP_USE_MEMOPS
+#  include <string.h>
+#endif
+
+void s_mp_copy_digs(mp_digit *d, const mp_digit *s, int digits)
+{
+#ifdef MP_USE_MEMOPS
+   if (digits > 0) {
+      memcpy(d, s, (size_t)digits * sizeof(mp_digit));
+   }
+#else
+   while (digits-- > 0) {
+      *d++ = *s++;
+   }
+#endif
+}
+
+#endif
diff --git a/s_mp_karatsuba_mul.c b/s_mp_karatsuba_mul.c
@@ -35,7 +35,7 @@
 mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
 {
    mp_int  x0, x1, y0, y1, t1, x0y0, x1y1;
-   int  B, i;
+   int  B;
    mp_err  err;
 
    /* min # of digits */
@@ -77,16 +77,10 @@ mp_err s_mp_karatsuba_mul(const mp_int *a, const mp_int *b, mp_int *c)
    /* we copy the digits directly instead of using higher level functions
     * since we also need to shift the digits
     */
-   for (i = 0; i < B; i++) {
-      x0.dp[i] = a->dp[i];
-      y0.dp[i] = b->dp[i];
-   }
-   for (i = B; i < a->used; i++) {
-      x1.dp[i - B] = a->dp[i];
-   }
-   for (i = B; i < b->used; i++) {
-      y1.dp[i - B] = b->dp[i];
-   }
+   s_mp_copy_digs(x0.dp, a->dp, x0.used);
+   s_mp_copy_digs(y0.dp, b->dp, y0.used);
+   s_mp_copy_digs(x1.dp, a->dp + B, x1.used);
+   s_mp_copy_digs(y1.dp, b->dp + B, y1.used);
 
    /* only need to clamp the lower words since by definition the
     * upper words x1/y1 must have a known number of digits
diff --git a/s_mp_karatsuba_sqr.c b/s_mp_karatsuba_sqr.c
@@ -13,7 +13,7 @@
 mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b)
 {
    mp_int  x0, x1, t1, t2, x0x0, x1x1;
-   int B, x;
+   int B;
    mp_err  err;
 
    /* min # of digits */
@@ -39,16 +39,10 @@ mp_err s_mp_karatsuba_sqr(const mp_int *a, mp_int *b)
       goto X0X0;
 
    /* now shift the digits */
-   for (x = 0; x < B; x++) {
-      x0.dp[x] = a->dp[x];
-   }
-   for (x = B; x < a->used; x++) {
-      x1.dp[x - B] = a->dp[x];
-   }
-
    x0.used = B;
    x1.used = a->used - B;
-
+   s_mp_copy_digs(x0.dp, a->dp, x0.used);
+   s_mp_copy_digs(x1.dp, a->dp + B, x1.used);
    mp_clamp(&x0);
 
    /* now calc the products x0*x0 and x1*x1 */
diff --git a/s_mp_montgomery_reduce_fast.c b/s_mp_montgomery_reduce_fast.c
@@ -42,7 +42,7 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
 
    /* zero the high words of W[a->used..m->used*2] */
    if (ix < ((n->used * 2) + 1)) {
-      MP_ZERO_BUFFER(W + x->used, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix));
+      s_mp_zero_buf(W + x->used, sizeof(mp_word) * (size_t)(((n->used * 2) + 1) - ix));
    }
 
    /* now we proceed to zero successive digits
@@ -108,7 +108,7 @@ mp_err s_mp_montgomery_reduce_fast(mp_int *x, const mp_int *n, mp_digit rho)
    /* zero oldused digits, if the input a was larger than
     * m->used+1 we'll have to clear the digits
     */
-   MP_ZERO_DIGITS(x->dp + x->used, oldused - x->used);
+   s_mp_zero_digs(x->dp + x->used, oldused - x->used);
 
    mp_clamp(x);
 
diff --git a/s_mp_mul_digs_fast.c b/s_mp_mul_digs_fast.c
@@ -72,7 +72,7 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs)
    }
 
    /* clear unused digits [that existed in the old copy of c] */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
    return MP_OKAY;
diff --git a/s_mp_mul_high_digs_fast.c b/s_mp_mul_high_digs_fast.c
@@ -64,7 +64,7 @@ mp_err s_mp_mul_high_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int
    }
 
    /* clear unused digits [that existed in the old copy of c] */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
    return MP_OKAY;
diff --git a/s_mp_sqr_fast.c b/s_mp_sqr_fast.c
@@ -81,7 +81,7 @@ mp_err s_mp_sqr_fast(const mp_int *a, mp_int *b)
    }
 
    /* clear unused digits [that existed in the old copy of c] */
-   MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used);
+   s_mp_zero_digs(b->dp + b->used, oldused - b->used);
 
    mp_clamp(b);
    return MP_OKAY;
diff --git a/s_mp_sub.c b/s_mp_sub.c
@@ -49,7 +49,7 @@ mp_err s_mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
    }
 
    /* clear digits above used (since we may not have grown result above) */
-   MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);
+   s_mp_zero_digs(c->dp + c->used, oldused - c->used);
 
    mp_clamp(c);
    return MP_OKAY;
diff --git a/s_mp_toom_mul.c b/s_mp_toom_mul.c
diff --git a/s_mp_toom_sqr.c b/s_mp_toom_sqr.c
diff --git a/s_mp_zero_buf.c b/s_mp_zero_buf.c
diff --git a/s_mp_zero_digs.c b/s_mp_zero_digs.c
diff --git a/tommath_private.h b/tommath_private.h

Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ mp_err mp_div_2(const mp_int *a, mp_int *b)`
`33`	`33`	`}`
`34`	`34`
`35`	`35`	`/* zero excess digits */`
`36`		`- MP_ZERO_DIGITS(b->dp + b->used, oldused - b->used);`
	`36`	`+ s_mp_zero_digs(b->dp + b->used, oldused - b->used);`
`37`	`37`
`38`	`38`	`b->sign = a->sign;`
`39`	`39`	`mp_clamp(b);`
Original file line number	Diff line number	Diff line change
`@@ -25,7 +25,7 @@ mp_err mp_fwrite(const mp_int *a, int radix, FILE *stream)`
`25`	`25`	`}`
`26`	`26`	`}`
`27`	`27`
`28`		`- MP_FREE_BUFFER(buf, size);`
	`28`	`+ MP_FREE_BUF(buf, size);`
`29`	`29`	`return err;`
`30`	`30`	`}`
`31`	`31`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@ mp_err mp_grow(mp_int *a, int size)`
`26`	`26`	`a->dp = dp;`
`27`	`27`
`28`	`28`	`/* zero excess digits */`
`29`		`- MP_ZERO_DIGITS(a->dp + a->alloc, size - a->alloc);`
	`29`	`+ s_mp_zero_digs(a->dp + a->alloc, size - a->alloc);`
`30`	`30`	`a->alloc = size;`
`31`	`31`	`}`
`32`	`32`	`return MP_OKAY;`
Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ mp_err mp_lshd(mp_int *a, int b)`
`37`	`37`	`}`
`38`	`38`
`39`	`39`	`/* zero the lower digits */`
`40`		`- MP_ZERO_DIGITS(a->dp, b);`
	`40`	`+ s_mp_zero_digs(a->dp, b);`
`41`	`41`
`42`	`42`	`return MP_OKAY;`
`43`	`43`	`}`
Original file line number	Diff line number	Diff line change
`@@ -116,7 +116,7 @@ mp_err mp_prime_rand(mp_int *a, int t, int size, int flags)`
`116`	`116`
`117`	`117`	`err = MP_OKAY;`
`118`	`118`	`LBL_ERR:`
`119`		`- MP_FREE_BUFFER(tmp, (size_t)bsize);`
	`119`	`+ MP_FREE_BUF(tmp, (size_t)bsize);`
`120`	`120`	`return err;`
`121`	`121`	`}`
`122`	`122`
Original file line number	Diff line number	Diff line change
`@@ -35,7 +35,7 @@ void mp_rshd(mp_int *a, int b)`
`35`	`35`	`}`
`36`	`36`
`37`	`37`	`/* zero the top digits */`
`38`		`- MP_ZERO_DIGITS(a->dp + a->used - b, b);`
	`38`	`+ s_mp_zero_digs(a->dp + a->used - b, b);`
`39`	`39`
`40`	`40`	`/* remove excess digits */`
`41`	`41`	`a->used -= b;`
Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,6 @@ void mp_set(mp_int *a, mp_digit b)`
`10`	`10`	`a->dp[0] = b & MP_MASK;`
`11`	`11`	`a->sign = MP_ZPOS;`
`12`	`12`	`a->used = (a->dp[0] != 0u) ? 1 : 0;`
`13`		`- MP_ZERO_DIGITS(a->dp + a->used, oldused - a->used);`
	`13`	`+ s_mp_zero_digs(a->dp + a->used, oldused - a->used);`
`14`	`14`	`}`
`15`	`15`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -72,7 +72,7 @@ mp_err mp_sub_d(const mp_int *a, mp_digit b, mp_int *c)`
`72`	`72`	`}`
`73`	`73`
`74`	`74`	`/* zero excess digits */`
`75`		`- MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);`
	`75`	`+ s_mp_zero_digs(c->dp + c->used, oldused - c->used);`
`76`	`76`
`77`	`77`	`mp_clamp(c);`
`78`	`78`	`return MP_OKAY;`
Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`void mp_zero(mp_int *a)`
`8`	`8`	`{`
`9`	`9`	`a->sign = MP_ZPOS;`
`10`		`- MP_ZERO_DIGITS(a->dp, a->used);`
	`10`	`+ s_mp_zero_digs(a->dp, a->used);`
`11`	`11`	`a->used = 0;`
`12`	`12`	`}`
`13`	`13`	`#endif`
Original file line number	Diff line number	Diff line change
`@@ -72,7 +72,7 @@ mp_err s_mp_mul_digs_fast(const mp_int *a, const mp_int *b, mp_int *c, int digs)`
`72`	`72`	`}`
`73`	`73`
`74`	`74`	`/* clear unused digits [that existed in the old copy of c] */`
`75`		`- MP_ZERO_DIGITS(c->dp + c->used, oldused - c->used);`
	`75`	`+ s_mp_zero_digs(c->dp + c->used, oldused - c->used);`
`76`	`76`
`77`	`77`	`mp_clamp(c);`
`78`	`78`	`return MP_OKAY;`