diff --git a/quickjs.c b/quickjs.c
index 3d1f7229c..3be5b15d2 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -44727,7 +44727,7 @@ static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val,
     JSValue iter, next, item, ret;
     int done;
     double d;
-    xsum_large_accumulator lacc;
+    xsum_small_accumulator acc;
     SumPreciseStateEnum state;
 
     iter = JS_GetIterator(ctx, argv[0], /*async*/false);
@@ -44737,7 +44737,7 @@ static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val,
     next = JS_GetProperty(ctx, iter, JS_ATOM_next);
     if (JS_IsException(next))
         goto fail;
-    xsum_large_init(&lacc);
+    xsum_small_init(&acc);
     state = SUM_PRECISE_STATE_MINUS_ZERO;
     for (;;) {
         item = JS_IteratorNext(ctx, iter, next, 0, NULL, &done);
@@ -44773,7 +44773,7 @@ static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val,
                     state = SUM_PRECISE_STATE_MINUS_INFINITY;
             else if (!(d == 0.0 && signbit(d)) && (state == SUM_PRECISE_STATE_MINUS_ZERO || state == SUM_PRECISE_STATE_FINITE)) {
                 state = SUM_PRECISE_STATE_FINITE;
-                xsum_large_add1(&lacc, d);
+                xsum_small_add1(&acc, d);
             }
         }
     }
@@ -44792,7 +44792,7 @@ static JSValue js_math_sumPrecise(JSContext *ctx, JSValueConst this_val,
         d = -0.0;
         break;
     case SUM_PRECISE_STATE_FINITE:
-        d = xsum_large_round(&lacc);
+        d = xsum_small_round(&acc);
         break;
     default:
         abort();
diff --git a/xsum.c b/xsum.c
index 0bbf4d9ee..98d970b87 100644
--- a/xsum.c
+++ b/xsum.c
@@ -22,8 +22,6 @@
    WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 
-
-#include <stdio.h>
 #include <string.h>
 #include <math.h>
 #include "xsum.h"
@@ -58,26 +56,13 @@
 #define USE_SIMD 1          /* Use SIMD intrinsics (SSE2/AVX) if available?   */
 
 #define USE_MEMSET_SMALL 1  /* Use memset rather than a loop (for small mem)? */
-#define USE_MEMSET_LARGE 1  /* Use memset rather than a loop (for large mem)? */
-#define USE_USED_LARGE 1    /* Use the used flags in a large accumulator? */
 
 #define OPT_SMALL 0         /* Class of manual optimization for operations on */
                             /*   small accumulator: 0 (none), 1, 2, 3 (SIMD)  */
 #define OPT_CARRY 1         /* Use manually optimized carry propagation?      */
 
-#define OPT_LARGE_SUM 1     /* Should manually optimized routines be used for */
-#define OPT_LARGE_SQNORM 1  /*   operations using the large accumulator?      */
-#define OPT_LARGE_DOT 1
-
-#define OPT_SIMPLE_SUM 1    /* Should manually optimized routines be used for */
-#define OPT_SIMPLE_SQNORM 1 /*   operations done with simple FP arithmetic?   */
-#define OPT_SIMPLE_DOT 1
-
-#define OPT_KAHAN_SUM 0     /* Use manually optimized routine for Kahan sum?  */
-
 #define INLINE_SMALL 1      /* Inline more of the small accumulator routines? */
                             /*   (Not currently used)                         */
-#define INLINE_LARGE 1      /* Inline more of the large accumulator routines? */
 
 
 /* INCLUDE INTEL INTRINSICS IF USED AND AVAILABLE. */
@@ -441,315 +426,6 @@ static NOINLINE int xsum_carry_propagate (xsum_small_accumulator *restrict sacc)
 }
 
 
-/* INITIALIZE LARGE ACCUMULATOR CHUNKS.  Sets all counts to -1. */
-
-static void xsum_large_init_chunks (xsum_large_accumulator *restrict lacc)
-{
-# if USE_MEMSET_LARGE
-  {
-    /* Since in two's complement representation, -1 consists of all 1 bits,
-       we can initialize 16-bit values to -1 by initializing their component
-       bytes to 0xff. */
-
-    memset (lacc->count, 0xff, XSUM_LCHUNKS * sizeof *lacc->count);
-  }
-# else
-  { xsum_lcount *p;
-    int n;
-    p = lacc->count;
-    n = XSUM_LCHUNKS;
-    do { *p++ = -1; n -= 1; } while (n > 0);
-  }
-# endif
-
-# if USE_USED_LARGE
-#   if USE_MEMSET_SMALL
-    { memset(lacc->chunks_used, 0, XSUM_LCHUNKS/64 * sizeof *lacc->chunks_used);
-    }
-#   elif USE_SIMD && __AVX__ && XSUM_LCHUNKS/64==64
-    { xsum_used *ch = lacc->chunks_used;
-      __m256i z = _mm256_setzero_si256();
-      _mm256_storeu_si256 ((__m256i *)(ch+0), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+4), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+8), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+12), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+16), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+20), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+24), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+28), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+32), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+36), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+40), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+44), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+48), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+52), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+56), z);
-      _mm256_storeu_si256 ((__m256i *)(ch+60), z);
-    }
-#   else
-    { xsum_lchunk *p;
-      int n;
-      p = lacc->chunks_used;
-      n = XSUM_LCHUNKS/64;
-      do { *p++ = 0; n -= 1; } while (n > 0);
-    }
-#   endif
-    lacc->used_used = 0;
-# endif
-}
-
-
-/* ADD CHUNK FROM A LARGE ACCUMULATOR TO THE SMALL ACCUMULATOR WITHIN IT.
-   The large accumulator chunk to add is indexed by ix.  This chunk will
-   be cleared to zero and its count reset after it has been added to the
-   small accumulator (except no add is done for a new chunk being initialized).
-   This procedure should not be called for the special chunks correspnding to
-   Inf or NaN, whose counts should always remain at -1. */
-
-#if INLINE_LARGE
-  INLINE
-#endif
-static void xsum_add_lchunk_to_small (xsum_large_accumulator *restrict lacc,
-                                      xsum_expint ix)
-{
-  xsum_expint exp, low_exp, high_exp;
-  xsum_uint low_chunk, mid_chunk, high_chunk;
-  xsum_lchunk chunk;
-
-  const xsum_expint count = lacc->count[ix];
-
-  /* Add to the small accumulator only if the count is not -1, which
-     indicates a chunk that contains nothing yet. */
-
-  if (count >= 0)
-  {
-    /* Propagate carries in the small accumulator if necessary. */
-
-    if (lacc->sacc.adds_until_propagate == 0)
-    { (void) xsum_carry_propagate(&lacc->sacc);
-    }
-
-    /* Get the chunk we will add.  Note that this chunk is the integer sum
-       of entire 64-bit floating-point representations, with sign, exponent,
-       and mantissa, but we want only the sum of the mantissas. */
-
-    chunk = lacc->chunk[ix];
-
-    /* If we added the maximum number of values to 'chunk', the sum of
-       the sign and exponent parts (all the same, equal to the index) will
-       have overflowed out the top, leaving only the sum of the mantissas.
-       If the count of how many more terms we could have summed is greater
-       than zero, we therefore add this count times the index (shifted to
-       the position of the sign and exponent) to get the unwanted bits to
-       overflow out the top. */
-
-    if (count > 0)
-    { chunk += (xsum_lchunk)(count*ix) << XSUM_MANTISSA_BITS;
-    }
-
-    /* Find the exponent for this chunk from the low bits of the index,
-       and split it into low and high parts, for accessing the small
-       accumulator.  Noting that for denormalized numbers where the
-       exponent part is zero, the actual exponent is 1 (before subtracting
-       the bias), not zero. */
-
-    exp = ix & XSUM_EXP_MASK;
-    if (exp == 0)
-    { low_exp = 1;
-      high_exp = 0;
-    }
-    else
-    { low_exp = exp & XSUM_LOW_EXP_MASK;
-      high_exp = exp >> XSUM_LOW_EXP_BITS;
-    }
-
-    /* Split the mantissa into three parts, for three consecutive chunks in
-       the small accumulator.  Except for denormalized numbers, add in the sum
-       of all the implicit 1 bits that are above the actual mantissa bits. */
-
-    low_chunk = (chunk << low_exp) & XSUM_LOW_MANTISSA_MASK;
-    mid_chunk = chunk >> (XSUM_LOW_MANTISSA_BITS - low_exp);
-    if (exp != 0) /* normalized */
-    { mid_chunk += (xsum_lchunk)((1 << XSUM_LCOUNT_BITS) - count)
-         << (XSUM_MANTISSA_BITS - XSUM_LOW_MANTISSA_BITS + low_exp);
-    }
-    high_chunk = mid_chunk >> XSUM_LOW_MANTISSA_BITS;
-    mid_chunk &= XSUM_LOW_MANTISSA_MASK;
-
-    /* Add or subtract the three parts of the mantissa from three small
-       accumulator chunks, according to the sign that is part of the index. */
-
-    if (ix & (1 << XSUM_EXP_BITS))
-    { lacc->sacc.chunk[high_exp] -= low_chunk;
-      lacc->sacc.chunk[high_exp+1] -= mid_chunk;
-      lacc->sacc.chunk[high_exp+2] -= high_chunk;
-    }
-    else
-    { lacc->sacc.chunk[high_exp] += low_chunk;
-      lacc->sacc.chunk[high_exp+1] += mid_chunk;
-      lacc->sacc.chunk[high_exp+2] += high_chunk;
-    }
-
-    /* The above additions/subtractions reduce by one the number we can
-       do before we need to do carry propagation again. */
-
-    lacc->sacc.adds_until_propagate -= 1;
-  }
-
-  /* We now clear the chunk to zero, and set the count to the number
-     of adds we can do before the mantissa would overflow.  We also
-     set the bit in chunks_used to indicate that this chunk is in use
-     (if that is enabled). */
-
-  lacc->chunk[ix] = 0;
-  lacc->count[ix] = 1 << XSUM_LCOUNT_BITS;
-
-# if USE_USED_LARGE
-    lacc->chunks_used[ix>>6] |= (xsum_used)1 << (ix & 0x3f);
-    lacc->used_used |= (xsum_used)1 << (ix>>6);
-# endif
-}
-
-
-/* ADD A CHUNK TO THE LARGE ACCUMULATOR OR PROCESS NAN OR INF.  This routine
-   is called when the count for a chunk is negative after decrementing, which
-   indicates either inf/nan, or that the chunk has not been initialized, or
-   that the chunk needs to be transferred to the small accumulator. */
-
-#if INLINE_LARGE
-  INLINE
-#endif
-static void xsum_large_add_value_inf_nan (xsum_large_accumulator *restrict lacc,
-                                          xsum_expint ix, xsum_lchunk uintv)
-{
-  if ((ix & XSUM_EXP_MASK) == XSUM_EXP_MASK)
-  { xsum_small_add_inf_nan (&lacc->sacc, uintv);
-  }
-  else
-  { xsum_add_lchunk_to_small (lacc, ix);
-    lacc->count[ix] -= 1;
-    lacc->chunk[ix] += uintv;
-  }
-}
-
-
-/* TRANSFER ALL CHUNKS IN LARGE ACCUMULATOR TO ITS SMALL ACCUMULATOR. */
-
-static void xsum_large_transfer_to_small (xsum_large_accumulator *restrict lacc)
-{
-# if USE_USED_LARGE
-  {
-    xsum_used *p, *e;
-    xsum_used u, uu;
-    int ix;
-
-    p = lacc->chunks_used;
-    e = p + XSUM_LCHUNKS/64;
-
-    /* Very quickly skip some unused low-order blocks of chunks by looking
-       at the used_used flags. */
-
-    uu = lacc->used_used;
-    if ((uu & 0xffffffff) == 0)
-    { uu >>= 32;
-      p += 32;
-    }
-    if ((uu & 0xffff) == 0)
-    { uu >>= 16;
-      p += 16;
-    }
-    if ((uu & 0xff) == 0)
-    { p += 8;
-    }
-
-    /* Loop over remaining blocks of chunks. */
-
-    do
-    {
-      /* Loop to quickly find the next non-zero block of used flags, or finish
-         up if we've added all the used blocks to the small accumulator. */
-
-      for (;;)
-      { u = *p;
-        if (u != 0)
-        { break;
-        }
-        p += 1;
-        if (p == e)
-        { return;
-        }
-        u = *p;
-        if (u != 0)
-        { break;
-        }
-        p += 1;
-        if (p == e)
-        { return;
-        }
-        u = *p;
-        if (u != 0)
-        { break;
-        }
-        p += 1;
-        if (p == e)
-        { return;
-        }
-        u = *p;
-        if (u != 0)
-        { break;
-        }
-        p += 1;
-        if (p == e)
-        { return;
-        }
-      }
-
-      /* Find and process the chunks in this block that are used.  We skip
-         forward based on the chunks_used flags until we're within eight
-         bits of a chunk that is in use. */
-
-      ix = (p - lacc->chunks_used) << 6;
-      if ((u & 0xffffffff) == 0)
-      { u >>= 32;
-        ix += 32;
-      }
-      if ((u & 0xffff) == 0)
-      { u >>= 16;
-        ix += 16;
-      }
-      if ((u & 0xff) == 0)
-      { u >>= 8;
-        ix += 8;
-      }
-
-      do
-      { if (lacc->count[ix] >= 0)
-        { xsum_add_lchunk_to_small (lacc, ix);
-        }
-        ix += 1;
-        u >>= 1;
-      } while (u != 0);
-
-      p += 1;
-
-    } while (p != e);
-  }
-# else
-  { xsum_expint ix;
-
-    /* When there are no used flags, we scan sequentially for chunks that
-       need to be added to the small accumulator. */
-
-    for (ix = 0; ix < XSUM_LCHUNKS; ix++)
-    { if (lacc->count[ix] >= 0)
-      { xsum_add_lchunk_to_small (lacc, ix);
-      }
-    }
-  }
-# endif
-}
-
-
 /* ------------------------ EXTERNAL ROUTINES ------------------------------- */
 
 
@@ -1312,549 +988,6 @@ done_rounding: ;
 }
 
 
-/* INITIALIZE A LARGE ACCUMULATOR TO ZERO. */
-
-void xsum_large_init (xsum_large_accumulator *restrict lacc)
-{
-  xsum_large_init_chunks (lacc);
-  xsum_small_init (&lacc->sacc);
-}
-
-
-/* ADD A VECTOR OF FLOATING-POINT NUMBERS TO A LARGE ACCUMULATOR. */
-
-void xsum_large_addv (xsum_large_accumulator *restrict lacc,
-                      const xsum_flt *restrict vec,
-                      xsum_length n)
-{
-# if OPT_LARGE_SUM
-  {
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-
-    while (n > 3)
-    {
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 4;
-    }
-
-    while (n > 0)
-    {
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-    }
-  }
-# else
-  {
-    /* Version not manually optimized - maybe the compiler can do better. */
-
-    if (n == 0)
-    { return;
-    }
-
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-
-    do
-    {
-      /* Fetch the next number, and convert to integer form in uintv. */
-
-      COPY64 (uintv, *vec);
-      vec += 1;
-
-      /* Isolate the upper sign+exponent bits that index the chunk. */
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      /* Find the count for this chunk, and subtract one. */
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      {
-        /* If the decremented count is negative, it's either a special
-           Inf/NaN chunk (in which case count will stay at -1), or one that
-           needs to be transferred to the small accumulator, or one that
-           has never been used before and needs to be initialized. */
-
-        xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      {
-        /* Store the decremented count of additions allowed before transfer,
-           and add this value to the chunk. */
-
-        lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-
-    } while (n > 0);
-  }
-# endif
-}
-
-
-/* ADD ONE DOUBLE TO A LARGE ACCUMULATOR.  Just calls xsum_large_addv. */
-
-void xsum_large_add1 (xsum_large_accumulator *restrict lacc, xsum_flt value)
-{
-  xsum_large_addv (lacc, &value, 1);
-}
-
-
-/* ADD SQUARED NORM OF VECTOR OF FLOATING-POINT NUMBERS TO LARGE ACCUMULATOR. */
-
-void xsum_large_add_sqnorm (xsum_large_accumulator *restrict lacc,
-                            const xsum_flt *restrict vec,
-                            xsum_length n)
-{
-# if OPT_LARGE_SQNORM
-  {
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-    double fltv;
-
-    while (n > 3)
-    {
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 4;
-    }
-
-    while (n > 0)
-    {
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-    }
-  }
-# else
-  {
-    /* Version not manually optimized - maybe the compiler can do better. */
-
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-    double fltv;
-
-    if (n == 0)
-    { return;
-    }
-
-    do
-    {
-      /* Fetch the next number, square it, and convert to integer form in
-         uintv. */
-
-      fltv = *vec * *vec;
-      COPY64 (uintv, fltv);
-      vec += 1;
-
-      /* Isolate the upper sign+exponent bits that index the chunk. */
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      /* Find the count for this chunk, and subtract one. */
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      {
-        /* If the decremented count is negative, it's either a special
-           Inf/NaN chunk (in which case count will stay at -1), or one that
-           needs to be transferred to the small accumulator, or one that
-           has never been used before and needs to be initialized. */
-
-        xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      {
-        /* Store the decremented count of additions allowed before transfer,
-           and add this value to the chunk. */
-
-        lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-
-    } while (n > 0);
-  }
-# endif
-}
-
-
-/* ADD DOT PRODUCT OF VECTORS OF FLOATING-POINT NUMBERS TO LARGE ACCUMULATOR. */
-
-void xsum_large_add_dot (xsum_large_accumulator *restrict lacc,
-                         const xsum_flt *vec1,
-                         const xsum_flt *vec2,
-                         xsum_length n)
-{
-# if OPT_LARGE_DOT
-  {
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-    double fltv;
-
-    while (n > 3)
-    {
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 4;
-    }
-
-    while (n > 0)
-    {
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      { xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      { lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-    }
-  }
-# else
-  {
-    /* Version not manually optimized - maybe the compiler can do better. */
-
-    xsum_lcount count;
-    xsum_expint ix;
-    xsum_uint uintv;
-    double fltv;
-
-    if (n == 0)
-    { return;
-    }
-
-    do
-    {
-      /* Fetch the next numbers, multiply them, and convert the result to
-         integer form in uintv. */
-
-      fltv = *vec1 * *vec2;
-      COPY64 (uintv, fltv);
-      vec1 += 1; vec2 += 1;
-
-      /* Isolate the upper sign+exponent bits that index the chunk. */
-
-      ix = uintv >> XSUM_MANTISSA_BITS;
-
-      /* Find the count for this chunk, and subtract one. */
-
-      count = lacc->count[ix] - 1;
-
-      if (count < 0)
-      {
-        /* If the decremented count is negative, it's either a special
-           Inf/NaN chunk (in which case count will stay at -1), or one that
-           needs to be transferred to the small accumulator, or one that
-           has never been used before and needs to be initialized. */
-
-        xsum_large_add_value_inf_nan (lacc, ix, uintv);
-      }
-      else
-      {
-        /* Store the decremented count of additions allowed before transfer,
-           and add this value to the chunk. */
-
-        lacc->count[ix] = count;
-        lacc->chunk[ix] += uintv;
-      }
-
-      n -= 1;
-
-    } while (n > 0);
-  }
-# endif
-}
-
-
-/* ADD A LARGE ACCUMULATOR TO ANOTHER LARGE ACCUMULATOR.  The first argument
-   is the destination, which is modified.  The second is the accumulator to
-   add, which may also be modified, but should still represent the same
-   number.  Source and destination may be the same. */
-
-void xsum_large_add_accumulator (xsum_large_accumulator *dst_lacc,
-                                 xsum_large_accumulator *src_lacc)
-{
-  xsum_large_transfer_to_small (src_lacc);
-  xsum_small_add_accumulator (&dst_lacc->sacc, &src_lacc->sacc);
-}
-
-
-/* NEGATE THE VALUE IN A LARGE ACCUMULATOR. */
-
-void xsum_large_negate (xsum_large_accumulator *restrict lacc)
-{
-  xsum_large_transfer_to_small (lacc);
-  xsum_small_negate (&lacc->sacc);
-}
-
-
-
-/* RETURN RESULT OF ROUNDING A LARGE ACCUMULATOR.  Rounding mode is to nearest,
-   with ties to even.
-
-   This is done by adding all the chunks in the large accumulator to the
-   small accumulator, and then calling its rounding procedure. */
-
-xsum_flt xsum_large_round (xsum_large_accumulator *restrict lacc)
-{
-  xsum_large_transfer_to_small (lacc);
-
-  return xsum_small_round (&lacc->sacc);
-}
-
-
-/* TRANSFER NUMBER FROM A LARGE ACCUMULATOR TO A SMALL ACCUMULATOR. */
-
-void xsum_large_to_small_accumulator (xsum_small_accumulator *restrict sacc,
-                                      xsum_large_accumulator *restrict lacc)
-{
-  xsum_large_transfer_to_small (lacc);
-  *sacc = lacc->sacc;
-}
-
-
-/* TRANSFER NUMBER FROM A SMALL ACCUMULATOR TO A LARGE ACCUMULATOR. */
-
-void xsum_small_to_large_accumulator (xsum_large_accumulator *restrict lacc,
-                                      xsum_small_accumulator *restrict sacc)
-{
-  xsum_large_init_chunks (lacc);
-  lacc->sacc = *sacc;
-}
-
-
 /* FIND RESULT OF DIVIDING SMALL ACCUMULATOR BY UNSIGNED INTEGER. */
 
 xsum_flt xsum_small_div_unsigned
@@ -1987,236 +1120,3 @@ xsum_flt xsum_small_div_int
   { return xsum_small_div_unsigned (sacc, (unsigned) div);
   }
 }
-
-
-/* FIND RESULT OF DIVIDING LARGE ACCUMULATOR BY UNSIGNED INTEGER. */
-
-xsum_flt xsum_large_div_unsigned
-           (xsum_large_accumulator *restrict lacc, unsigned div)
-{
-  xsum_large_transfer_to_small (lacc);
-  return xsum_small_div_unsigned (&lacc->sacc, div);
-}
-
-
-/* FIND RESULT OF DIVIDING LARGE ACCUMULATOR BY SIGNED INTEGER. */
-
-xsum_flt xsum_large_div_int
-           (xsum_large_accumulator *restrict lacc, int div)
-{
-  xsum_large_transfer_to_small (lacc);
-  return xsum_small_div_int (&lacc->sacc, div);
-}
-
-
-/* ------------------- ROUTINES FOR NON-EXACT SUMMATION --------------------- */
-
-
-/* SUM A VECTOR WITH DOUBLE FP ACCUMULATOR. */
-
-xsum_flt xsum_sum_double (const xsum_flt *restrict vec,
-                          xsum_length n)
-{ double s;
-  xsum_length j;
-  s = 0.0;
-# if OPT_SIMPLE_SUM
-  { for (j = 3; j < n; j += 4)
-    { s += vec[j-3];
-      s += vec[j-2];
-      s += vec[j-1];
-      s += vec[j];
-    }
-    for (j = j-3; j < n; j++)
-    { s += vec[j];
-    }
-  }
-# else
-  { for (j = 0; j < n; j++)
-    { s += vec[j];
-    }
-  }
-# endif
-  return (xsum_flt) s;
-}
-
-
-/* SUM A VECTOR WITH FLOAT128 ACCUMULATOR. */
-
-#ifdef FLOAT128
-
-#include <quadmath.h>
-
-xsum_flt xsum_sum_float128 (const xsum_flt *restrict vec,
-                            xsum_length n)
-{ __float128 s;
-  xsum_length j;
-  s = 0.0;
-  for (j = 0; j < n; j++)
-  { s += vec[j];
-  }
-  return (xsum_flt) s;
-}
-
-#endif
-
-
-/* SUM A VECTOR WITH DOUBLE FP, NOT IN ORDER. */
-
-xsum_flt xsum_sum_double_not_ordered (const xsum_flt *restrict vec,
-                                      xsum_length n)
-{ double s[2] = { 0, 0 };
-  xsum_length j;
-  for (j = 1; j < n; j += 2)
-  { s[0] += vec[j-1];
-    s[1] += vec[j];
-  }
-  if (j == n)
-  { s[0] += vec[j-1];
-  }
-  return (xsum_flt) (s[0]+s[1]);
-}
-
-
-/* SUM A VECTOR WITH KAHAN'S METHOD. */
-
-xsum_flt xsum_sum_kahan (const xsum_flt *restrict vec,
-                         xsum_length n)
-{ double s, t, c, y;
-  xsum_length j;
-  s = 0.0;
-  c = 0.0;
-# if OPT_KAHAN_SUM
-  { for (j = 1; j < n; j += 2)
-    { y = vec[j-1] - c;
-      t = s;
-      s += y;
-      c = (s - t) - y;
-      y = vec[j] - c;
-      t = s;
-      s += y;
-      c = (s - t) - y;
-    }
-    for (j = j-1; j < n; j++)
-    { y = vec[j] - c;
-      t = s;
-      s += y;
-      c = (s - t) - y;
-    }
-  }
-# else
-  { for (j = 0; j < n; j++)
-    { y = vec[j] - c;
-      t = s;
-      s += y;
-      c = (s - t) - y;
-    }
-  }
-# endif
-  return (xsum_flt) s;
-}
-
-
-/* SQUARED NORM OF A VECTOR WITH DOUBLE FP ACCUMULATOR. */
-
-xsum_flt xsum_sqnorm_double (const xsum_flt *restrict vec,
-                             xsum_length n)
-{ double s;
-  xsum_length j;
-
-  s = 0.0;
-# if OPT_SIMPLE_SQNORM
-  { double a, b, c, d;
-    for (j = 3; j < n; j += 4)
-    { a = vec[j-3];
-      b = vec[j-2];
-      c = vec[j-1];
-      d = vec[j];
-      s += a*a;
-      s += b*b;
-      s += c*c;
-      s += d*d;
-    }
-    for (j = j-3; j < n; j++)
-    { a = vec[j];
-      s += a*a;
-    }
-  }
-# else
-  { double a;
-    for (j = 0; j < n; j++)
-    { a = vec[j];
-      s += a*a;
-    }
-  }
-# endif
-  return (xsum_flt) s;
-}
-
-
-/* SQUARED NORM OF A VECTOR WITH DOUBLE FP, NOT IN ORDER. */
-
-xsum_flt xsum_sqnorm_double_not_ordered (const xsum_flt *restrict vec,
-                                         xsum_length n)
-{ double s[2] = { 0, 0 };
-  double a[2];
-  xsum_length j;
-  for (j = 1; j < n; j += 2)
-  { a[0] = vec[j-1];
-    a[1] = vec[j];
-    s[0] += a[0]*a[0];
-    s[1] += a[1]*a[1];
-  }
-  if (j == n)
-  { a[0] = vec[j-1];
-    s[0] += a[0]*a[0];
-  }
-  return (xsum_flt) (s[0]+s[1]);
-}
-
-
-/* DOT PRODUCT OF VECTORS WITH DOUBLE FP ACCUMULATOR. */
-
-xsum_flt xsum_dot_double (const xsum_flt *vec1,
-                          const xsum_flt *vec2,
-                          xsum_length n)
-{ double s;
-  xsum_length j;
-
-  s = 0.0;
-# if OPT_SIMPLE_DOT
-  { for (j = 3; j < n; j += 4)
-    { s += vec1[j-3] * vec2[j-3];
-      s += vec1[j-2] * vec2[j-2];
-      s += vec1[j-1] * vec2[j-1];
-      s += vec1[j] * vec2[j];
-    }
-    for (j = j-3; j < n; j++)
-    { s += vec1[j] * vec2[j];
-    }
-  }
-# else
-  { for (j = 0; j < n; j++)
-    { s += vec1[j] * vec2[j];
-    }
-  }
-# endif
-  return (xsum_flt) s;
-}
-
-
-/* DOT PRODUCT OF VECTORS WITH DOUBLE FP, NOT IN ORDER. */
-
-xsum_flt xsum_dot_double_not_ordered (const xsum_flt *vec1,
-                                      const xsum_flt *vec2,
-                                      xsum_length n)
-{ double s[2] = { 0, 0 };
-  xsum_length j;
-  for (j = 1; j < n; j += 2)
-  { s[0] += vec1[j-1] * vec2[j-1];
-    s[1] += vec1[j] * vec2[j];
-  }
-  if (j == n)
-  { s[0] += vec1[j-1] * vec2[j-1];
-  }
-  return (xsum_flt) (s[0]+s[1]);
-}
diff --git a/xsum.h b/xsum.h
index 16522e3cf..2372cac61 100644
--- a/xsum.h
+++ b/xsum.h
@@ -99,30 +99,6 @@ typedef struct
 } xsum_small_accumulator;          /*     propagation must be done again    */
 
 
-/* CONSTANTS DEFINING THE LARGE ACCUMULATOR FORMAT. */
-
-#define XSUM_LCHUNK_BITS 64        /* Bits in chunk of the large accumulator */
-typedef uint64_t xsum_lchunk;      /* Integer type of large accumulator chunk,
-                                      must be EXACTLY 64 bits in size */
-
-#define XSUM_LCOUNT_BITS (64-XSUM_MANTISSA_BITS)  /* # of bits in count */
-typedef int_least16_t xsum_lcount; /* Signed int type of counts for large acc.*/
-
-#define XSUM_LCHUNKS \
-  (1 << (XSUM_EXP_BITS+1))         /* # of chunks in large accumulator */
-
-typedef uint64_t xsum_used;        /* Unsigned type for holding used flags */
-
-typedef struct
-{ xsum_lchunk chunk[XSUM_LCHUNKS]; /* Chunks making up large accumulator */
-  xsum_lcount count[XSUM_LCHUNKS]; /* Counts of # adds remaining for chunks,
-                                        or -1 if not used yet or special. */
-  xsum_used chunks_used[XSUM_LCHUNKS/64];  /* Bits indicate chunks in use */
-  xsum_used used_used;             /* Bits indicate chunk_used entries not 0 */
-  xsum_small_accumulator sacc;     /* The small accumulator to condense into */
-} xsum_large_accumulator;
-
-
 /* TYPE FOR LENGTHS OF ARRAYS.  Must be a signed integer type.  Set to
    ptrdiff_t here on the assumption that this will be big enough, but
    not unnecessarily big, which seems to be true. */
@@ -145,41 +121,8 @@ void xsum_small_add_accumulator (xsum_small_accumulator *,
 void xsum_small_negate (xsum_small_accumulator *restrict);
 xsum_flt xsum_small_round (xsum_small_accumulator *restrict);
 
-void xsum_large_init (xsum_large_accumulator *restrict);
-void xsum_large_add1 (xsum_large_accumulator *restrict, xsum_flt);
-void xsum_large_addv (xsum_large_accumulator *restrict,
-                      const xsum_flt *restrict, xsum_length);
-void xsum_large_add_sqnorm (xsum_large_accumulator *restrict,
-                            const xsum_flt *restrict, xsum_length);
-void xsum_large_add_dot (xsum_large_accumulator *restrict,
-                         const xsum_flt *, const xsum_flt *, xsum_length);
-void xsum_large_add_accumulator (xsum_large_accumulator *,
-                                 xsum_large_accumulator *);
-void xsum_large_negate (xsum_large_accumulator *restrict);
-xsum_flt xsum_large_round (xsum_large_accumulator *restrict);
-
-void xsum_large_to_small_accumulator (xsum_small_accumulator *restrict,
-                                      xsum_large_accumulator *restrict);
-void xsum_small_to_large_accumulator (xsum_large_accumulator *restrict,
-                                      xsum_small_accumulator *restrict);
-
 xsum_flt xsum_small_div_unsigned (xsum_small_accumulator *restrict, unsigned);
 xsum_flt xsum_small_div_int (xsum_small_accumulator *restrict, int);
-xsum_flt xsum_large_div_unsigned (xsum_large_accumulator *restrict, unsigned);
-xsum_flt xsum_large_div_int (xsum_large_accumulator *restrict, int);
-
-
-/* FUNCTIONS FOR DOUBLE AND OTHER INEXACT SUMMATION.  Used for testing. */
-
-xsum_flt xsum_sum_double (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_sum_double_not_ordered (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_sum_float128 (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_sum_kahan (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_sqnorm_double (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_sqnorm_double_not_ordered (const xsum_flt *restrict, xsum_length);
-xsum_flt xsum_dot_double (const xsum_flt *, const xsum_flt *, xsum_length);
-xsum_flt xsum_dot_double_not_ordered (const xsum_flt *, const xsum_flt *,
-                                      xsum_length);
 
 
 /* DEBUG FLAG.  Set to non-zero for debug ouptut.  Ignored unless xsum.c