From c3e5ae20087aba2a1bf2d96776f44d3a72c128c4 Mon Sep 17 00:00:00 2001 From: Fabrice Bellard Date: Mon, 29 Sep 2025 16:12:23 +0200 Subject: [PATCH] simplified math.sumPrecise() --- quickjs.c | 189 +++++++++++++++++++++++++++--------------------------- 1 file changed, 94 insertions(+), 95 deletions(-) diff --git a/quickjs.c b/quickjs.c index 2da7f52..f0f155d 100644 --- a/quickjs.c +++ b/quickjs.c @@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val, return JS_NewInt32(ctx, r); } -/* we add one extra limb to avoid having to test for overflows during the sum */ -#define SUM_PRECISE_ACC_LEN 34 - typedef enum { - SUM_PRECISE_STATE_MINUS_ZERO, SUM_PRECISE_STATE_FINITE, SUM_PRECISE_STATE_INFINITY, SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */ SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */ } SumPreciseStateEnum; +#define SP_LIMB_BITS 56 +#define SP_RND_BITS (SP_LIMB_BITS - 53) +/* we add one extra limb to avoid having to test for overflows during the sum */ +#define SUM_PRECISE_ACC_LEN 39 + +#define SUM_PRECISE_COUNTER_INIT 250 + typedef struct { - uint64_t acc[SUM_PRECISE_ACC_LEN]; - int n_limbs; /* acc is not necessarily normalized */ SumPreciseStateEnum state; + uint32_t counter; + int n_limbs; /* 'acc' contains n_limbs and is not necessarily + acc[n_limb - 1] may be 0. 0 indicates minus zero + result when state = SUM_PRECISE_STATE_FINITE */ + int64_t acc[SUM_PRECISE_ACC_LEN]; } SumPreciseState; static void sum_precise_init(SumPreciseState *s) { - s->state = SUM_PRECISE_STATE_MINUS_ZERO; - s->acc[0] = 0; - s->n_limbs = 1; + memset(s->acc, 0, sizeof(s->acc)); + s->state = SUM_PRECISE_STATE_FINITE; + s->counter = SUM_PRECISE_COUNTER_INIT; + s->n_limbs = 0; } -#define ADDC64(res, carry_out, op1, op2, carry_in) \ -do { \ - uint64_t __v, __a, __k, __k1; \ - __v = (op1); \ - __a = __v + (op2); \ - __k1 = __a < __v; \ - __k = (carry_in); \ - __a = __a + __k; \ - carry_out = (__a < __k) | __k1; \ - res = __a; \ -} while (0) +static void sum_precise_renorm(SumPreciseState *s) +{ + int64_t v, carry; + int i; + + carry = 0; + for(i = 0; i < s->n_limbs; i++) { + v = s->acc[i] + carry; + s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1); + carry = v >> SP_LIMB_BITS; + } + /* we add a failsafe but it should be never reached in a + reasonnable amount of time */ + if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN) + s->acc[s->n_limbs++] = carry; +} static void sum_precise_add(SumPreciseState *s, double d) { - uint64_t a, m, a0, carry, acc_sign, a_sign; - int sgn, e, p, n, i; - unsigned shift; + uint64_t a, m, a0, a1; + int sgn, e, p; + unsigned int shift; a = float64_as_uint64(d); sgn = a >> 63; @@ -45521,8 +45533,8 @@ static void sum_precise_add(SumPreciseState *s, double d) } else if (e == 0) { if (likely(m == 0)) { /* zero */ - if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn) - s->state = SUM_PRECISE_STATE_FINITE; + if (s->n_limbs == 0 && !sgn) + s->n_limbs = 1; } else { /* subnormal */ p = 0; @@ -45530,69 +45542,41 @@ static void sum_precise_add(SumPreciseState *s, double d) goto add; } } else { + /* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to + avoid tests */ m |= (uint64_t)1 << 52; shift = e - 1; - p = shift / 64; - /* 'p' is the position of a0 in acc */ - shift %= 64; + /* 'p' is the position of a0 in acc. The division is normally + implementation as a multiplication by the compiler. */ + p = shift / SP_LIMB_BITS; + shift %= SP_LIMB_BITS; add: - if (s->state >= SUM_PRECISE_STATE_INFINITY) - return; - s->state = SUM_PRECISE_STATE_FINITE; - n = s->n_limbs; - - acc_sign = (int64_t)s->acc[n - 1] >> 63; - - /* sign extend acc */ - for(i = n; i <= p; i++) - s->acc[i] = acc_sign; - - carry = sgn; - a_sign = -sgn; - a0 = m << shift; - ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry); - if (shift >= 12) { - p++; - if (p >= n) - s->acc[p] = acc_sign; - a0 = m >> (64 - shift); - ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry); - } - p++; - if (p >= n) { - n = p; + a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1); + a1 = m >> (SP_LIMB_BITS - shift); + if (!sgn) { + s->acc[p] += a0; + s->acc[p + 1] += a1; } else { - /* carry */ - for(i = p; i < n; i++) { - /* if 'a' positive: stop condition: carry = 0. - if 'a' negative: stop condition: carry = 1. */ - if (carry == sgn) - goto done; - ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry); - } + s->acc[p] -= a0; + s->acc[p + 1] -= a1; } - - /* extend the accumulator if needed */ - a0 = carry + acc_sign + a_sign; - /* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */ - if (a0 != ((int64_t)s->acc[n - 1] >> 63)) { - s->acc[n++] = a0; + s->n_limbs = max_int(s->n_limbs, p + 2); + + if (unlikely(--s->counter == 0)) { + s->counter = SUM_PRECISE_COUNTER_INIT; + sum_precise_renorm(s); } - done: - s->n_limbs = n; } } static double sum_precise_get_result(SumPreciseState *s) { - int n, shift, e, p, is_neg, i; - uint64_t m, addend, carry; + int n, shift, e, p, is_neg; + uint64_t m, addend; if (s->state != SUM_PRECISE_STATE_FINITE) { switch(s->state) { default: - case SUM_PRECISE_STATE_MINUS_ZERO: - return -0.0; case SUM_PRECISE_STATE_INFINITY: return INFINITY; case SUM_PRECISE_STATE_MINUS_INFINITY: @@ -45602,38 +45586,53 @@ static double sum_precise_get_result(SumPreciseState *s) } } + sum_precise_renorm(s); + /* extract the sign and absolute value */ - n = s->n_limbs; - is_neg = s->acc[n - 1] >> 63; - if (is_neg) { - /* acc = -acc */ - carry = 1; - for(i = 0; i < n; i++) { - ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry); - } - } - /* normalize */ - while (n > 0 && s->acc[n - 1] == 0) - n--; #if 0 { - printf("res="); - for(i = n - 1; i >= 0; i--) - printf(" %016lx", s->acc[i]); + int i; + printf("len=%d:", s->n_limbs); + for(i = s->n_limbs - 1; i >= 0; i--) + printf(" %014lx", s->acc[i]); printf("\n"); } #endif + n = s->n_limbs; + /* minus zero result */ + if (n == 0) + return -0.0; + + /* normalize */ + while (n > 0 && s->acc[n - 1] == 0) + n--; /* zero result. The spec tells it is always positive in the finite case */ if (n == 0) return 0.0; + is_neg = (s->acc[n - 1] < 0); + if (is_neg) { + uint64_t v, carry; + int i; + /* negate */ + /* XXX: do it only when needed */ + carry = 1; + for(i = 0; i < n - 1; i++) { + v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry; + carry = v >> SP_LIMB_BITS; + s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1); + } + s->acc[n - 1] = -s->acc[n - 1] + carry - 1; + while (n > 1 && s->acc[n - 1] == 0) + n--; + } /* subnormal case */ if (n == 1 && s->acc[0] < ((uint64_t)1 << 52)) return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]); /* normal case */ - e = n * 64; + e = n * SP_LIMB_BITS; p = n - 1; m = s->acc[p]; - shift = clz64(m); + shift = clz64(m) - (64 - SP_LIMB_BITS); e = e - shift - 52; if (shift != 0) { m <<= shift; @@ -45641,12 +45640,12 @@ static double sum_precise_get_result(SumPreciseState *s) int shift1; uint64_t nz; p--; - shift1 = 64 - shift; + shift1 = SP_LIMB_BITS - shift; nz = s->acc[p] & (((uint64_t)1 << shift1) - 1); m = m | (s->acc[p] >> shift1) | (nz != 0); } } - if ((m & ((1 << 10) - 1)) == 0) { + if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) { /* see if the LSB part is non zero for the final rounding */ while (p > 0) { p--; @@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s) } } /* rounding to nearest with ties to even */ - addend = (1 << 10) - 1 + ((m >> 11) & 1); - m = (m + addend) >> 11; + addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1); + m = (m + addend) >> SP_RND_BITS; /* handle overflow in the rounding */ - if (m == 0) + if (m == ((uint64_t)1 << 53)) e++; if (unlikely(e >= 2047)) { /* infinity */