simplified math.sumPrecise()

This commit is contained in:
Fabrice Bellard
2025-09-29 16:12:23 +02:00
parent 00608769df
commit c3e5ae2008

189
quickjs.c
View File

@@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val,
return JS_NewInt32(ctx, r); return JS_NewInt32(ctx, r);
} }
/* we add one extra limb to avoid having to test for overflows during the sum */
#define SUM_PRECISE_ACC_LEN 34
typedef enum { typedef enum {
SUM_PRECISE_STATE_MINUS_ZERO,
SUM_PRECISE_STATE_FINITE, SUM_PRECISE_STATE_FINITE,
SUM_PRECISE_STATE_INFINITY, SUM_PRECISE_STATE_INFINITY,
SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */ SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */ SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
} SumPreciseStateEnum; } SumPreciseStateEnum;
#define SP_LIMB_BITS 56
#define SP_RND_BITS (SP_LIMB_BITS - 53)
/* we add one extra limb to avoid having to test for overflows during the sum */
#define SUM_PRECISE_ACC_LEN 39
#define SUM_PRECISE_COUNTER_INIT 250
typedef struct { typedef struct {
uint64_t acc[SUM_PRECISE_ACC_LEN];
int n_limbs; /* acc is not necessarily normalized */
SumPreciseStateEnum state; SumPreciseStateEnum state;
uint32_t counter;
int n_limbs; /* 'acc' contains n_limbs and is not necessarily
acc[n_limb - 1] may be 0. 0 indicates minus zero
result when state = SUM_PRECISE_STATE_FINITE */
int64_t acc[SUM_PRECISE_ACC_LEN];
} SumPreciseState; } SumPreciseState;
static void sum_precise_init(SumPreciseState *s) static void sum_precise_init(SumPreciseState *s)
{ {
s->state = SUM_PRECISE_STATE_MINUS_ZERO; memset(s->acc, 0, sizeof(s->acc));
s->acc[0] = 0; s->state = SUM_PRECISE_STATE_FINITE;
s->n_limbs = 1; s->counter = SUM_PRECISE_COUNTER_INIT;
s->n_limbs = 0;
} }
#define ADDC64(res, carry_out, op1, op2, carry_in) \ static void sum_precise_renorm(SumPreciseState *s)
do { \ {
uint64_t __v, __a, __k, __k1; \ int64_t v, carry;
__v = (op1); \ int i;
__a = __v + (op2); \
__k1 = __a < __v; \ carry = 0;
__k = (carry_in); \ for(i = 0; i < s->n_limbs; i++) {
__a = __a + __k; \ v = s->acc[i] + carry;
carry_out = (__a < __k) | __k1; \ s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
res = __a; \ carry = v >> SP_LIMB_BITS;
} while (0) }
/* we add a failsafe but it should be never reached in a
reasonnable amount of time */
if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
s->acc[s->n_limbs++] = carry;
}
static void sum_precise_add(SumPreciseState *s, double d) static void sum_precise_add(SumPreciseState *s, double d)
{ {
uint64_t a, m, a0, carry, acc_sign, a_sign; uint64_t a, m, a0, a1;
int sgn, e, p, n, i; int sgn, e, p;
unsigned shift; unsigned int shift;
a = float64_as_uint64(d); a = float64_as_uint64(d);
sgn = a >> 63; sgn = a >> 63;
@@ -45521,8 +45533,8 @@ static void sum_precise_add(SumPreciseState *s, double d)
} else if (e == 0) { } else if (e == 0) {
if (likely(m == 0)) { if (likely(m == 0)) {
/* zero */ /* zero */
if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn) if (s->n_limbs == 0 && !sgn)
s->state = SUM_PRECISE_STATE_FINITE; s->n_limbs = 1;
} else { } else {
/* subnormal */ /* subnormal */
p = 0; p = 0;
@@ -45530,69 +45542,41 @@ static void sum_precise_add(SumPreciseState *s, double d)
goto add; goto add;
} }
} else { } else {
/* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
avoid tests */
m |= (uint64_t)1 << 52; m |= (uint64_t)1 << 52;
shift = e - 1; shift = e - 1;
p = shift / 64; /* 'p' is the position of a0 in acc. The division is normally
/* 'p' is the position of a0 in acc */ implementation as a multiplication by the compiler. */
shift %= 64; p = shift / SP_LIMB_BITS;
shift %= SP_LIMB_BITS;
add: add:
if (s->state >= SUM_PRECISE_STATE_INFINITY) a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
return; a1 = m >> (SP_LIMB_BITS - shift);
s->state = SUM_PRECISE_STATE_FINITE; if (!sgn) {
n = s->n_limbs; s->acc[p] += a0;
s->acc[p + 1] += a1;
acc_sign = (int64_t)s->acc[n - 1] >> 63;
/* sign extend acc */
for(i = n; i <= p; i++)
s->acc[i] = acc_sign;
carry = sgn;
a_sign = -sgn;
a0 = m << shift;
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
if (shift >= 12) {
p++;
if (p >= n)
s->acc[p] = acc_sign;
a0 = m >> (64 - shift);
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
}
p++;
if (p >= n) {
n = p;
} else { } else {
/* carry */ s->acc[p] -= a0;
for(i = p; i < n; i++) { s->acc[p + 1] -= a1;
/* if 'a' positive: stop condition: carry = 0.
if 'a' negative: stop condition: carry = 1. */
if (carry == sgn)
goto done;
ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
}
} }
s->n_limbs = max_int(s->n_limbs, p + 2);
/* extend the accumulator if needed */
a0 = carry + acc_sign + a_sign; if (unlikely(--s->counter == 0)) {
/* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */ s->counter = SUM_PRECISE_COUNTER_INIT;
if (a0 != ((int64_t)s->acc[n - 1] >> 63)) { sum_precise_renorm(s);
s->acc[n++] = a0;
} }
done:
s->n_limbs = n;
} }
} }
static double sum_precise_get_result(SumPreciseState *s) static double sum_precise_get_result(SumPreciseState *s)
{ {
int n, shift, e, p, is_neg, i; int n, shift, e, p, is_neg;
uint64_t m, addend, carry; uint64_t m, addend;
if (s->state != SUM_PRECISE_STATE_FINITE) { if (s->state != SUM_PRECISE_STATE_FINITE) {
switch(s->state) { switch(s->state) {
default: default:
case SUM_PRECISE_STATE_MINUS_ZERO:
return -0.0;
case SUM_PRECISE_STATE_INFINITY: case SUM_PRECISE_STATE_INFINITY:
return INFINITY; return INFINITY;
case SUM_PRECISE_STATE_MINUS_INFINITY: case SUM_PRECISE_STATE_MINUS_INFINITY:
@@ -45602,38 +45586,53 @@ static double sum_precise_get_result(SumPreciseState *s)
} }
} }
sum_precise_renorm(s);
/* extract the sign and absolute value */ /* extract the sign and absolute value */
n = s->n_limbs;
is_neg = s->acc[n - 1] >> 63;
if (is_neg) {
/* acc = -acc */
carry = 1;
for(i = 0; i < n; i++) {
ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
}
}
/* normalize */
while (n > 0 && s->acc[n - 1] == 0)
n--;
#if 0 #if 0
{ {
printf("res="); int i;
for(i = n - 1; i >= 0; i--) printf("len=%d:", s->n_limbs);
printf(" %016lx", s->acc[i]); for(i = s->n_limbs - 1; i >= 0; i--)
printf(" %014lx", s->acc[i]);
printf("\n"); printf("\n");
} }
#endif #endif
n = s->n_limbs;
/* minus zero result */
if (n == 0)
return -0.0;
/* normalize */
while (n > 0 && s->acc[n - 1] == 0)
n--;
/* zero result. The spec tells it is always positive in the finite case */ /* zero result. The spec tells it is always positive in the finite case */
if (n == 0) if (n == 0)
return 0.0; return 0.0;
is_neg = (s->acc[n - 1] < 0);
if (is_neg) {
uint64_t v, carry;
int i;
/* negate */
/* XXX: do it only when needed */
carry = 1;
for(i = 0; i < n - 1; i++) {
v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
carry = v >> SP_LIMB_BITS;
s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
}
s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
while (n > 1 && s->acc[n - 1] == 0)
n--;
}
/* subnormal case */ /* subnormal case */
if (n == 1 && s->acc[0] < ((uint64_t)1 << 52)) if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]); return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
/* normal case */ /* normal case */
e = n * 64; e = n * SP_LIMB_BITS;
p = n - 1; p = n - 1;
m = s->acc[p]; m = s->acc[p];
shift = clz64(m); shift = clz64(m) - (64 - SP_LIMB_BITS);
e = e - shift - 52; e = e - shift - 52;
if (shift != 0) { if (shift != 0) {
m <<= shift; m <<= shift;
@@ -45641,12 +45640,12 @@ static double sum_precise_get_result(SumPreciseState *s)
int shift1; int shift1;
uint64_t nz; uint64_t nz;
p--; p--;
shift1 = 64 - shift; shift1 = SP_LIMB_BITS - shift;
nz = s->acc[p] & (((uint64_t)1 << shift1) - 1); nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
m = m | (s->acc[p] >> shift1) | (nz != 0); m = m | (s->acc[p] >> shift1) | (nz != 0);
} }
} }
if ((m & ((1 << 10) - 1)) == 0) { if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) {
/* see if the LSB part is non zero for the final rounding */ /* see if the LSB part is non zero for the final rounding */
while (p > 0) { while (p > 0) {
p--; p--;
@@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s)
} }
} }
/* rounding to nearest with ties to even */ /* rounding to nearest with ties to even */
addend = (1 << 10) - 1 + ((m >> 11) & 1); addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1);
m = (m + addend) >> 11; m = (m + addend) >> SP_RND_BITS;
/* handle overflow in the rounding */ /* handle overflow in the rounding */
if (m == 0) if (m == ((uint64_t)1 << 53))
e++; e++;
if (unlikely(e >= 2047)) { if (unlikely(e >= 2047)) {
/* infinity */ /* infinity */