mirror of
https://github.com/bellard/quickjs.git
synced 2025-11-15 10:12:14 +03:00
simplified math.sumPrecise()
This commit is contained in:
189
quickjs.c
189
quickjs.c
@@ -45458,47 +45458,59 @@ static JSValue js_math_clz32(JSContext *ctx, JSValueConst this_val,
|
|||||||
return JS_NewInt32(ctx, r);
|
return JS_NewInt32(ctx, r);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* we add one extra limb to avoid having to test for overflows during the sum */
|
|
||||||
#define SUM_PRECISE_ACC_LEN 34
|
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
SUM_PRECISE_STATE_MINUS_ZERO,
|
|
||||||
SUM_PRECISE_STATE_FINITE,
|
SUM_PRECISE_STATE_FINITE,
|
||||||
SUM_PRECISE_STATE_INFINITY,
|
SUM_PRECISE_STATE_INFINITY,
|
||||||
SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
|
SUM_PRECISE_STATE_MINUS_INFINITY, /* must be after SUM_PRECISE_STATE_INFINITY */
|
||||||
SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
|
SUM_PRECISE_STATE_NAN, /* must be after SUM_PRECISE_STATE_MINUS_INFINITY */
|
||||||
} SumPreciseStateEnum;
|
} SumPreciseStateEnum;
|
||||||
|
|
||||||
|
#define SP_LIMB_BITS 56
|
||||||
|
#define SP_RND_BITS (SP_LIMB_BITS - 53)
|
||||||
|
/* we add one extra limb to avoid having to test for overflows during the sum */
|
||||||
|
#define SUM_PRECISE_ACC_LEN 39
|
||||||
|
|
||||||
|
#define SUM_PRECISE_COUNTER_INIT 250
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
uint64_t acc[SUM_PRECISE_ACC_LEN];
|
|
||||||
int n_limbs; /* acc is not necessarily normalized */
|
|
||||||
SumPreciseStateEnum state;
|
SumPreciseStateEnum state;
|
||||||
|
uint32_t counter;
|
||||||
|
int n_limbs; /* 'acc' contains n_limbs and is not necessarily
|
||||||
|
normalized: acc[n_limbs - 1] may be 0. n_limbs = 0 indicates a minus zero
|
||||||
|
result when state = SUM_PRECISE_STATE_FINITE */
|
||||||
|
int64_t acc[SUM_PRECISE_ACC_LEN];
|
||||||
} SumPreciseState;
|
} SumPreciseState;
|
||||||
|
|
||||||
static void sum_precise_init(SumPreciseState *s)
|
static void sum_precise_init(SumPreciseState *s)
|
||||||
{
|
{
|
||||||
s->state = SUM_PRECISE_STATE_MINUS_ZERO;
|
memset(s->acc, 0, sizeof(s->acc));
|
||||||
s->acc[0] = 0;
|
s->state = SUM_PRECISE_STATE_FINITE;
|
||||||
s->n_limbs = 1;
|
s->counter = SUM_PRECISE_COUNTER_INIT;
|
||||||
|
s->n_limbs = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ADDC64(res, carry_out, op1, op2, carry_in) \
|
static void sum_precise_renorm(SumPreciseState *s)
|
||||||
do { \
|
{
|
||||||
uint64_t __v, __a, __k, __k1; \
|
int64_t v, carry;
|
||||||
__v = (op1); \
|
int i;
|
||||||
__a = __v + (op2); \
|
|
||||||
__k1 = __a < __v; \
|
carry = 0;
|
||||||
__k = (carry_in); \
|
for(i = 0; i < s->n_limbs; i++) {
|
||||||
__a = __a + __k; \
|
v = s->acc[i] + carry;
|
||||||
carry_out = (__a < __k) | __k1; \
|
s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
|
||||||
res = __a; \
|
carry = v >> SP_LIMB_BITS;
|
||||||
} while (0)
|
}
|
||||||
|
/* we add a failsafe but it should never be reached in a
|
||||||
|
reasonable amount of time */
|
||||||
|
if (carry != 0 && s->n_limbs < SUM_PRECISE_ACC_LEN)
|
||||||
|
s->acc[s->n_limbs++] = carry;
|
||||||
|
}
|
||||||
|
|
||||||
static void sum_precise_add(SumPreciseState *s, double d)
|
static void sum_precise_add(SumPreciseState *s, double d)
|
||||||
{
|
{
|
||||||
uint64_t a, m, a0, carry, acc_sign, a_sign;
|
uint64_t a, m, a0, a1;
|
||||||
int sgn, e, p, n, i;
|
int sgn, e, p;
|
||||||
unsigned shift;
|
unsigned int shift;
|
||||||
|
|
||||||
a = float64_as_uint64(d);
|
a = float64_as_uint64(d);
|
||||||
sgn = a >> 63;
|
sgn = a >> 63;
|
||||||
@@ -45521,8 +45533,8 @@ static void sum_precise_add(SumPreciseState *s, double d)
|
|||||||
} else if (e == 0) {
|
} else if (e == 0) {
|
||||||
if (likely(m == 0)) {
|
if (likely(m == 0)) {
|
||||||
/* zero */
|
/* zero */
|
||||||
if (s->state == SUM_PRECISE_STATE_MINUS_ZERO && !sgn)
|
if (s->n_limbs == 0 && !sgn)
|
||||||
s->state = SUM_PRECISE_STATE_FINITE;
|
s->n_limbs = 1;
|
||||||
} else {
|
} else {
|
||||||
/* subnormal */
|
/* subnormal */
|
||||||
p = 0;
|
p = 0;
|
||||||
@@ -45530,69 +45542,41 @@ static void sum_precise_add(SumPreciseState *s, double d)
|
|||||||
goto add;
|
goto add;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
/* Note: we sum even if state != SUM_PRECISE_STATE_FINITE to
|
||||||
|
avoid tests */
|
||||||
m |= (uint64_t)1 << 52;
|
m |= (uint64_t)1 << 52;
|
||||||
shift = e - 1;
|
shift = e - 1;
|
||||||
p = shift / 64;
|
/* 'p' is the position of a0 in acc. The division is normally
|
||||||
/* 'p' is the position of a0 in acc */
|
implemented as a multiplication by the compiler. */
|
||||||
shift %= 64;
|
p = shift / SP_LIMB_BITS;
|
||||||
|
shift %= SP_LIMB_BITS;
|
||||||
add:
|
add:
|
||||||
if (s->state >= SUM_PRECISE_STATE_INFINITY)
|
a0 = (m << shift) & (((uint64_t)1 << SP_LIMB_BITS) - 1);
|
||||||
return;
|
a1 = m >> (SP_LIMB_BITS - shift);
|
||||||
s->state = SUM_PRECISE_STATE_FINITE;
|
if (!sgn) {
|
||||||
n = s->n_limbs;
|
s->acc[p] += a0;
|
||||||
|
s->acc[p + 1] += a1;
|
||||||
acc_sign = (int64_t)s->acc[n - 1] >> 63;
|
|
||||||
|
|
||||||
/* sign extend acc */
|
|
||||||
for(i = n; i <= p; i++)
|
|
||||||
s->acc[i] = acc_sign;
|
|
||||||
|
|
||||||
carry = sgn;
|
|
||||||
a_sign = -sgn;
|
|
||||||
a0 = m << shift;
|
|
||||||
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
|
|
||||||
if (shift >= 12) {
|
|
||||||
p++;
|
|
||||||
if (p >= n)
|
|
||||||
s->acc[p] = acc_sign;
|
|
||||||
a0 = m >> (64 - shift);
|
|
||||||
ADDC64(s->acc[p], carry, s->acc[p], a0 ^ a_sign, carry);
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
if (p >= n) {
|
|
||||||
n = p;
|
|
||||||
} else {
|
} else {
|
||||||
/* carry */
|
s->acc[p] -= a0;
|
||||||
for(i = p; i < n; i++) {
|
s->acc[p + 1] -= a1;
|
||||||
/* if 'a' positive: stop condition: carry = 0.
|
|
||||||
if 'a' negative: stop condition: carry = 1. */
|
|
||||||
if (carry == sgn)
|
|
||||||
goto done;
|
|
||||||
ADDC64(s->acc[i], carry, s->acc[i], a_sign, carry);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
s->n_limbs = max_int(s->n_limbs, p + 2);
|
||||||
/* extend the accumulator if needed */
|
|
||||||
a0 = carry + acc_sign + a_sign;
|
if (unlikely(--s->counter == 0)) {
|
||||||
/* -1 <= a0 <= 1 (if both acc and a are negative, carry is set) */
|
s->counter = SUM_PRECISE_COUNTER_INIT;
|
||||||
if (a0 != ((int64_t)s->acc[n - 1] >> 63)) {
|
sum_precise_renorm(s);
|
||||||
s->acc[n++] = a0;
|
|
||||||
}
|
}
|
||||||
done:
|
|
||||||
s->n_limbs = n;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static double sum_precise_get_result(SumPreciseState *s)
|
static double sum_precise_get_result(SumPreciseState *s)
|
||||||
{
|
{
|
||||||
int n, shift, e, p, is_neg, i;
|
int n, shift, e, p, is_neg;
|
||||||
uint64_t m, addend, carry;
|
uint64_t m, addend;
|
||||||
|
|
||||||
if (s->state != SUM_PRECISE_STATE_FINITE) {
|
if (s->state != SUM_PRECISE_STATE_FINITE) {
|
||||||
switch(s->state) {
|
switch(s->state) {
|
||||||
default:
|
default:
|
||||||
case SUM_PRECISE_STATE_MINUS_ZERO:
|
|
||||||
return -0.0;
|
|
||||||
case SUM_PRECISE_STATE_INFINITY:
|
case SUM_PRECISE_STATE_INFINITY:
|
||||||
return INFINITY;
|
return INFINITY;
|
||||||
case SUM_PRECISE_STATE_MINUS_INFINITY:
|
case SUM_PRECISE_STATE_MINUS_INFINITY:
|
||||||
@@ -45602,38 +45586,53 @@ static double sum_precise_get_result(SumPreciseState *s)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sum_precise_renorm(s);
|
||||||
|
|
||||||
/* extract the sign and absolute value */
|
/* extract the sign and absolute value */
|
||||||
n = s->n_limbs;
|
|
||||||
is_neg = s->acc[n - 1] >> 63;
|
|
||||||
if (is_neg) {
|
|
||||||
/* acc = -acc */
|
|
||||||
carry = 1;
|
|
||||||
for(i = 0; i < n; i++) {
|
|
||||||
ADDC64(s->acc[i], carry, ~s->acc[i], 0, carry);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* normalize */
|
|
||||||
while (n > 0 && s->acc[n - 1] == 0)
|
|
||||||
n--;
|
|
||||||
#if 0
|
#if 0
|
||||||
{
|
{
|
||||||
printf("res=");
|
int i;
|
||||||
for(i = n - 1; i >= 0; i--)
|
printf("len=%d:", s->n_limbs);
|
||||||
printf(" %016lx", s->acc[i]);
|
for(i = s->n_limbs - 1; i >= 0; i--)
|
||||||
|
printf(" %014lx", s->acc[i]);
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
n = s->n_limbs;
|
||||||
|
/* minus zero result */
|
||||||
|
if (n == 0)
|
||||||
|
return -0.0;
|
||||||
|
|
||||||
|
/* normalize */
|
||||||
|
while (n > 0 && s->acc[n - 1] == 0)
|
||||||
|
n--;
|
||||||
/* zero result. The spec tells it is always positive in the finite case */
|
/* zero result. The spec tells it is always positive in the finite case */
|
||||||
if (n == 0)
|
if (n == 0)
|
||||||
return 0.0;
|
return 0.0;
|
||||||
|
is_neg = (s->acc[n - 1] < 0);
|
||||||
|
if (is_neg) {
|
||||||
|
uint64_t v, carry;
|
||||||
|
int i;
|
||||||
|
/* negate */
|
||||||
|
/* XXX: do it only when needed */
|
||||||
|
carry = 1;
|
||||||
|
for(i = 0; i < n - 1; i++) {
|
||||||
|
v = (((uint64_t)1 << SP_LIMB_BITS) - 1) - s->acc[i] + carry;
|
||||||
|
carry = v >> SP_LIMB_BITS;
|
||||||
|
s->acc[i] = v & (((uint64_t)1 << SP_LIMB_BITS) - 1);
|
||||||
|
}
|
||||||
|
s->acc[n - 1] = -s->acc[n - 1] + carry - 1;
|
||||||
|
while (n > 1 && s->acc[n - 1] == 0)
|
||||||
|
n--;
|
||||||
|
}
|
||||||
/* subnormal case */
|
/* subnormal case */
|
||||||
if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
|
if (n == 1 && s->acc[0] < ((uint64_t)1 << 52))
|
||||||
return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
|
return uint64_as_float64(((uint64_t)is_neg << 63) | s->acc[0]);
|
||||||
/* normal case */
|
/* normal case */
|
||||||
e = n * 64;
|
e = n * SP_LIMB_BITS;
|
||||||
p = n - 1;
|
p = n - 1;
|
||||||
m = s->acc[p];
|
m = s->acc[p];
|
||||||
shift = clz64(m);
|
shift = clz64(m) - (64 - SP_LIMB_BITS);
|
||||||
e = e - shift - 52;
|
e = e - shift - 52;
|
||||||
if (shift != 0) {
|
if (shift != 0) {
|
||||||
m <<= shift;
|
m <<= shift;
|
||||||
@@ -45641,12 +45640,12 @@ static double sum_precise_get_result(SumPreciseState *s)
|
|||||||
int shift1;
|
int shift1;
|
||||||
uint64_t nz;
|
uint64_t nz;
|
||||||
p--;
|
p--;
|
||||||
shift1 = 64 - shift;
|
shift1 = SP_LIMB_BITS - shift;
|
||||||
nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
|
nz = s->acc[p] & (((uint64_t)1 << shift1) - 1);
|
||||||
m = m | (s->acc[p] >> shift1) | (nz != 0);
|
m = m | (s->acc[p] >> shift1) | (nz != 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ((m & ((1 << 10) - 1)) == 0) {
|
if ((m & ((1 << SP_RND_BITS) - 1)) == (1 << (SP_RND_BITS - 1))) {
|
||||||
/* see if the LSB part is non zero for the final rounding */
|
/* see if the LSB part is non zero for the final rounding */
|
||||||
while (p > 0) {
|
while (p > 0) {
|
||||||
p--;
|
p--;
|
||||||
@@ -45657,10 +45656,10 @@ static double sum_precise_get_result(SumPreciseState *s)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* rounding to nearest with ties to even */
|
/* rounding to nearest with ties to even */
|
||||||
addend = (1 << 10) - 1 + ((m >> 11) & 1);
|
addend = (1 << (SP_RND_BITS - 1)) - 1 + ((m >> SP_RND_BITS) & 1);
|
||||||
m = (m + addend) >> 11;
|
m = (m + addend) >> SP_RND_BITS;
|
||||||
/* handle overflow in the rounding */
|
/* handle overflow in the rounding */
|
||||||
if (m == 0)
|
if (m == ((uint64_t)1 << 53))
|
||||||
e++;
|
e++;
|
||||||
if (unlikely(e >= 2047)) {
|
if (unlikely(e >= 2047)) {
|
||||||
/* infinity */
|
/* infinity */
|
||||||
|
|||||||
Reference in New Issue
Block a user