+ /* Now use the usual power series to evaluate exp(x). The */
+ /* series starts as 1 + x + x^2/2 ... so prime ready for the */
+ /* third term by setting the term variable t=x, the accumulator */
+ /* a=1, and the divisor d=2. */
+
+ /* First determine the working precision. From Hull & Abrham */
+ /* this is set->digits+h+2. However, if x is 'over-precise' we */
+ /* need to allow for all its digits to potentially participate */
+ /* (consider an x where all the excess digits are 9s) so in */
+ /* this case use x->digits+h+2 */
+ p=MAXI(x->digits, set->digits)+h+2; /* [h<=8] */
+
+ /* a and t are variable precision, and depend on p, so space */
+ /* must be allocated for them if necessary */
+
+ /* the accumulator needs to be able to hold 2p digits so that */
+ /* the additions on the second and subsequent iterations are */
+ /* sufficiently exact. */
+ needbytes=sizeof(decNumber)+(D2U(p*2)-1)*sizeof(Unit);
+ if (needbytes>sizeof(bufa)) { /* need malloc space */
+ allocbufa=(decNumber *)malloc(needbytes);
+ if (allocbufa==NULL) { /* hopeless -- abandon */
+ *status|=DEC_Insufficient_storage;
+ break;}
+ a=allocbufa; /* use the allocated space */
+ }
+ /* the term needs to be able to hold p digits (which is */
+ /* guaranteed to be larger than x->digits, so the initial copy */
+ /* is safe); it may also be used for the raise-to-power */
+ /* calculation below, which needs an extra two digits */
+ needbytes=sizeof(decNumber)+(D2U(p+2)-1)*sizeof(Unit);
+ if (needbytes>sizeof(buft)) { /* need malloc space */
+ allocbuft=(decNumber *)malloc(needbytes);
+ if (allocbuft==NULL) { /* hopeless -- abandon */
+ *status|=DEC_Insufficient_storage;
+ break;}
+ t=allocbuft; /* use the allocated space */
+ }
+
+ decNumberCopy(t, x); /* term=x */
+ decNumberZero(a); *a->lsu=1; /* accumulator=1 */
+ decNumberZero(d); *d->lsu=2; /* divisor=2 */
+ decNumberZero(&numone); *numone.lsu=1; /* constant 1 for increment */
+
+ /* set up the contexts for calculating a, t, and d */
+ decContextDefault(&tset, DEC_INIT_DECIMAL64);
+ dset=tset;
+ /* accumulator bounds are set above, set precision now */
+ aset.digits=p*2; /* double */
+ /* term bounds avoid any underflow or overflow */
+ tset.digits=p;
+ tset.emin=DEC_MIN_EMIN; /* [emax is plenty] */
+ /* [dset.digits=16, etc., are sufficient] */
+
+ /* finally ready to roll */
+ for (;;) {
+ #if DECCHECK
+ iterations++;
+ #endif
+ /* only the status from the accumulation is interesting */
+ /* [but it should remain unchanged after first add] */
+ decAddOp(a, a, t, &aset, 0, status); /* a=a+t */
+ decMultiplyOp(t, t, x, &tset, &ignore); /* t=t*x */
+ decDivideOp(t, t, d, &tset, DIVIDE, &ignore); /* t=t/d */
+ /* the iteration ends when the term cannot affect the result, */
+ /* if rounded to p digits, which is when its value is smaller */
+ /* than the accumulator by p+1 digits. There must also be */
+ /* full precision in a. */
+ if (((a->digits+a->exponent)>=(t->digits+t->exponent+p+1))
+ && (a->digits>=p)) break;
+ decAddOp(d, d, &numone, &dset, 0, &ignore); /* d=d+1 */
+ } /* iterate */
+
+ #if DECCHECK
+ /* just a sanity check; comment out test to show always */
+ if (iterations>p+3)
+ printf("Exp iterations=%ld, status=%08lx, p=%ld, d=%ld\n",
+ (LI)iterations, (LI)*status, (LI)p, (LI)x->digits);
+ #endif
+ } /* h<=8 */
+
+ /* apply postconditioning: a=a**(10**h) -- this is calculated */
+ /* at a slightly higher precision than Hull & Abrham suggest */
+ if (h>0) {
+ Int seenbit=0; /* set once a 1-bit is seen */
+ Int i; /* counter */
+ Int n=powers[h]; /* always positive */
+ aset.digits=p+2; /* sufficient precision */
+ /* avoid the overhead and many extra digits of decNumberPower */
+ /* as all that is needed is the short 'multipliers' loop; here */
+ /* accumulate the answer into t */
+ decNumberZero(t); *t->lsu=1; /* acc=1 */
+ for (i=1;;i++){ /* for each bit [top bit ignored] */
+ /* abandon if have had overflow or terminal underflow */
+ if (*status & (DEC_Overflow|DEC_Underflow)) { /* interesting? */
+ if (*status&DEC_Overflow || ISZERO(t)) break;}
+ n=n<<1; /* move next bit to testable position */
+ if (n<0) { /* top bit is set */
+ seenbit=1; /* OK, have a significant bit */
+ decMultiplyOp(t, t, a, &aset, status); /* acc=acc*x */
+ }
+ if (i==31) break; /* that was the last bit */
+ if (!seenbit) continue; /* no need to square 1 */
+ decMultiplyOp(t, t, t, &aset, status); /* acc=acc*acc [square] */
+ } /*i*/ /* 32 bits */
+ /* decNumberShow(t); */
+ a=t; /* and carry on using t instead of a */
+ }
+
+ /* Copy and round the result to res */
+ residue=1; /* indicate dirt to right .. */
+ if (ISZERO(a)) residue=0; /* .. unless underflowed to 0 */
+ aset.digits=set->digits; /* [use default rounding] */
+ decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */
+ decFinish(res, set, &residue, status); /* cleanup/set flags */
+ } while(0); /* end protected */
+
+ free(allocrhs); /* drop any storage used */
+ free(allocbufa); /* .. */
+ free(allocbuft); /* .. */
+ /* [status is handled by caller] */
+ return res;
+ } /* decExpOp */
+
+/* ------------------------------------------------------------------ */
+/* Initial-estimate natural logarithm table */
+/* */
+/* LNnn -- 90-entry 16-bit table for values from .10 through .99. */
+/* The result is a 4-digit encode of the coefficient (c=the */
+/* top 14 bits encoding 0-9999) and a 2-digit encode of the */
+/* exponent (e=the bottom 2 bits encoding 0-3) */
+/* */
+/* The resulting value is given by: */
+/* */
+/* v = -c * 10**(-e-3) */
+/* */
+/* where e and c are extracted from entry k = LNnn[x-10] */
+/* where x is truncated (NB) into the range 10 through 99, */
+/* and then c = k>>2 and e = k&3. */
+/* ------------------------------------------------------------------ */
+const uShort LNnn[90]={9016, 8652, 8316, 8008, 7724, 7456, 7208,
+ 6972, 6748, 6540, 6340, 6148, 5968, 5792, 5628, 5464, 5312,
+ 5164, 5020, 4884, 4748, 4620, 4496, 4376, 4256, 4144, 4032,
+ 39233, 38181, 37157, 36157, 35181, 34229, 33297, 32389, 31501, 30629,
+ 29777, 28945, 28129, 27329, 26545, 25777, 25021, 24281, 23553, 22837,
+ 22137, 21445, 20769, 20101, 19445, 18801, 18165, 17541, 16925, 16321,
+ 15721, 15133, 14553, 13985, 13421, 12865, 12317, 11777, 11241, 10717,
+ 10197, 9685, 9177, 8677, 8185, 7697, 7213, 6737, 6269, 5801,
+ 5341, 4889, 4437, 39930, 35534, 31186, 26886, 22630, 18418, 14254,
+ 10130, 6046, 20055};
+
+/* ------------------------------------------------------------------ */
+/* decLnOp -- effect natural logarithm */
+/* */
+/* This computes C = ln(A) */
+/* */
+/* res is C, the result. C may be A */
+/* rhs is A */
+/* set is the context; note that rounding mode has no effect */
+/* */
+/* C must have space for set->digits digits. */
+/* */
+/* Notable cases: */
+/* A<0 -> Invalid */
+/* A=0 -> -Infinity (Exact) */
+/* A=+Infinity -> +Infinity (Exact) */
+/* A=1 exactly -> 0 (Exact) */
+/* */
+/* Restrictions (as for Exp): */
+/* */
+/* digits, emax, and -emin in the context must be less than */
+/* DEC_MAX_MATH+11 (1000010), and the rhs must be within these */
+/* bounds or a zero. This is an internal routine, so these */
+/* restrictions are contractual and not enforced. */
+/* */
+/* A finite result is rounded using DEC_ROUND_HALF_EVEN; it will */
+/* almost always be correctly rounded, but may be up to 1 ulp in */
+/* error in rare cases. */
+/* ------------------------------------------------------------------ */
+/* The result is calculated using Newton's method, with each */
+/* iteration calculating a' = a + x * exp(-a) - 1. See, for example, */
+/* Epperson 1989. */
+/* */
+/* The iteration ends when the adjustment x*exp(-a)-1 is tiny enough. */
+/* This has to be calculated at the sum of the precision of x and the */
+/* working precision. */
+/* */
+/* Implementation notes: */
+/* */
+/* 1. This is separated out as decLnOp so it can be called from */
+/* other Mathematical functions (e.g., Log 10) with a wider range */
+/* than normal. In particular, it can handle the slightly wider */
+/* (+9+2) range needed by a power function. */
+/* */
+/* 2. The speed of this function is about 10x slower than exp, as */
+/* it typically needs 4-6 iterations for short numbers, and the */
+/* extra precision needed adds a squaring effect, twice. */
+/* */
+/* 3. Fastpaths are included for ln(10) and ln(2), up to length 40, */
+/* as these are common requests. ln(10) is used by log10(x). */
+/* */
+/* 4. An iteration might be saved by widening the LNnn table, and */
+/* would certainly save at least one if it were made ten times */
+/* bigger, too (for truncated fractions 0.100 through 0.999). */
+/* However, for most practical evaluations, at least four or five */
+/* iterations will be neede -- so this would only speed up by */
+/* 20-25% and that probably does not justify increasing the table */
+/* size. */
+/* */
+/* 5. The static buffers are larger than might be expected to allow */
+/* for calls from decNumberPower. */
+/* ------------------------------------------------------------------ */
+decNumber * decLnOp(decNumber *res, const decNumber *rhs,
+ decContext *set, uInt *status) {
+ uInt ignore=0; /* working status accumulator */
+ uInt needbytes; /* for space calculations */
+ Int residue; /* rounding residue */
+ Int r; /* rhs=f*10**r [see below] */
+ Int p; /* working precision */
+ Int pp; /* precision for iteration */
+ Int t; /* work */
+
+ /* buffers for a (accumulator, typically precision+2) and b */
+ /* (adjustment calculator, same size) */
+ decNumber bufa[D2N(DECBUFFER+12)];
+ decNumber *allocbufa=NULL; /* -> allocated bufa, iff allocated */
+ decNumber *a=bufa; /* accumulator/work */
+ decNumber bufb[D2N(DECBUFFER*2+2)];
+ decNumber *allocbufb=NULL; /* -> allocated bufa, iff allocated */
+ decNumber *b=bufb; /* adjustment/work */
+
+ decNumber numone; /* constant 1 */
+ decNumber cmp; /* work */
+ decContext aset, bset; /* working contexts */
+
+ #if DECCHECK
+ Int iterations=0; /* for later sanity check */
+ if (decCheckOperands(res, DECUNUSED, rhs, set)) return res;
+ #endif
+
+ do { /* protect allocated storage */
+ if (SPECIALARG) { /* handle infinities and NaNs */
+ if (decNumberIsInfinite(rhs)) { /* an infinity */
+ if (decNumberIsNegative(rhs)) /* -Infinity -> error */
+ *status|=DEC_Invalid_operation;
+ else decNumberCopy(res, rhs); /* +Infinity -> self */
+ }
+ else decNaNs(res, rhs, NULL, set, status); /* a NaN */
+ break;}
+
+ if (ISZERO(rhs)) { /* +/- zeros -> -Infinity */
+ decNumberZero(res); /* make clean */
+ res->bits=DECINF|DECNEG; /* set - infinity */
+ break;} /* [no status to set] */
+
+ /* Non-zero negatives are bad... */
+ if (decNumberIsNegative(rhs)) { /* -x -> error */
+ *status|=DEC_Invalid_operation;
+ break;}
+
+ /* Here, rhs is positive, finite, and in range */
+
+ /* lookaside fastpath code for ln(2) and ln(10) at common lengths */
+ if (rhs->exponent==0 && set->digits<=40) {
+ #if DECDPUN==1
+ if (rhs->lsu[0]==0 && rhs->lsu[1]==1 && rhs->digits==2) { /* ln(10) */
+ #else
+ if (rhs->lsu[0]==10 && rhs->digits==2) { /* ln(10) */
+ #endif
+ aset=*set; aset.round=DEC_ROUND_HALF_EVEN;
+ #define LN10 "2.302585092994045684017991454684364207601"
+ decNumberFromString(res, LN10, &aset);
+ *status|=(DEC_Inexact | DEC_Rounded); /* is inexact */
+ break;}
+ if (rhs->lsu[0]==2 && rhs->digits==1) { /* ln(2) */
+ aset=*set; aset.round=DEC_ROUND_HALF_EVEN;
+ #define LN2 "0.6931471805599453094172321214581765680755"
+ decNumberFromString(res, LN2, &aset);
+ *status|=(DEC_Inexact | DEC_Rounded);
+ break;}
+ } /* integer and short */
+
+ /* Determine the working precision. This is normally the */
+ /* requested precision + 2, with a minimum of 9. However, if */
+ /* the rhs is 'over-precise' then allow for all its digits to */
+ /* potentially participate (consider an rhs where all the excess */
+ /* digits are 9s) so in this case use rhs->digits+2. */
+ p=MAXI(rhs->digits, MAXI(set->digits, 7))+2;
+
+ /* Allocate space for the accumulator and the high-precision */
+ /* adjustment calculator, if necessary. The accumulator must */
+ /* be able to hold p digits, and the adjustment up to */
+ /* rhs->digits+p digits. They are also made big enough for 16 */
+ /* digits so that they can be used for calculating the initial */
+ /* estimate. */
+ needbytes=sizeof(decNumber)+(D2U(MAXI(p,16))-1)*sizeof(Unit);
+ if (needbytes>sizeof(bufa)) { /* need malloc space */
+ allocbufa=(decNumber *)malloc(needbytes);
+ if (allocbufa==NULL) { /* hopeless -- abandon */
+ *status|=DEC_Insufficient_storage;
+ break;}
+ a=allocbufa; /* use the allocated space */
+ }
+ pp=p+rhs->digits;
+ needbytes=sizeof(decNumber)+(D2U(MAXI(pp,16))-1)*sizeof(Unit);
+ if (needbytes>sizeof(bufb)) { /* need malloc space */
+ allocbufb=(decNumber *)malloc(needbytes);
+ if (allocbufb==NULL) { /* hopeless -- abandon */
+ *status|=DEC_Insufficient_storage;
+ break;}
+ b=allocbufb; /* use the allocated space */
+ }
+
+ /* Prepare an initial estimate in acc. Calculate this by */
+ /* considering the coefficient of x to be a normalized fraction, */
+ /* f, with the decimal point at far left and multiplied by */
+ /* 10**r. Then, rhs=f*10**r and 0.1<=f<1, and */
+ /* ln(x) = ln(f) + ln(10)*r */
+ /* Get the initial estimate for ln(f) from a small lookup */
+ /* table (see above) indexed by the first two digits of f, */
+ /* truncated. */
+
+ decContextDefault(&aset, DEC_INIT_DECIMAL64); /* 16-digit extended */
+ r=rhs->exponent+rhs->digits; /* 'normalised' exponent */
+ decNumberFromInt32(a, r); /* a=r */
+ decNumberFromInt32(b, 2302585); /* b=ln(10) (2.302585) */
+ b->exponent=-6; /* .. */
+ decMultiplyOp(a, a, b, &aset, &ignore); /* a=a*b */
+ /* now get top two digits of rhs into b by simple truncate and */
+ /* force to integer */
+ residue=0; /* (no residue) */
+ aset.digits=2; aset.round=DEC_ROUND_DOWN;
+ decCopyFit(b, rhs, &aset, &residue, &ignore); /* copy & shorten */
+ b->exponent=0; /* make integer */
+ t=decGetInt(b); /* [cannot fail] */
+ if (t<10) t=X10(t); /* adjust single-digit b */
+ t=LNnn[t-10]; /* look up ln(b) */
+ decNumberFromInt32(b, t>>2); /* b=ln(b) coefficient */
+ b->exponent=-(t&3)-3; /* set exponent */
+ b->bits=DECNEG; /* ln(0.10)->ln(0.99) always -ve */
+ aset.digits=16; aset.round=DEC_ROUND_HALF_EVEN; /* restore */
+ decAddOp(a, a, b, &aset, 0, &ignore); /* acc=a+b */
+ /* the initial estimate is now in a, with up to 4 digits correct. */
+ /* When rhs is at or near Nmax the estimate will be low, so we */
+ /* will approach it from below, avoiding overflow when calling exp. */
+
+ decNumberZero(&numone); *numone.lsu=1; /* constant 1 for adjustment */
+
+ /* accumulator bounds are as requested (could underflow, but */
+ /* cannot overflow) */
+ aset.emax=set->emax;
+ aset.emin=set->emin;
+ aset.clamp=0; /* no concrete format */
+ /* set up a context to be used for the multiply and subtract */
+ bset=aset;
+ bset.emax=DEC_MAX_MATH*2; /* use double bounds for the */
+ bset.emin=-DEC_MAX_MATH*2; /* adjustment calculation */
+ /* [see decExpOp call below] */
+ /* for each iteration double the number of digits to calculate, */
+ /* up to a maximum of p */
+ pp=9; /* initial precision */
+ /* [initially 9 as then the sequence starts 7+2, 16+2, and */
+ /* 34+2, which is ideal for standard-sized numbers] */
+ aset.digits=pp; /* working context */
+ bset.digits=pp+rhs->digits; /* wider context */
+ for (;;) { /* iterate */
+ #if DECCHECK
+ iterations++;
+ if (iterations>24) break; /* consider 9 * 2**24 */
+ #endif
+ /* calculate the adjustment (exp(-a)*x-1) into b. This is a */
+ /* catastrophic subtraction but it really is the difference */
+ /* from 1 that is of interest. */
+ /* Use the internal entry point to Exp as it allows the double */
+ /* range for calculating exp(-a) when a is the tiniest subnormal. */
+ a->bits^=DECNEG; /* make -a */
+ decExpOp(b, a, &bset, &ignore); /* b=exp(-a) */
+ a->bits^=DECNEG; /* restore sign of a */
+ /* now multiply by rhs and subtract 1, at the wider precision */
+ decMultiplyOp(b, b, rhs, &bset, &ignore); /* b=b*rhs */
+ decAddOp(b, b, &numone, &bset, DECNEG, &ignore); /* b=b-1 */
+
+ /* the iteration ends when the adjustment cannot affect the */
+ /* result by >=0.5 ulp (at the requested digits), which */
+ /* is when its value is smaller than the accumulator by */
+ /* set->digits+1 digits (or it is zero) -- this is a looser */
+ /* requirement than for Exp because all that happens to the */
+ /* accumulator after this is the final rounding (but note that */
+ /* there must also be full precision in a, or a=0). */
+
+ if (decNumberIsZero(b) ||
+ (a->digits+a->exponent)>=(b->digits+b->exponent+set->digits+1)) {
+ if (a->digits==p) break;
+ if (decNumberIsZero(a)) {
+ decCompareOp(&cmp, rhs, &numone, &aset, COMPARE, &ignore); /* rhs=1 ? */
+ if (cmp.lsu[0]==0) a->exponent=0; /* yes, exact 0 */
+ else *status|=(DEC_Inexact | DEC_Rounded); /* no, inexact */
+ break;
+ }
+ /* force padding if adjustment has gone to 0 before full length */
+ if (decNumberIsZero(b)) b->exponent=a->exponent-p;
+ }
+
+ /* not done yet ... */
+ decAddOp(a, a, b, &aset, 0, &ignore); /* a=a+b for next estimate */
+ if (pp==p) continue; /* precision is at maximum */
+ /* lengthen the next calculation */
+ pp=pp*2; /* double precision */
+ if (pp>p) pp=p; /* clamp to maximum */
+ aset.digits=pp; /* working context */
+ bset.digits=pp+rhs->digits; /* wider context */
+ } /* Newton's iteration */
+
+ #if DECCHECK
+ /* just a sanity check; remove the test to show always */
+ if (iterations>24)
+ printf("Ln iterations=%ld, status=%08lx, p=%ld, d=%ld\n",
+ (LI)iterations, (LI)*status, (LI)p, (LI)rhs->digits);
+ #endif
+
+ /* Copy and round the result to res */
+ residue=1; /* indicate dirt to right */
+ if (ISZERO(a)) residue=0; /* .. unless underflowed to 0 */
+ aset.digits=set->digits; /* [use default rounding] */
+ decCopyFit(res, a, &aset, &residue, status); /* copy & shorten */
+ decFinish(res, set, &residue, status); /* cleanup/set flags */
+ } while(0); /* end protected */
+
+ free(allocbufa); /* drop any storage used */
+ free(allocbufb); /* .. */
+ /* [status is handled by caller] */