1 ------------------------------------------------------------------------------
3 -- GNAT COMPILER COMPONENTS --
5 -- S Y S T E M . F A T _ G E N --
11 -- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
13 -- GNAT is free software; you can redistribute it and/or modify it under --
14 -- terms of the GNU General Public License as published by the Free Soft- --
15 -- ware Foundation; either version 2, or (at your option) any later ver- --
16 -- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
17 -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
18 -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
19 -- for more details. You should have received a copy of the GNU General --
20 -- Public License distributed with GNAT; see file COPYING. If not, write --
21 -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
22 -- MA 02111-1307, USA. --
24 -- As a special exception, if other files instantiate generics from this --
25 -- unit, or you link this unit with other files to produce an executable, --
26 -- this unit does not by itself cause the resulting executable to be --
27 -- covered by the GNU General Public License. This exception does not --
28 -- however invalidate any other reasons why the executable file might be --
29 -- covered by the GNU Public License. --
31 -- GNAT was originally developed by the GNAT team at New York University. --
32 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
34 ------------------------------------------------------------------------------
36 -- The implementation here is portable to any IEEE implementation. It does
37 -- not handle non-binary radix, and also assumes that model numbers and
38 -- machine numbers are basically identical, which is not true of all possible
39 -- floating-point implementations. On a non-IEEE machine, this body must be
40 -- specialized appropriately, or better still, its generic instantiations
41 -- should be replaced by efficient machine-specific code.
43 with Ada.Unchecked_Conversion; use Ada;
45 package body System.Fat_Gen is
47 Float_Radix : constant T := T (T'Machine_Radix);
48 Float_Radix_Inv : constant T := 1.0 / Float_Radix;
49 Radix_To_M_Minus_1 : constant T := Float_Radix ** (T'Machine_Mantissa - 1);
51 pragma Assert (T'Machine_Radix = 2);
52 -- This version does not handle radix 16
54 -- Constants for Decompose and Scaling
56 Rad : constant T := T (T'Machine_Radix);
57 Invrad : constant T := 1.0 / Rad;
59 subtype Expbits is Integer range 0 .. 6;
60 -- 2 ** (2 ** 7) might overflow. How big can radix-16 exponents get?
62 Log_Power : constant array (Expbits) of Integer := (1, 2, 4, 8, 16, 32, 64);
64 R_Power : constant array (Expbits) of T :=
73 R_Neg_Power : constant array (Expbits) of T :=
82 -----------------------
83 -- Local Subprograms --
84 -----------------------
86 procedure Decompose (XX : T; Frac : out T; Expo : out UI);
87 -- Decomposes a floating-point number into fraction and exponent parts
89 function Gradual_Scaling (Adjustment : UI) return T;
90 -- Like Scaling with a first argument of 1.0, but returns the smallest
91 -- denormal rather than zero when the adjustment is smaller than
92 -- Machine_Emin. Used for Succ and Pred.
98 function Adjacent (X, Towards : T) return T is
103 elsif Towards > X then
115 function Ceiling (X : T) return T is
116 XT : constant T := Truncation (X);
134 function Compose (Fraction : T; Exponent : UI) return T is
139 Decompose (Fraction, Arg_Frac, Arg_Exp);
140 return Scaling (Arg_Frac, Exponent);
147 function Copy_Sign (Value, Sign : T) return T is
150 function Is_Negative (V : T) return Boolean;
151 pragma Import (Intrinsic, Is_Negative);
156 if Is_Negative (Sign) then
167 procedure Decompose (XX : T; Frac : out T; Expo : out UI) is
168 X : T := T'Machine (XX);
175 -- More useful would be defining Expo to be T'Machine_Emin - 1 or
176 -- T'Machine_Emin - T'Machine_Mantissa, which would preserve
177 -- monotonicity of the exponent function ???
179 -- Check for infinities, transfinites, whatnot.
181 elsif X > T'Safe_Last then
183 Expo := T'Machine_Emax + 1;
185 elsif X < T'Safe_First then
187 Expo := T'Machine_Emax + 2; -- how many extra negative values?
190 -- Case of nonzero finite x. Essentially, we just multiply
191 -- by Rad ** (+-2**N) to reduce the range.
197 -- Ax * Rad ** Ex is invariant.
201 while Ax >= R_Power (Expbits'Last) loop
202 Ax := Ax * R_Neg_Power (Expbits'Last);
203 Ex := Ex + Log_Power (Expbits'Last);
208 for N in reverse Expbits'First .. Expbits'Last - 1 loop
209 if Ax >= R_Power (N) then
210 Ax := Ax * R_Neg_Power (N);
211 Ex := Ex + Log_Power (N);
225 while Ax < R_Neg_Power (Expbits'Last) loop
226 Ax := Ax * R_Power (Expbits'Last);
227 Ex := Ex - Log_Power (Expbits'Last);
230 -- Rad ** -64 <= Ax < 1
232 for N in reverse Expbits'First .. Expbits'Last - 1 loop
233 if Ax < R_Neg_Power (N) then
234 Ax := Ax * R_Power (N);
235 Ex := Ex - Log_Power (N);
238 -- R_Neg_Power (N) <= Ax < 1
257 function Exponent (X : T) return UI is
262 Decompose (X, X_Frac, X_Exp);
270 function Floor (X : T) return T is
271 XT : constant T := Truncation (X);
289 function Fraction (X : T) return T is
294 Decompose (X, X_Frac, X_Exp);
298 ---------------------
299 -- Gradual_Scaling --
300 ---------------------
302 function Gradual_Scaling (Adjustment : UI) return T is
305 Ex : UI := Adjustment;
308 if Adjustment < T'Machine_Emin then
309 Y := 2.0 ** T'Machine_Emin;
311 Ex := Ex - T'Machine_Emin;
314 Y := T'Machine (Y / 2.0);
327 return Scaling (1.0, Adjustment);
335 function Leading_Part (X : T; Radix_Digits : UI) return T is
340 if Radix_Digits >= T'Machine_Mantissa then
344 L := Exponent (X) - Radix_Digits;
345 Y := Truncation (Scaling (X, -L));
356 -- The trick with Machine is to force the compiler to store the result
357 -- in memory so that we do not have extra precision used. The compiler
358 -- is clever, so we have to outwit its possible optimizations! We do
359 -- this by using an intermediate pragma Volatile location.
361 function Machine (X : T) return T is
363 pragma Volatile (Temp);
374 -- We treat Model as identical to Machine. This is true of IEEE and other
375 -- nice floating-point systems, but not necessarily true of all systems.
377 function Model (X : T) return T is
386 -- Subtract from the given number a number equivalent to the value of its
387 -- least significant bit. Given that the most significant bit represents
388 -- a value of 1.0 * radix ** (exp - 1), the value we want is obtained by
389 -- shifting this by (mantissa-1) bits to the right, i.e. decreasing the
390 -- exponent by that amount.
392 -- Zero has to be treated specially, since its exponent is zero
394 function Pred (X : T) return T is
403 Decompose (X, X_Frac, X_Exp);
405 -- A special case, if the number we had was a positive power of
406 -- two, then we want to subtract half of what we would otherwise
407 -- subtract, since the exponent is going to be reduced.
409 if X_Frac = 0.5 and then X > 0.0 then
410 return X - Gradual_Scaling (X_Exp - T'Machine_Mantissa - 1);
412 -- Otherwise the exponent stays the same
415 return X - Gradual_Scaling (X_Exp - T'Machine_Mantissa);
424 function Remainder (X, Y : T) return T is
452 P_Exp := Exponent (P);
455 Decompose (Arg, Arg_Frac, Arg_Exp);
456 Decompose (P, P_Frac, P_Exp);
458 P := Compose (P_Frac, Arg_Exp);
459 K := Arg_Exp - P_Exp;
463 for Cnt in reverse 0 .. K loop
464 if IEEE_Rem >= P then
466 IEEE_Rem := IEEE_Rem - P;
475 -- That completes the calculation of modulus remainder. The final
476 -- step is to get the IEEE remainder. Here we need to compare Rem with
477 -- (abs Y) / 2. We must be careful of an unrepresentable Y/2 value
478 -- caused by subnormal numbers.
489 if A > B or else (A = B and then not P_Even) then
490 IEEE_Rem := IEEE_Rem - abs Y;
493 return Sign_X * IEEE_Rem;
501 function Rounding (X : T) return T is
506 Result := Truncation (abs X);
507 Tail := abs X - Result;
510 Result := Result + 1.0;
519 -- For zero case, make sure sign of zero is preserved
531 -- Return x * rad ** adjustment quickly,
532 -- or quietly underflow to zero, or overflow naturally.
534 function Scaling (X : T; Adjustment : UI) return T is
536 if X = 0.0 or else Adjustment = 0 then
540 -- Nonzero x. Essentially, just multiply repeatedly by Rad ** (+-2**n).
544 Ex : UI := Adjustment;
546 -- Y * Rad ** Ex is invariant
550 while Ex <= -Log_Power (Expbits'Last) loop
551 Y := Y * R_Neg_Power (Expbits'Last);
552 Ex := Ex + Log_Power (Expbits'Last);
557 for N in reverse Expbits'First .. Expbits'Last - 1 loop
558 if Ex <= -Log_Power (N) then
559 Y := Y * R_Neg_Power (N);
560 Ex := Ex + Log_Power (N);
563 -- -Log_Power (N) < Ex <= 0
571 while Ex >= Log_Power (Expbits'Last) loop
572 Y := Y * R_Power (Expbits'Last);
573 Ex := Ex - Log_Power (Expbits'Last);
578 for N in reverse Expbits'First .. Expbits'Last - 1 loop
579 if Ex >= Log_Power (N) then
580 Y := Y * R_Power (N);
581 Ex := Ex - Log_Power (N);
584 -- 0 <= Ex < Log_Power (N)
597 -- Similar computation to that of Pred: find value of least significant
598 -- bit of given number, and add. Zero has to be treated specially since
599 -- the exponent can be zero, and also we want the smallest denormal if
600 -- denormals are supported.
602 function Succ (X : T) return T is
609 X1 := 2.0 ** T'Machine_Emin;
611 -- Following loop generates smallest denormal
614 X2 := T'Machine (X1 / 2.0);
622 Decompose (X, X_Frac, X_Exp);
624 -- A special case, if the number we had was the negative of a power
625 -- of two, then we want to add half of what we would otherwise add,
626 -- since the exponent is going to be reduced.
628 if X_Frac = 0.5 and then X < 0.0 then
629 return X + Gradual_Scaling (X_Exp - T'Machine_Mantissa - 1);
631 -- Otherwise the exponent stays the same
634 return X + Gradual_Scaling (X_Exp - T'Machine_Mantissa);
643 -- The basic approach is to compute
645 -- T'Machine (RM1 + N) - RM1.
647 -- where N >= 0.0 and RM1 = radix ** (mantissa - 1)
649 -- This works provided that the intermediate result (RM1 + N) does not
650 -- have extra precision (which is why we call Machine). When we compute
651 -- RM1 + N, the exponent of N will be normalized and the mantissa shifted
652 -- appropriately so the lower order bits, which cannot contribute
653 -- to the integer part of N, fall off on the right. When we subtract RM1
654 -- again, the significant bits of N are shifted to the left, and what we
655 -- have is an integer, because only the first e bits are different from
656 -- zero (assuming binary radix here).
658 function Truncation (X : T) return T is
664 if Result >= Radix_To_M_Minus_1 then
668 Result := Machine (Radix_To_M_Minus_1 + Result) - Radix_To_M_Minus_1;
670 if Result > abs X then
671 Result := Result - 1.0;
680 -- For zero case, make sure sign of zero is preserved
689 -----------------------
690 -- Unbiased_Rounding --
691 -----------------------
693 function Unbiased_Rounding (X : T) return T is
694 Abs_X : constant T := abs X;
699 Result := Truncation (Abs_X);
700 Tail := Abs_X - Result;
703 Result := Result + 1.0;
705 elsif Tail = 0.5 then
706 Result := 2.0 * Truncation ((Result / 2.0) + 0.5);
715 -- For zero case, make sure sign of zero is preserved
721 end Unbiased_Rounding;
727 function Valid (X : access T) return Boolean is
729 IEEE_Emin : constant Integer := T'Machine_Emin - 1;
730 IEEE_Emax : constant Integer := T'Machine_Emax - 1;
732 IEEE_Bias : constant Integer := -(IEEE_Emin - 1);
734 subtype IEEE_Exponent_Range is
735 Integer range IEEE_Emin - 1 .. IEEE_Emax + 1;
737 -- The implementation of this floating point attribute uses
738 -- a representation type Float_Rep that allows direct access to
739 -- the exponent and mantissa parts of a floating point number.
741 -- The Float_Rep type is an array of Float_Word elements. This
742 -- representation is chosen to make it possible to size the
743 -- type based on a generic parameter.
745 -- The following conditions must be met for all possible
746 -- instantiations of the attributes package:
748 -- - T'Size is an integral multiple of Float_Word'Size
750 -- - The exponent and sign are completely contained in a single
751 -- component of Float_Rep, named Most_Significant_Word (MSW).
753 -- - The sign occupies the most significant bit of the MSW
754 -- and the exponent is in the following bits.
755 -- Unused bits (if any) are in the least significant part.
757 type Float_Word is mod 2**32;
758 type Rep_Index is range 0 .. 7;
760 Rep_Last : constant Rep_Index := (T'Size - 1) / Float_Word'Size;
762 type Float_Rep is array (Rep_Index range 0 .. Rep_Last) of Float_Word;
764 Most_Significant_Word : constant Rep_Index :=
765 Rep_Last * Standard'Default_Bit_Order;
766 -- Finding the location of the Exponent_Word is a bit tricky.
767 -- In general we assume Word_Order = Bit_Order.
768 -- This expression needs to be refined for VMS.
770 Exponent_Factor : constant Float_Word :=
771 2**(Float_Word'Size - 1) /
772 Float_Word (IEEE_Emax - IEEE_Emin + 3) *
773 Boolean'Pos (T'Size /= 96) +
774 Boolean'Pos (T'Size = 96);
775 -- Factor that the extracted exponent needs to be divided by
776 -- to be in range 0 .. IEEE_Emax - IEEE_Emin + 2.
777 -- Special kludge: Exponent_Factor is 1 for x86 double extended
778 -- as GCC adds 16 unused bits to the type.
780 Exponent_Mask : constant Float_Word :=
781 Float_Word (IEEE_Emax - IEEE_Emin + 2) *
783 -- Value needed to mask out the exponent field.
784 -- This assumes that the range IEEE_Emin - 1 .. IEEE_Emax + 1
785 -- contains 2**N values, for some N in Natural.
787 function To_Float is new Unchecked_Conversion (Float_Rep, T);
789 type Float_Access is access all T;
790 function To_Address is
791 new Unchecked_Conversion (Float_Access, System.Address);
793 XA : constant System.Address := To_Address (Float_Access (X));
796 pragma Import (Ada, R);
797 for R'Address use XA;
798 -- R is a view of the input floating-point parameter. Note that we
799 -- must avoid copying the actual bits of this parameter in float
800 -- form (since it may be a signalling NaN).
802 E : constant IEEE_Exponent_Range :=
803 Integer ((R (Most_Significant_Word) and Exponent_Mask) /
806 -- Mask/Shift T to only get bits from the exponent
807 -- Then convert biased value to integer value.
810 -- Float_Rep representation of significand of X.all
815 -- All denormalized numbers are valid, so only invalid numbers
816 -- are overflows and NaN's, both with exponent = Emax + 1.
818 return E /= IEEE_Emax + 1;
822 -- All denormalized numbers except 0.0 are invalid
824 -- Set exponent of X to zero, so we end up with the significand, which
825 -- definitely is a valid number and can be converted back to a float.
828 SR (Most_Significant_Word) :=
829 (SR (Most_Significant_Word)
830 and not Exponent_Mask) + Float_Word (IEEE_Bias) * Exponent_Factor;
832 return (E in IEEE_Emin .. IEEE_Emax) or else
833 ((E = IEEE_Emin - 1) and then abs To_Float (SR) = 1.0);