X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Fconfig%2Ffp-bit.c;h=1cdac6ebecac1708e9cad742eb72719ebdace5ec;hb=b5533320792a330994b507ade924b9092c91e106;hp=8819029aa09ad8597e0efdc11bb5bf778a389134;hpb=15ea643f430eece230a5c23dbda1e6757f936c6a;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/config/fp-bit.c b/gcc/config/fp-bit.c
index 8819029aa09..1cdac6ebeca 100644
--- a/gcc/config/fp-bit.c
+++ b/gcc/config/fp-bit.c
@@ -1,37 +1,28 @@
-/* This is a software floating point library which can be used instead of
-   the floating point routines in libgcc1.c for targets without hardware
-   floating point.  */
-
-/* Copyright (C) 1994 Free Software Foundation, Inc.
-
-This file is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
-later version.
-
-In addition to the permissions in the GNU General Public License, the
-Free Software Foundation gives you unlimited permission to link the
-compiled version of this file with other programs, and to distribute
-those programs without any restriction coming from the use of this
-file.  (The General Public License restrictions do apply in other
-respects; for example, they cover modification of the file, and
-distribution when not linked into another program.)
-
-This file is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; see the file COPYING.  If not, write to
-the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
-
-/* As a special exception, if you link this library with other files,
-   some of which are compiled with GCC, to produce an executable,
-   this library does not by itself cause the resulting executable
-   to be covered by the GNU General Public License.
-   This exception does not however invalidate any other reasons why
-   the executable file might be covered by the GNU General Public License.  */
+/* This is a software floating point library which can be used
+   for targets without hardware floating point. 
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
+   2004, 2005, 2008, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
 
 /* This implements IEEE 754 format arithmetic, but does not provide a
    mechanism for setting the rounding mode, or for generating or handling
@@ -43,7 +34,12 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
 /* The intended way to use this file is to make two copies, add `#define FLOAT'
    to one copy, then compile both copies and add them to libgcc.a.  */
 
-/* The following macros can be defined to change the behaviour of this file:
+#include "tconfig.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "config/fp-bit.h"
+
+/* The following macros can be defined to change the behavior of this file:
    FLOAT: Implement a `float', aka SFmode, fp library.  If this is not
      defined, then this file implements a `double', aka DFmode, fp library.
    FLOAT_ONLY: Used with FLOAT, to implement a `float' only library, i.e.
@@ -53,214 +49,66 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
    CMPtype: Specify the type that floating point compares should return.
      This defaults to SItype, aka int.
    US_SOFTWARE_GOFAST: This makes all entry points use the same names as the
-     US Software goFast library.  If this is not defined, the entry points use
-     the same names as libgcc1.c.
+     US Software goFast library.
    _DEBUG_BITFLOAT: This makes debugging the code a little easier, by adding
-     two integers to the FLO_union_type.  
+     two integers to the FLO_union_type.
+   NO_DENORMALS: Disable handling of denormals.
    NO_NANS: Disable nan and infinity handling
    SMALL_MACHINE: Useful when operations on QIs and HIs are faster
      than on an SI */
 
-typedef SFtype __attribute__ ((mode (SF)));
-typedef DFtype __attribute__ ((mode (DF)));
-
-typedef int HItype __attribute__ ((mode (HI)));
-typedef int SItype __attribute__ ((mode (SI)));
-typedef int DItype __attribute__ ((mode (DI)));
-
-/* The type of the result of a fp compare */
-#ifndef CMPtype
-#define CMPtype SItype
-#endif
-
-typedef unsigned int UHItype __attribute__ ((mode (HI)));
-typedef unsigned int USItype __attribute__ ((mode (SI)));
-typedef unsigned int UDItype __attribute__ ((mode (DI)));
-
-#define MAX_SI_INT   ((SItype) ((unsigned) (~0)>>1))
-#define MAX_USI_INT  ((USItype) ~0)
-
-
-#ifdef FLOAT_ONLY
-#define NO_DI_MODE
-#endif
-
-#ifdef FLOAT
-#	define NGARDS    7L
-#	define GARDROUND 0x3f
-#	define GARDMASK  0x7f
-#	define GARDMSB   0x40
-#	define EXPBITS 8
-#	define EXPBIAS 127
-#	define FRACBITS 23
-#	define EXPMAX (0xff)
-#	define QUIET_NAN 0x100000L
-#	define FRAC_NBITS 32
-#	define FRACHIGH  0x80000000L
-#	define FRACHIGH2 0xc0000000L
-	typedef USItype fractype;
-	typedef UHItype halffractype;
-	typedef SFtype FLO_type;
-	typedef SItype intfrac;
+/* We don't currently support extended floats (long doubles) on machines
+   without hardware to deal with them.
 
-#else
-#	define PREFIXFPDP dp
-#	define PREFIXSFDF df
-#	define NGARDS 8L
-#	define GARDROUND 0x7f
-#	define GARDMASK  0xff
-#	define GARDMSB   0x80
-#	define EXPBITS 11
-#	define EXPBIAS 1023
-#	define FRACBITS 52
-#	define EXPMAX (0x7ff)
-#	define QUIET_NAN 0x8000000000000LL
-#	define FRAC_NBITS 64
-#	define FRACHIGH  0x8000000000000000LL
-#	define FRACHIGH2 0xc000000000000000LL
-	typedef UDItype fractype;
-	typedef USItype halffractype;
-	typedef DFtype FLO_type;
-	typedef DItype intfrac;
-#endif
+   These stubs are just to keep the linker from complaining about unresolved
+   references which can be pulled in from libio & libstdc++, even if the
+   user isn't using long doubles.  However, they may generate an unresolved
+   external to abort if abort is not used by the function, and the stubs
+   are referenced from within libc, since libgcc goes before and after the
+   system library.  */
 
-#ifdef US_SOFTWARE_GOFAST
-#	ifdef FLOAT
-#		define add 		fpadd
-#		define sub 		fpsub
-#		define multiply 	fpmul
-#		define divide 		fpdiv
-#		define compare 		fpcmp
-#		define si_to_float 	sitofp
-#		define float_to_si 	fptosi
-#		define float_to_usi 	fptoui
-#		define negate 		__negsf2
-#		define sf_to_df		fptodp
-#		define dptofp 		dptofp
-#else
-#		define add 		dpadd
-#		define sub 		dpsub
-#		define multiply 	dpmul
-#		define divide 		dpdiv
-#		define compare 		dpcmp
-#		define si_to_float 	litodp
-#		define float_to_si 	dptoli
-#		define float_to_usi 	dptoul
-#		define negate 		__negdf2
-#		define df_to_sf 	dptofp
-#endif
-#else
-#	ifdef FLOAT
-#		define add 		__addsf3
-#		define sub 		__subsf3
-#		define multiply 	__mulsf3
-#		define divide 		__divsf3
-#		define compare 		__cmpsf2
-#		define _eq_f2 		__eqsf2
-#		define _ne_f2 		__nesf2
-#		define _gt_f2 		__gtsf2
-#		define _ge_f2 		__gesf2
-#		define _lt_f2 		__ltsf2
-#		define _le_f2 		__lesf2
-#		define si_to_float 	__floatsisf
-#		define float_to_si 	__fixsfsi
-#		define float_to_usi 	__fixunssfsi
-#		define negate 		__negsf2
-#		define sf_to_df		__extendsfdf2
-#else
-#		define add 		__adddf3
-#		define sub 		__subdf3
-#		define multiply 	__muldf3
-#		define divide 		__divdf3
-#		define compare 		__cmpdf2
-#		define _eq_f2 		__eqdf2
-#		define _ne_f2 		__nedf2
-#		define _gt_f2 		__gtdf2
-#		define _ge_f2 		__gedf2
-#		define _lt_f2 		__ltdf2
-#		define _le_f2 		__ledf2
-#		define si_to_float 	__floatsidf
-#		define float_to_si 	__fixdfsi
-#		define float_to_usi 	__fixunsdfsi
-#		define negate 		__negdf2
-#		define df_to_sf		__truncdfsf2
-#	endif
+#ifdef DECLARE_LIBRARY_RENAMES
+  DECLARE_LIBRARY_RENAMES
 #endif
 
-
-#define INLINE __inline__
-
-/* Preserve the sticky-bit when shifting fractions to the right.  */
-#define LSHIFT(a) { a = (a & 1) | (a >> 1); }
-
-/* numeric parameters */
-/* F_D_BITOFF is the number of bits offset between the MSB of the mantissa
-   of a float and of a double. Assumes there are only two float types.
-   (double::FRAC_BITS+double::NGARGS-(float::FRAC_BITS-float::NGARDS))
- */
-#define F_D_BITOFF (52+8-(23+7))
-
-
-#define NORMAL_EXPMIN (-(EXPBIAS)+1)
-#define IMPLICIT_1 (1LL<<(FRACBITS+NGARDS))
-#define IMPLICIT_2 (1LL<<(FRACBITS+1+NGARDS))
-
-/* common types */
-
-typedef enum
-{
-  CLASS_SNAN,
-  CLASS_QNAN,
-  CLASS_ZERO,
-  CLASS_NUMBER,
-  CLASS_INFINITY
-} fp_class_type;
-
-typedef struct
-{
-#ifdef SMALL_MACHINE
-  char class;
-  unsigned char sign;
-  short normal_exp;
-#else
-  fp_class_type class;
-  unsigned int sign;
-  int normal_exp;
-#endif
-
-  union
-    {
-      fractype ll;
-      halffractype l[2];
-    } fraction;
-} fp_number_type;
-
-typedef
-union
-{
-  FLO_type value;
-#ifdef _DEBUG_BITFLOAT
-  int l[2];
-#endif
-  struct
-    {
-#ifndef FLOAT_BIT_ORDER_MISMATCH
-      unsigned int sign:1;
-      unsigned int exp:EXPBITS;
-      fractype fraction:FRACBITS;
-#else
-      fractype fraction:FRACBITS;
-      unsigned int exp:EXPBITS;
-      unsigned int sign:1;
-#endif
-    }
-  bits;
-}
-
-FLO_union_type;
-
-
-/* end of header */
+#ifdef EXTENDED_FLOAT_STUBS
+extern void abort (void);
+void __extendsfxf2 (void) { abort(); }
+void __extenddfxf2 (void) { abort(); }
+void __truncxfdf2 (void) { abort(); }
+void __truncxfsf2 (void) { abort(); }
+void __fixxfsi (void) { abort(); }
+void __floatsixf (void) { abort(); }
+void __addxf3 (void) { abort(); }
+void __subxf3 (void) { abort(); }
+void __mulxf3 (void) { abort(); }
+void __divxf3 (void) { abort(); }
+void __negxf2 (void) { abort(); }
+void __eqxf2 (void) { abort(); }
+void __nexf2 (void) { abort(); }
+void __gtxf2 (void) { abort(); }
+void __gexf2 (void) { abort(); }
+void __lexf2 (void) { abort(); }
+void __ltxf2 (void) { abort(); }
+
+void __extendsftf2 (void) { abort(); }
+void __extenddftf2 (void) { abort(); }
+void __trunctfdf2 (void) { abort(); }
+void __trunctfsf2 (void) { abort(); }
+void __fixtfsi (void) { abort(); }
+void __floatsitf (void) { abort(); }
+void __addtf3 (void) { abort(); }
+void __subtf3 (void) { abort(); }
+void __multf3 (void) { abort(); }
+void __divtf3 (void) { abort(); }
+void __negtf2 (void) { abort(); }
+void __eqtf2 (void) { abort(); }
+void __netf2 (void) { abort(); }
+void __gttf2 (void) { abort(); }
+void __getf2 (void) { abort(); }
+void __letf2 (void) { abort(); }
+void __lttf2 (void) { abort(); }
+#else	/* !EXTENDED_FLOAT_STUBS, rest of file */
 
 /* IEEE "special" number predicates */
 
@@ -271,34 +119,53 @@ FLO_union_type;
 #define isinf(x) 0
 #else
 
+#if   defined L_thenan_sf
+const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_df
+const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_tf
+const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined TFLOAT
+extern const fp_number_type __thenan_tf;
+#elif defined FLOAT
+extern const fp_number_type __thenan_sf;
+#else
+extern const fp_number_type __thenan_df;
+#endif
+
 INLINE
-static fp_number_type *
-nan ()
+static const fp_number_type *
+makenan (void)
 {
-  static fp_number_type thenan;
-
-  return &thenan;
+#ifdef TFLOAT
+  return & __thenan_tf;
+#elif defined FLOAT  
+  return & __thenan_sf;
+#else
+  return & __thenan_df;
+#endif
 }
 
 INLINE
 static int
-isnan ( fp_number_type *  x)
+isnan (const fp_number_type *x)
 {
-  return x->class == CLASS_SNAN || x->class == CLASS_QNAN;
+  return __builtin_expect (x->class == CLASS_SNAN || x->class == CLASS_QNAN,
+			   0);
 }
 
 INLINE
 static int
-isinf ( fp_number_type *  x)
+isinf (const fp_number_type *  x)
 {
-  return x->class == CLASS_INFINITY;
+  return __builtin_expect (x->class == CLASS_INFINITY, 0);
 }
 
-#endif
+#endif /* NO_NANS */
 
 INLINE
 static int
-iszero ( fp_number_type *  x)
+iszero (const fp_number_type *  x)
 {
   return x->class == CLASS_ZERO;
 }
@@ -310,48 +177,85 @@ flip_sign ( fp_number_type *  x)
   x->sign = !x->sign;
 }
 
-static FLO_type
-pack_d ( fp_number_type *  src)
+/* Count leading zeroes in N.  */
+INLINE
+static int
+clzusi (USItype n)
+{
+  extern int __clzsi2 (USItype);
+  if (sizeof (USItype) == sizeof (unsigned int))
+    return __builtin_clz (n);
+  else if (sizeof (USItype) == sizeof (unsigned long))
+    return __builtin_clzl (n);
+  else if (sizeof (USItype) == sizeof (unsigned long long))
+    return __builtin_clzll (n);
+  else
+    return __clzsi2 (n);
+}
+
+extern FLO_type pack_d (const fp_number_type * );
+
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
+FLO_type
+pack_d (const fp_number_type *src)
 {
   FLO_union_type dst;
   fractype fraction = src->fraction.ll;	/* wasn't unsigned before? */
+  int sign = src->sign;
+  int exp = 0;
 
-  dst.bits.sign = src->sign;
-
-  if (isnan (src))
+  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && (isnan (src) || isinf (src)))
     {
-      dst.bits.exp = EXPMAX;
-      dst.bits.fraction = src->fraction.ll;
+      /* We can't represent these values accurately.  By using the
+	 largest possible magnitude, we guarantee that the conversion
+	 of infinity is at least as big as any finite number.  */
+      exp = EXPMAX;
+      fraction = ((fractype) 1 << FRACBITS) - 1;
+    }
+  else if (isnan (src))
+    {
+      exp = EXPMAX;
       if (src->class == CLASS_QNAN || 1)
 	{
-	  dst.bits.fraction |= QUIET_NAN;
+#ifdef QUIET_NAN_NEGATED
+	  fraction |= QUIET_NAN - 1;
+#else
+	  fraction |= QUIET_NAN;
+#endif
 	}
     }
   else if (isinf (src))
     {
-      dst.bits.exp = EXPMAX;
-      dst.bits.fraction = 0;
+      exp = EXPMAX;
+      fraction = 0;
     }
   else if (iszero (src))
     {
-      dst.bits.exp = 0;
-      dst.bits.fraction = 0;
+      exp = 0;
+      fraction = 0;
     }
   else if (fraction == 0)
     {
-      dst.value = 0;
+      exp = 0;
     }
   else
     {
-      if (src->normal_exp < NORMAL_EXPMIN)
+      if (__builtin_expect (src->normal_exp < NORMAL_EXPMIN, 0))
 	{
+#ifdef NO_DENORMALS
+	  /* Go straight to a zero representation if denormals are not
+ 	     supported.  The denormal handling would be harmless but
+ 	     isn't unnecessary.  */
+	  exp = 0;
+	  fraction = 0;
+#else /* NO_DENORMALS */
 	  /* This number's exponent is too low to fit into the bits
 	     available in the number, so we'll store 0 in the exponent and
 	     shift the fraction to the right to make up for it.  */
 
 	  int shift = NORMAL_EXPMIN - src->normal_exp;
 
-	  dst.bits.exp = 0;
+	  exp = 0;
 
 	  if (shift > FRAC_NBITS - NGARDS)
 	    {
@@ -360,65 +264,280 @@ pack_d ( fp_number_type *  src)
 	    }
 	  else
 	    {
-	      /* Shift by the value */
-	      fraction >>= shift;
+	      int lowbit = (fraction & (((fractype)1 << shift) - 1)) ? 1 : 0;
+	      fraction = (fraction >> shift) | lowbit;
 	    }
-	  fraction >>= NGARDS;
-	  dst.bits.fraction = fraction;
-	}
-      else if (src->normal_exp > EXPBIAS)
-	{
-	  dst.bits.exp = EXPMAX;
-	  dst.bits.fraction = 0;
-	}
-      else
-	{
-	  dst.bits.exp = src->normal_exp + EXPBIAS;
-	  /* IF the gard bits are the all zero, but the first, then we're
-	     half way between two numbers, choose the one which makes the
-	     lsb of the answer 0.  */
 	  if ((fraction & GARDMASK) == GARDMSB)
 	    {
-	      if (fraction & (1 << NGARDS))
+	      if ((fraction & (1 << NGARDS)))
 		fraction += GARDROUND + 1;
 	    }
 	  else
 	    {
-	      /* Add a one to the guards to round up */
+	      /* Add to the guards to round up.  */
 	      fraction += GARDROUND;
 	    }
-	  if (fraction >= IMPLICIT_2)
+	  /* Perhaps the rounding means we now need to change the
+             exponent, because the fraction is no longer denormal.  */
+	  if (fraction >= IMPLICIT_1)
 	    {
-	      fraction >>= 1;
-	      dst.bits.exp += 1;
+	      exp += 1;
 	    }
 	  fraction >>= NGARDS;
-	  dst.bits.fraction = fraction;
+#endif /* NO_DENORMALS */
+	}
+      else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+	       && __builtin_expect (src->normal_exp > EXPBIAS, 0))
+	{
+	  exp = EXPMAX;
+	  fraction = 0;
+	}
+      else
+	{
+	  exp = src->normal_exp + EXPBIAS;
+	  if (!ROUND_TOWARDS_ZERO)
+	    {
+	      /* IF the gard bits are the all zero, but the first, then we're
+		 half way between two numbers, choose the one which makes the
+		 lsb of the answer 0.  */
+	      if ((fraction & GARDMASK) == GARDMSB)
+		{
+		  if (fraction & (1 << NGARDS))
+		    fraction += GARDROUND + 1;
+		}
+	      else
+		{
+		  /* Add a one to the guards to round up */
+		  fraction += GARDROUND;
+		}
+	      if (fraction >= IMPLICIT_2)
+		{
+		  fraction >>= 1;
+		  exp += 1;
+		}
+	    }
+	  fraction >>= NGARDS;
+
+	  if (LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS) && exp > EXPMAX)
+	    {
+	      /* Saturate on overflow.  */
+	      exp = EXPMAX;
+	      fraction = ((fractype) 1 << FRACBITS) - 1;
+	    }
 	}
     }
+
+  /* We previously used bitfields to store the number, but this doesn't
+     handle little/big endian systems conveniently, so use shifts and
+     masks */
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  dst.bits.fraction = fraction;
+  dst.bits.exp = exp;
+  dst.bits.sign = sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low, unity;
+   int lowsign, lowexp;
+
+   unity = (halffractype) 1 << HALFFRACBITS;
+
+   /* Set HIGH to the high double's significand, masking out the implicit 1.
+      Set LOW to the low double's full significand.  */
+   high = (fraction >> (FRACBITS - HALFFRACBITS)) & (unity - 1);
+   low = fraction & (unity * 2 - 1);
+
+   /* Get the initial sign and exponent of the low double.  */
+   lowexp = exp - HALFFRACBITS - 1;
+   lowsign = sign;
+
+   /* HIGH should be rounded like a normal double, making |LOW| <=
+      0.5 ULP of HIGH.  Assume round-to-nearest.  */
+   if (exp < EXPMAX)
+     if (low > unity || (low == unity && (high & 1) == 1))
+       {
+	 /* Round HIGH up and adjust LOW to match.  */
+	 high++;
+	 if (high == unity)
+	   {
+	     /* May make it infinite, but that's OK.  */
+	     high = 0;
+	     exp++;
+	   }
+	 low = unity * 2 - low;
+	 lowsign ^= 1;
+       }
+
+   high |= (halffractype) exp << HALFFRACBITS;
+   high |= (halffractype) sign << (HALFFRACBITS + EXPBITS);
+
+   if (exp == EXPMAX || exp == 0 || low == 0)
+     low = 0;
+   else
+     {
+       while (lowexp > 0 && low < unity)
+	 {
+	   low <<= 1;
+	   lowexp--;
+	 }
+
+       if (lowexp <= 0)
+	 {
+	   halffractype roundmsb, round;
+	   int shift;
+
+	   shift = 1 - lowexp;
+	   roundmsb = (1 << (shift - 1));
+	   round = low & ((roundmsb << 1) - 1);
+
+	   low >>= shift;
+	   lowexp = 0;
+
+	   if (round > roundmsb || (round == roundmsb && (low & 1) == 1))
+	     {
+	       low++;
+	       if (low == unity)
+		 /* LOW rounds up to the smallest normal number.  */
+		 lowexp++;
+	     }
+	 }
+
+       low &= unity - 1;
+       low |= (halffractype) lowexp << HALFFRACBITS;
+       low |= (halffractype) lowsign << (HALFFRACBITS + EXPBITS);
+     }
+   dst.value_raw = ((fractype) high << HALFSHIFT) | low;
+ }
+# else
+  dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1);
+  dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS;
+  dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS | EXPBITS);
+# endif
+#endif
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+#ifdef TFLOAT
+  {
+    qrtrfractype tmp1 = dst.words[0];
+    qrtrfractype tmp2 = dst.words[1];
+    dst.words[0] = dst.words[3];
+    dst.words[1] = dst.words[2];
+    dst.words[2] = tmp2;
+    dst.words[3] = tmp1;
+  }
+#else
+  {
+    halffractype tmp = dst.words[0];
+    dst.words[0] = dst.words[1];
+    dst.words[1] = tmp;
+  }
+#endif
+#endif
+
   return dst.value;
 }
+#endif
 
-static void
+#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf)
+void
 unpack_d (FLO_union_type * src, fp_number_type * dst)
 {
-  fractype fraction = src->bits.fraction;
+  /* We previously used bitfields to store the number, but this doesn't
+     handle little/big endian systems conveniently, so use shifts and
+     masks */
+  fractype fraction;
+  int exp;
+  int sign;
+
+#if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+  FLO_union_type swapped;
+
+#ifdef TFLOAT
+  swapped.words[0] = src->words[3];
+  swapped.words[1] = src->words[2];
+  swapped.words[2] = src->words[1];
+  swapped.words[3] = src->words[0];
+#else
+  swapped.words[0] = src->words[1];
+  swapped.words[1] = src->words[0];
+#endif
+  src = &swapped;
+#endif
+  
+#ifdef FLOAT_BIT_ORDER_MISMATCH
+  fraction = src->bits.fraction;
+  exp = src->bits.exp;
+  sign = src->bits.sign;
+#else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low;
+   
+   high = src->value_raw >> HALFSHIFT;
+   low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1);
+
+   fraction = high & ((((fractype)1) << HALFFRACBITS) - 1);
+   fraction <<= FRACBITS - HALFFRACBITS;
+   exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+   sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1;
+
+   if (exp != EXPMAX && exp != 0 && low != 0)
+     {
+       int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+       int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1;
+       int shift;
+       fractype xlow;
+
+       xlow = low & ((((fractype)1) << HALFFRACBITS) - 1);
+       if (lowexp)
+	 xlow |= (((halffractype)1) << HALFFRACBITS);
+       else
+	 lowexp = 1;
+       shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp);
+       if (shift > 0)
+	 xlow <<= shift;
+       else if (shift < 0)
+	 xlow >>= -shift;
+       if (sign == lowsign)
+	 fraction += xlow;
+       else if (fraction >= xlow)
+	 fraction -= xlow;
+       else
+	 {
+	   /* The high part is a power of two but the full number is lower.
+	      This code will leave the implicit 1 in FRACTION, but we'd
+	      have added that below anyway.  */
+	   fraction = (((fractype) 1 << FRACBITS) - xlow) << 1;
+	   exp--;
+	 }
+     }
+ }
+# else
+  fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1);
+  exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1);
+  sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1;
+# endif
+#endif
 
-  dst->sign = src->bits.sign;
-  if (src->bits.exp == 0)
+  dst->sign = sign;
+  if (exp == 0)
     {
       /* Hmm.  Looks like 0 */
-      if (fraction == 0)
+      if (fraction == 0
+#ifdef NO_DENORMALS
+	  || 1
+#endif
+	  )
 	{
 	  /* tastes like zero */
 	  dst->class = CLASS_ZERO;
 	}
       else
 	{
-	  /* Zero exponent with non zero fraction - it's denormalized,
+	  /* Zero exponent with nonzero fraction - it's denormalized,
 	     so there isn't a leading implicit one - we'll shift it so
 	     it gets one.  */
-	  dst->normal_exp = src->bits.exp - EXPBIAS + 1;
+	  dst->normal_exp = exp - EXPBIAS + 1;
 	  fraction <<= NGARDS;
 
 	  dst->class = CLASS_NUMBER;
@@ -432,24 +551,29 @@ unpack_d (FLO_union_type * src, fp_number_type * dst)
 	  dst->fraction.ll = fraction;
 	}
     }
-  else if (src->bits.exp == EXPMAX)
+  else if (!LARGEST_EXPONENT_IS_NORMAL (FRAC_NBITS)
+	   && __builtin_expect (exp == EXPMAX, 0))
     {
       /* Huge exponent*/
       if (fraction == 0)
 	{
-	  /* Attatched to a zero fraction - means infinity */
+	  /* Attached to a zero fraction - means infinity */
 	  dst->class = CLASS_INFINITY;
 	}
       else
 	{
-	  /* Non zero fraction, means nan */
-	  if (dst->sign)
+	  /* Nonzero fraction, means nan */
+#ifdef QUIET_NAN_NEGATED
+	  if ((fraction & QUIET_NAN) == 0)
+#else
+	  if (fraction & QUIET_NAN)
+#endif
 	    {
-	      dst->class = CLASS_SNAN;
+	      dst->class = CLASS_QNAN;
 	    }
 	  else
 	    {
-	      dst->class = CLASS_QNAN;
+	      dst->class = CLASS_SNAN;
 	    }
 	  /* Keep the fraction part as the nan number */
 	  dst->fraction.ll = fraction;
@@ -458,13 +582,15 @@ unpack_d (FLO_union_type * src, fp_number_type * dst)
   else
     {
       /* Nothing strange about this number */
-      dst->normal_exp = src->bits.exp - EXPBIAS;
+      dst->normal_exp = exp - EXPBIAS;
       dst->class = CLASS_NUMBER;
       dst->fraction.ll = (fraction << NGARDS) | IMPLICIT_1;
     }
 }
+#endif /* L_unpack_df || L_unpack_sf */
 
-static fp_number_type *
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
+static const fp_number_type *
 _fpadd_parts (fp_number_type * a,
 	      fp_number_type * b,
 	      fp_number_type * tmp)
@@ -487,6 +613,9 @@ _fpadd_parts (fp_number_type * a,
     }
   if (isinf (a))
     {
+      /* Adding infinities with opposite signs yields a NaN.  */
+      if (isinf (b) && a->sign != b->sign)
+	return makenan ();
       return a;
     }
   if (isinf (b))
@@ -495,6 +624,12 @@ _fpadd_parts (fp_number_type * a,
     }
   if (iszero (b))
     {
+      if (iszero (a))
+	{
+	  *tmp = *a;
+	  tmp->sign = a->sign & b->sign;
+	  return tmp;
+	}
       return a;
     }
   if (iszero (a))
@@ -506,6 +641,7 @@ _fpadd_parts (fp_number_type * a,
      they're the same */
   {
     int diff;
+    int sdiff;
 
     a_normal_exp = a->normal_exp;
     b_normal_exp = b->normal_exp;
@@ -513,21 +649,21 @@ _fpadd_parts (fp_number_type * a,
     b_fraction = b->fraction.ll;
 
     diff = a_normal_exp - b_normal_exp;
+    sdiff = diff;
 
     if (diff < 0)
       diff = -diff;
     if (diff < FRAC_NBITS)
       {
-	/* ??? This does shifts one bit at a time.  Optimize.  */
-	while (a_normal_exp > b_normal_exp)
+	if (sdiff > 0)
 	  {
-	    b_normal_exp++;
-	    LSHIFT (b_fraction);
+	    b_normal_exp += diff;
+	    LSHIFT (b_fraction, diff);
 	  }
-	while (b_normal_exp > a_normal_exp)
+	else if (sdiff < 0)
 	  {
-	    a_normal_exp++;
-	    LSHIFT (a_fraction);
+	    a_normal_exp += diff;
+	    LSHIFT (a_fraction, diff);
 	  }
       }
     else
@@ -556,7 +692,7 @@ _fpadd_parts (fp_number_type * a,
 	{
 	  tfraction = a_fraction - b_fraction;
 	}
-      if (tfraction > 0)
+      if (tfraction >= 0)
 	{
 	  tmp->sign = 0;
 	  tmp->normal_exp = a_normal_exp;
@@ -568,7 +704,7 @@ _fpadd_parts (fp_number_type * a,
 	  tmp->normal_exp = a_normal_exp;
 	  tmp->fraction.ll = -tfraction;
 	}
-      /* and renomalize it */
+      /* and renormalize it */
 
       while (tmp->fraction.ll < IMPLICIT_1 && tmp->fraction.ll)
 	{
@@ -588,11 +724,10 @@ _fpadd_parts (fp_number_type * a,
 
   if (tmp->fraction.ll >= IMPLICIT_2)
     {
-      LSHIFT (tmp->fraction.ll);
+      LSHIFT (tmp->fraction.ll, 1);
       tmp->normal_exp++;
     }
   return tmp;
-
 }
 
 FLO_type
@@ -601,10 +736,14 @@ add (FLO_type arg_a, FLO_type arg_b)
   fp_number_type a;
   fp_number_type b;
   fp_number_type tmp;
-  fp_number_type *res;
+  const fp_number_type *res;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   res = _fpadd_parts (&a, &b, &tmp);
 
@@ -617,10 +756,14 @@ sub (FLO_type arg_a, FLO_type arg_b)
   fp_number_type a;
   fp_number_type b;
   fp_number_type tmp;
-  fp_number_type *res;
+  const fp_number_type *res;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   b.sign ^= 1;
 
@@ -628,8 +771,10 @@ sub (FLO_type arg_a, FLO_type arg_b)
 
   return pack_d (res);
 }
+#endif /* L_addsub_sf || L_addsub_df */
 
-static fp_number_type *
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
+static inline __attribute__ ((__always_inline__)) const fp_number_type *
 _fpmul_parts ( fp_number_type *  a,
 	       fp_number_type *  b,
 	       fp_number_type * tmp)
@@ -650,7 +795,7 @@ _fpmul_parts ( fp_number_type *  a,
   if (isinf (a))
     {
       if (iszero (b))
-	return nan ();
+	return makenan ();
       a->sign = a->sign != b->sign;
       return a;
     }
@@ -658,7 +803,7 @@ _fpmul_parts ( fp_number_type *  a,
     {
       if (iszero (a))
 	{
-	  return nan ();
+	  return makenan ();
 	}
       b->sign = a->sign != b->sign;
       return b;
@@ -674,16 +819,16 @@ _fpmul_parts ( fp_number_type *  a,
       return b;
     }
 
-  /* Calculate the mantissa by multiplying both 64bit numbers to get a
-     128 bit number */
+  /* Calculate the mantissa by multiplying both numbers to get a
+     twice-as-wide number.  */
   {
-    fractype x = a->fraction.ll;
-    fractype ylow = b->fraction.ll;
-    fractype yhigh = 0;
-    int bit;
-
-#if defined(NO_DI_MODE)
+#if defined(NO_DI_MODE) || defined(TFLOAT)
     {
+      fractype x = a->fraction.ll;
+      fractype ylow = b->fraction.ll;
+      fractype yhigh = 0;
+      int bit;
+
       /* ??? This does multiplies one bit at a time.  Optimize.  */
       for (bit = 0; bit < FRAC_NBITS; bit++)
 	{
@@ -704,49 +849,45 @@ _fpmul_parts ( fp_number_type *  a,
 	}
     }
 #elif defined(FLOAT) 
+    /* Multiplying two USIs to get a UDI, we're safe.  */
     {
-      /* Multiplying two 32 bit numbers to get a 64 bit number  on 
-        a machine with DI, so we're safe */
-
-      DItype answer = (DItype)(a->fraction.ll) * (DItype)(b->fraction.ll);
+      UDItype answer = (UDItype)a->fraction.ll * (UDItype)b->fraction.ll;
       
-      high = answer >> 32;
+      high = answer >> BITS_PER_SI;
       low = answer;
     }
 #else
-    /* Doing a 64*64 to 128 */
+    /* fractype is DImode, but we need the result to be twice as wide.
+       Assuming a widening multiply from DImode to TImode is not
+       available, build one by hand.  */
     {
-      UDItype nl = a->fraction.ll;
-      UDItype nh = a->fraction.ll >> 32;
-      UDItype ml = b->fraction.ll;
-      UDItype mh = b->fraction.ll >>32;
-      UDItype pp_ll = ml * nl;
-      UDItype pp_hl = mh * nl;
-      UDItype pp_lh = ml * nh;
-      UDItype pp_hh = mh * nh;
+      USItype nl = a->fraction.ll;
+      USItype nh = a->fraction.ll >> BITS_PER_SI;
+      USItype ml = b->fraction.ll;
+      USItype mh = b->fraction.ll >> BITS_PER_SI;
+      UDItype pp_ll = (UDItype) ml * nl;
+      UDItype pp_hl = (UDItype) mh * nl;
+      UDItype pp_lh = (UDItype) ml * nh;
+      UDItype pp_hh = (UDItype) mh * nh;
       UDItype res2 = 0;
       UDItype res0 = 0;
       UDItype ps_hh__ = pp_hl + pp_lh;
       if (ps_hh__ < pp_hl)
-	res2 += 0x100000000LL;
-      pp_hl = (ps_hh__ << 32) & 0xffffffff00000000LL;
+	res2 += (UDItype)1 << BITS_PER_SI;
+      pp_hl = (UDItype)(USItype)ps_hh__ << BITS_PER_SI;
       res0 = pp_ll + pp_hl;
       if (res0 < pp_ll)
 	res2++;
-      res2 += ((ps_hh__ >> 32) & 0xffffffffL) + pp_hh;
+      res2 += (ps_hh__ >> BITS_PER_SI) + pp_hh;
       high = res2;
       low = res0;
     }
 #endif
   }
 
-  tmp->normal_exp = a->normal_exp + b->normal_exp;
+  tmp->normal_exp = a->normal_exp + b->normal_exp
+    + FRAC_NBITS - (FRACBITS + NGARDS);
   tmp->sign = a->sign != b->sign;
-#ifdef FLOAT
-  tmp->normal_exp += 2;		/* ??????????????? */
-#else
-  tmp->normal_exp += 4;		/* ??????????????? */
-#endif
   while (high >= IMPLICIT_2)
     {
       tmp->normal_exp++;
@@ -766,32 +907,28 @@ _fpmul_parts ( fp_number_type *  a,
 	high |= 1;
       low <<= 1;
     }
-  /* rounding is tricky. if we only round if it won't make us round later. */
-#if 0
-  if (low & FRACHIGH2)
-    {
-      if (((high & GARDMASK) != GARDMSB)
-	  && (((high + 1) & GARDMASK) == GARDMSB))
-	{
-	  /* don't round, it gets done again later. */
-	}
-      else
-	{
-	  high++;
-	}
-    }
-#endif
-  if ((high & GARDMASK) == GARDMSB)
+
+  if (!ROUND_TOWARDS_ZERO && (high & GARDMASK) == GARDMSB)
     {
       if (high & (1 << NGARDS))
 	{
-	  /* half way, so round to even */
-	  high += GARDROUND + 1;
+	  /* Because we're half way, we would round to even by adding
+	     GARDROUND + 1, except that's also done in the packing
+	     function, and rounding twice will lose precision and cause
+	     the result to be too far off.  Example: 32-bit floats with
+	     bit patterns 0xfff * 0x3f800400 ~= 0xfff (less than 0.5ulp
+	     off), not 0x1000 (more than 0.5ulp off).  */
 	}
       else if (low)
 	{
-	  /* but we really weren't half way */
+	  /* We're a further than half way by a small amount corresponding
+	     to the bits set in "low".  Knowing that, we round here and
+	     not in pack_d, because there we don't have "low" available
+	     anymore.  */
 	  high += GARDROUND + 1;
+
+	  /* Avoid further rounding in pack_d.  */
+	  high &= ~(fractype) GARDMASK;
 	}
     }
   tmp->fraction.ll = high;
@@ -805,29 +942,30 @@ multiply (FLO_type arg_a, FLO_type arg_b)
   fp_number_type a;
   fp_number_type b;
   fp_number_type tmp;
-  fp_number_type *res;
+  const fp_number_type *res;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   res = _fpmul_parts (&a, &b, &tmp);
 
   return pack_d (res);
 }
+#endif /* L_mul_sf || L_mul_df || L_mul_tf */
 
-static fp_number_type *
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
+static inline __attribute__ ((__always_inline__)) const fp_number_type *
 _fpdiv_parts (fp_number_type * a,
-	      fp_number_type * b,
-	      fp_number_type * tmp)
+	      fp_number_type * b)
 {
-  fractype low = 0;
-  fractype high = 0;
-  fractype r0, r1, y0, y1, bit;
-  fractype q;
+  fractype bit;
   fractype numerator;
   fractype denominator;
   fractype quotient;
-  fractype remainder;
 
   if (isnan (a))
     {
@@ -837,13 +975,15 @@ _fpdiv_parts (fp_number_type * a,
     {
       return b;
     }
+
+  a->sign = a->sign ^ b->sign;
+
   if (isinf (a) || iszero (a))
     {
       if (a->class == b->class)
-	return nan ();
+	return makenan ();
       return a;
     }
-  a->sign = a->sign ^ b->sign;
 
   if (isinf (b))
     {
@@ -854,15 +994,12 @@ _fpdiv_parts (fp_number_type * a,
   if (iszero (b))
     {
       a->class = CLASS_INFINITY;
-      return b;
+      return a;
     }
 
   /* Calculate the mantissa by multiplying both 64bit numbers to get a
      128 bit number */
   {
-    int carry;
-    intfrac d0, d1;		/* weren't unsigned before ??? */
-
     /* quotient =
        ( numerator / denominator) * 2^(numerator exponent -  denominator exponent)
      */
@@ -891,17 +1028,25 @@ _fpdiv_parts (fp_number_type * a,
 	numerator *= 2;
       }
 
-    if ((quotient & GARDMASK) == GARDMSB)
+    if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
       {
 	if (quotient & (1 << NGARDS))
 	  {
-	    /* half way, so round to even */
-	    quotient += GARDROUND + 1;
+	    /* Because we're half way, we would round to even by adding
+	       GARDROUND + 1, except that's also done in the packing
+	       function, and rounding twice will lose precision and cause
+	       the result to be too far off.  */
 	  }
 	else if (numerator)
 	  {
-	    /* but we really weren't half way, more bits exist */
+	    /* We're a further than half way by the small amount
+	       corresponding to the bits set in "numerator".  Knowing
+	       that, we round here and not in pack_d, because there we
+	       don't have "numerator" available anymore.  */
 	    quotient += GARDROUND + 1;
+
+	    /* Avoid further rounding in pack_d.  */
+	    quotient &= ~(fractype) GARDMASK;
 	  }
       }
 
@@ -915,28 +1060,34 @@ divide (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
-  fp_number_type tmp;
-  fp_number_type *res;
+  const fp_number_type *res;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
-  res = _fpdiv_parts (&a, &b, &tmp);
+  res = _fpdiv_parts (&a, &b);
 
   return pack_d (res);
 }
+#endif /* L_div_sf || L_div_df */
 
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+    || defined(L_fpcmp_parts_tf)
 /* according to the demo, fpcmp returns a comparison with 0... thus
    a<b -> -1
    a==b -> 0
    a>b -> +1
  */
 
-static int
-_fpcmp_parts (fp_number_type * a, fp_number_type * b)
+int
+__fpcmp_parts (fp_number_type * a, fp_number_type * b)
 {
 #if 0
-  /* either nan -> unordered. Must be checked outside of this routine. */
+  /* either nan -> unordered. Must be checked outside of this routine.  */
   if (isnan (a) && isnan (b))
     {
       return 1;			/* still unordered! */
@@ -956,11 +1107,11 @@ _fpcmp_parts (fp_number_type * a, fp_number_type * b)
        -------+--------+--------
        -inf(1)| a>b(1) | a==b(0)
        -------+--------+--------
-       So since unordered must be non zero, just line up the columns...
+       So since unordered must be nonzero, just line up the columns...
        */
       return b->sign - a->sign;
     }
-  /* but not both... */
+  /* but not both...  */
   if (isinf (a))
     {
       return a->sign ? -1 : 1;
@@ -981,7 +1132,7 @@ _fpcmp_parts (fp_number_type * a, fp_number_type * b)
     {
       return a->sign ? -1 : 1;
     }
-  /* now both are "normal". */
+  /* now both are "normal".  */
   if (a->sign != b->sign)
     {
       /* opposite signs */
@@ -996,7 +1147,7 @@ _fpcmp_parts (fp_number_type * a, fp_number_type * b)
     {
       return a->sign ? 1 : -1;
     }
-  /* same exponents; check size. */
+  /* same exponents; check size.  */
   if (a->fraction.ll > b->fraction.ll)
     {
       return a->sign ? -1 : 1;
@@ -1005,117 +1156,179 @@ _fpcmp_parts (fp_number_type * a, fp_number_type * b)
     {
       return a->sign ? 1 : -1;
     }
-  /* after all that, they're equal. */
+  /* after all that, they're equal.  */
   return 0;
 }
+#endif
 
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compoare_tf)
 CMPtype
 compare (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  return _fpcmp_parts (&a, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+
+  return __fpcmp_parts (&a, &b);
 }
+#endif /* L_compare_sf || L_compare_df */
 
 #ifndef US_SOFTWARE_GOFAST
 
 /* These should be optimized for their specific tasks someday.  */
 
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
 CMPtype
 _eq_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return 1;			/* false, truth == 0 */
 
-  return _fpcmp_parts (&a, &b) ;
+  return __fpcmp_parts (&a, &b) ;
 }
+#endif /* L_eq_sf || L_eq_df */
 
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
 CMPtype
 _ne_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return 1;			/* true, truth != 0 */
 
-  return  _fpcmp_parts (&a, &b) ;
+  return  __fpcmp_parts (&a, &b) ;
 }
+#endif /* L_ne_sf || L_ne_df */
 
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
 CMPtype
 _gt_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return -1;			/* false, truth > 0 */
 
-  return _fpcmp_parts (&a, &b);
+  return __fpcmp_parts (&a, &b);
 }
+#endif /* L_gt_sf || L_gt_df */
 
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
 CMPtype
 _ge_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return -1;			/* false, truth >= 0 */
-  return _fpcmp_parts (&a, &b) ;
+  return __fpcmp_parts (&a, &b) ;
 }
+#endif /* L_ge_sf || L_ge_df */
 
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
 CMPtype
 _lt_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return 1;			/* false, truth < 0 */
 
-  return _fpcmp_parts (&a, &b);
+  return __fpcmp_parts (&a, &b);
 }
+#endif /* L_lt_sf || L_lt_df */
 
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
 CMPtype
 _le_f2 (FLO_type arg_a, FLO_type arg_b)
 {
   fp_number_type a;
   fp_number_type b;
+  FLO_union_type au, bu;
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
-  unpack_d ((FLO_union_type *) & arg_b, &b);
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
 
   if (isnan (&a) || isnan (&b))
     return 1;			/* false, truth <= 0 */
 
-  return _fpcmp_parts (&a, &b) ;
+  return __fpcmp_parts (&a, &b) ;
 }
+#endif /* L_le_sf || L_le_df */
 
 #endif /* ! US_SOFTWARE_GOFAST */
 
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
+CMPtype
+_unord_f2 (FLO_type arg_a, FLO_type arg_b)
+{
+  fp_number_type a;
+  fp_number_type b;
+  FLO_union_type au, bu;
+
+  au.value = arg_a;
+  bu.value = arg_b;
+
+  unpack_d (&au, &a);
+  unpack_d (&bu, &b);
+
+  return (isnan (&a) || isnan (&b));
+}
+#endif /* L_unord_sf || L_unord_df */
+
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
 FLO_type
 si_to_float (SItype arg_a)
 {
@@ -1129,53 +1342,100 @@ si_to_float (SItype arg_a)
     }
   else
     {
+      USItype uarg;
+      int shift;
       in.normal_exp = FRACBITS + NGARDS;
       if (in.sign) 
 	{
 	  /* Special case for minint, since there is no +ve integer
 	     representation for it */
-	  if (arg_a == 0x80000000)
+	  if (arg_a == (- MAX_SI_INT - 1))
 	    {
-	      return -2147483648.0;
+	      return (FLO_type)(- MAX_SI_INT - 1);
 	    }
-	  in.fraction.ll = (-arg_a);
+	  uarg = (-arg_a);
 	}
       else
-	in.fraction.ll = arg_a;
+	uarg = arg_a;
 
-      while (in.fraction.ll < (1LL << (FRACBITS + NGARDS)))
+      in.fraction.ll = uarg;
+      shift = clzusi (uarg) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+      if (shift > 0)
 	{
-	  in.fraction.ll <<= 1;
-	  in.normal_exp -= 1;
+	  in.fraction.ll <<= shift;
+	  in.normal_exp -= shift;
 	}
     }
   return pack_d (&in);
 }
+#endif /* L_si_to_sf || L_si_to_df */
 
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
+FLO_type
+usi_to_float (USItype arg_a)
+{
+  fp_number_type in;
+
+  in.sign = 0;
+  if (!arg_a)
+    {
+      in.class = CLASS_ZERO;
+    }
+  else
+    {
+      int shift;
+      in.class = CLASS_NUMBER;
+      in.normal_exp = FRACBITS + NGARDS;
+      in.fraction.ll = arg_a;
+
+      shift = clzusi (arg_a) - (BITS_PER_SI - 1 - FRACBITS - NGARDS);
+      if (shift < 0)
+	{
+	  fractype guard = in.fraction.ll & (((fractype)1 << -shift) - 1);
+	  in.fraction.ll >>= -shift;
+	  in.fraction.ll |= (guard != 0);
+	  in.normal_exp -= shift;
+	}
+      else if (shift > 0)
+	{
+	  in.fraction.ll <<= shift;
+	  in.normal_exp -= shift;
+	}
+    }
+  return pack_d (&in);
+}
+#endif
+
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
 SItype
 float_to_si (FLO_type arg_a)
 {
   fp_number_type a;
   SItype tmp;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &a);
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
   if (iszero (&a))
     return 0;
   if (isnan (&a))
     return 0;
-  /* get reasonable MAX_SI_INT... */
+  /* get reasonable MAX_SI_INT...  */
   if (isinf (&a))
-    return a.sign ? MAX_SI_INT : (-MAX_SI_INT)-1;
+    return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
   /* it is a number, but a small one */
   if (a.normal_exp < 0)
     return 0;
-  if (a.normal_exp > 30)
+  if (a.normal_exp > BITS_PER_SI - 2)
     return a.sign ? (-MAX_SI_INT)-1 : MAX_SI_INT;
   tmp = a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp);
   return a.sign ? (-tmp) : (tmp);
 }
+#endif /* L_sf_to_si || L_df_to_si */
 
-#ifdef US_SOFTWARE_GOFAST
+#if defined(L_sf_to_usi) || defined(L_df_to_usi) || defined(L_tf_to_usi)
+#if defined US_SOFTWARE_GOFAST || defined(L_tf_to_usi)
 /* While libgcc2.c defines its own __fixunssfsi and __fixunsdfsi routines,
    we also define them for GOFAST because the ones in libgcc2.c have the
    wrong names and I'd rather define these here and keep GOFAST CYG-LOC's
@@ -1186,42 +1446,52 @@ USItype
 float_to_usi (FLO_type arg_a)
 {
   fp_number_type a;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &a);
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
   if (iszero (&a))
     return 0;
   if (isnan (&a))
     return 0;
-  /* get reasonable MAX_USI_INT... */
-  if (isinf (&a))
-    return a.sign ? MAX_USI_INT : 0;
   /* it is a negative number */
   if (a.sign)
     return 0;
+  /* get reasonable MAX_USI_INT...  */
+  if (isinf (&a))
+    return MAX_USI_INT;
   /* it is a number, but a small one */
   if (a.normal_exp < 0)
     return 0;
-  if (a.normal_exp > 31)
+  if (a.normal_exp > BITS_PER_SI - 1)
     return MAX_USI_INT;
   else if (a.normal_exp > (FRACBITS + NGARDS))
-    return a.fraction.ll << ((FRACBITS + NGARDS) - a.normal_exp);
+    return a.fraction.ll << (a.normal_exp - (FRACBITS + NGARDS));
   else
     return a.fraction.ll >> ((FRACBITS + NGARDS) - a.normal_exp);
 }
-#endif
+#endif /* US_SOFTWARE_GOFAST */
+#endif /* L_sf_to_usi || L_df_to_usi */
 
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
 FLO_type
 negate (FLO_type arg_a)
 {
   fp_number_type a;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &a);
 
-  unpack_d ((FLO_union_type *) & arg_a, &a);
   flip_sign (&a);
   return pack_d (&a);
 }
+#endif /* L_negate_sf || L_negate_df */
 
 #ifdef FLOAT
 
+#if defined(L_make_sf)
 SFtype
 __make_fp(fp_class_type class,
 	     unsigned int sign,
@@ -1236,6 +1506,7 @@ __make_fp(fp_class_type class,
   in.fraction.ll = frac;
   return pack_d (&in);
 }
+#endif /* L_make_sf */
 
 #ifndef FLOAT_ONLY
 
@@ -1244,25 +1515,44 @@ __make_fp(fp_class_type class,
    This is needed for some 8-bit ports that can't handle well values that
    are 8-bytes in size, so we just don't support double for them at all.  */
 
-extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype frac);
-
+#if defined(L_sf_to_df)
 DFtype
 sf_to_df (SFtype arg_a)
 {
   fp_number_type in;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
 
-  unpack_d ((FLO_union_type *) & arg_a, &in);
   return __make_dp (in.class, in.sign, in.normal_exp,
 		    ((UDItype) in.fraction.ll) << F_D_BITOFF);
 }
+#endif /* L_sf_to_df */
 
-#endif
-#endif
+#if defined(L_sf_to_tf) && defined(TMODES)
+TFtype
+sf_to_tf (SFtype arg_a)
+{
+  fp_number_type in;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  return __make_tp (in.class, in.sign, in.normal_exp,
+		    ((UTItype) in.fraction.ll) << F_T_BITOFF);
+}
+#endif /* L_sf_to_df */
+
+#endif /* ! FLOAT_ONLY */
+#endif /* FLOAT */
 
 #ifndef FLOAT
 
 extern SFtype __make_fp (fp_class_type, unsigned int, int, USItype);
 
+#if defined(L_make_df)
 DFtype
 __make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac)
 {
@@ -1274,15 +1564,108 @@ __make_dp (fp_class_type class, unsigned int sign, int exp, UDItype frac)
   in.fraction.ll = frac;
   return pack_d (&in);
 }
+#endif /* L_make_df */
 
+#if defined(L_df_to_sf)
 SFtype
 df_to_sf (DFtype arg_a)
 {
   fp_number_type in;
+  USItype sffrac;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  sffrac = in.fraction.ll >> F_D_BITOFF;
 
-  unpack_d ((FLO_union_type *) & arg_a, &in);
-  return __make_fp (in.class, in.sign, in.normal_exp,
-		    in.fraction.ll >> F_D_BITOFF);
+  /* We set the lowest guard bit in SFFRAC if we discarded any non
+     zero bits.  */
+  if ((in.fraction.ll & (((USItype) 1 << F_D_BITOFF) - 1)) != 0)
+    sffrac |= 1;
+
+  return __make_fp (in.class, in.sign, in.normal_exp, sffrac);
 }
+#endif /* L_df_to_sf */
 
-#endif
+#if defined(L_df_to_tf) && defined(TMODES) \
+    && !defined(FLOAT) && !defined(TFLOAT)
+TFtype
+df_to_tf (DFtype arg_a)
+{
+  fp_number_type in;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  return __make_tp (in.class, in.sign, in.normal_exp,
+		    ((UTItype) in.fraction.ll) << D_T_BITOFF);
+}
+#endif /* L_sf_to_df */
+
+#ifdef TFLOAT
+#if defined(L_make_tf)
+TFtype
+__make_tp(fp_class_type class,
+	     unsigned int sign,
+	     int exp, 
+	     UTItype frac)
+{
+  fp_number_type in;
+
+  in.class = class;
+  in.sign = sign;
+  in.normal_exp = exp;
+  in.fraction.ll = frac;
+  return pack_d (&in);
+}
+#endif /* L_make_tf */
+
+#if defined(L_tf_to_df)
+DFtype
+tf_to_df (TFtype arg_a)
+{
+  fp_number_type in;
+  UDItype sffrac;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  sffrac = in.fraction.ll >> D_T_BITOFF;
+
+  /* We set the lowest guard bit in SFFRAC if we discarded any non
+     zero bits.  */
+  if ((in.fraction.ll & (((UTItype) 1 << D_T_BITOFF) - 1)) != 0)
+    sffrac |= 1;
+
+  return __make_dp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_df */
+
+#if defined(L_tf_to_sf)
+SFtype
+tf_to_sf (TFtype arg_a)
+{
+  fp_number_type in;
+  USItype sffrac;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  sffrac = in.fraction.ll >> F_T_BITOFF;
+
+  /* We set the lowest guard bit in SFFRAC if we discarded any non
+     zero bits.  */
+  if ((in.fraction.ll & (((UTItype) 1 << F_T_BITOFF) - 1)) != 0)
+    sffrac |= 1;
+
+  return __make_fp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_sf */
+#endif /* TFLOAT */
+
+#endif /* ! FLOAT */
+#endif /* !EXTENDED_FLOAT_STUBS */