Fix some problems with previous checkin

[pf3gnuchains/gcc-fork.git] / gcc / doc / invoke.texi
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 39fd01a..66c0816 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -274,7 +274,7 @@ in the following sections.
  -fschedule-insns  -fschedule-insns2 @gol
  -fsingle-precision-constant  -fssa -fssa-ccp -fssa-dce @gol
  -fstrength-reduce  -fstrict-aliasing  -fthread-jumps  -ftrapv @gol
--funroll-all-loops  -funroll-loops -fmessy-debugging @gol
+-funroll-all-loops  -funroll-loops  @gol
  --param @var{name}=@var{value}
  -O  -O0  -O1  -O2  -O3  -Os}
  
@@ -464,16 +464,19 @@ in the following sections.
  -mabi=32  -mabi=n32  -mabi=64  -mabi=eabi @gol
  -mfix7000  -mno-crt0}
  
-@emph{i386 Options}
+@emph{i386 and x86-64 Options}
  @gccoptlist{
  -mcpu=@var{cpu-type}  -march=@var{cpu-type} @gol
  -mintel-syntax -mieee-fp  -mno-fancy-math-387 @gol
  -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
  -mno-wide-multiply  -mrtd  -malign-double @gol
  -mpreferred-stack-boundary=@var{num} @gol
+-mmmx  -msse  -m3dnow @gol
  -mthreads  -mno-align-stringops  -minline-all-stringops @gol
  -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
--m96bit-long-double  -mregparm=@var{num}  -momit-leaf-frame-pointer}
+-m96bit-long-double  -mregparm=@var{num}  -momit-leaf-frame-pointer @gol
+-mno-red-zone @gol
+-m32 -m64}
  
  @emph{HPPA Options}
  @gccoptlist{
@@ -3415,17 +3418,6 @@ Not all of the optimizations performed by GCC have @option{-f} options
  to control them.
  
  @table @gcctabopt
-@item -fmessy-debugging
-@opindex fmessy-debugging
-Some optimizations can be stronger if compiler give up ability to generate
-at least approximately usefull debug information for optimized programs.
-These transformation are not done when @option{-g} switch is not specified,
-as politics is to get the code with @option{-g} and without equivalent,
-except for debugging section.
-
-This optimization enables such transformations.  It is still compatible with
-@option{-g}, but debugging resulting programs will be even higher challenge.
-
  @item -fstrength-reduce
  @opindex fstrength-reduce
  Perform the optimizations of loop strength reduction and
@@ -5087,7 +5079,7 @@ that macro, which enables you to change the defaults.
  * RS/6000 and PowerPC Options::
  * RT Options::
  * MIPS Options::
-* i386 Options::
+* i386 and x86-64 Options::
  * HPPA Options::
  * Intel 960 Options::
  * DEC Alpha Options::
@@ -6597,9 +6589,9 @@ instruction scheduling parameters for machine type @var{cpu_type}.
  Supported values for @var{cpu_type} are @samp{rios}, @samp{rios1},
  @samp{rsc}, @samp{rios2}, @samp{rs64a}, @samp{601}, @samp{602},
  @samp{603}, @samp{603e}, @samp{604}, @samp{604e}, @samp{620},
-@samp{630}, @samp{740}, @samp{750}, @samp{power}, @samp{power2},
-@samp{powerpc}, @samp{403}, @samp{505}, @samp{801}, @samp{821},
-@samp{823}, and @samp{860} and @samp{common}.
+@samp{630}, @samp{740}, @samp{7400}, @samp{7450}, @samp{750},
+@samp{power}, @samp{power2}, @samp{powerpc}, @samp{403}, @samp{505},
+@samp{801}, @samp{821}, @samp{823}, @samp{860} and @samp{common}.
  
  @option{-mcpu=common} selects a completely generic processor.  Code
  generated under this option will run on any POWER or PowerPC processor.
@@ -6640,6 +6632,8 @@ The @option{-mcpu} options automatically enable or disable other
  @itemx 620
  @itemx 630
  @itemx 740
+@itemx 7400
+@itemx 7450
  @itemx 750
  @itemx 505
  @option{-mno-power}, @option{-mpowerpc}, @option{-mnew-mnemonics}
@@ -7426,12 +7420,15 @@ options is also defined by that macro, which enables you to change the
  defaults.
  @end ifset
  
-@node i386 Options
-@subsection Intel 386 Options
+@node i386 and x86-64 Options
+@subsection Intel 386 and AMD x86-64 Options
  @cindex i386 Options
+@cindex x86-64 Options
  @cindex Intel 386 Options
+@cindex AMD x86-64 Options
  
-These @samp{-m} options are defined for the i386 family of computers:
+These @samp{-m} options are defined for the i386 and x86-64 families of
+computers:
  
  @table @gcctabopt
  @item -mcpu=@var{cpu-type}
@@ -7611,6 +7608,383 @@ to stack space usage, such as embedded systems and operating system kernels,
  may want to reduce the preferred alignment to
  @option{-mpreferred-stack-boundary=2}.
  
+@item -mmmx
+@itemx -mno-mmx
+@item -msse
+@itemx -mno-sse
+@item -m3dnow
+@itemx -mno-3dnow
+@opindex mmmx
+@opindex mno-mmx
+@opindex msse
+@opindex mno-sse
+@opindex m3dnow
+@opindex mno-3dnow
+These switches enable or disable the use of built-in functions that allow
+direct access to the MMX, SSE and 3DNow!@: extensions of the instruction set.
+
+The following machine modes are available for use with MMX built-in functions
+(@pxref{Vector Extensions}): @code{V2SI} for a vector of two 32 bit integers,
+@code{V4HI} for a vector of four 16 bit integers, and @code{V8QI} for a
+vector of eight 8 bit integers.  Some of the built-in functions operate on
+MMX registers as a whole 64 bit entity; these use @code{DI} as their mode.
+
+If 3DNow!@: extensions are enabled, @code{V2SF} is used as a mode for a vector
+of two 32 bit floating point values.
+
+If SSE extensions are enabled, @code{V4SF} is used for a vector of four 32 bit
+floating point values.  Some instructions use a vector of four 32 bit
+integers; these use @code{V4SI}.  Finally, some instructions operate on an
+entire vector register, interpreting it as a 128 bit integer; these use mode
+@code{TI}.
+
+The following built-in functions are made available by @option{-mmmx}:
+@table @code
+@item v8qi __builtin_ia32_paddb (v8qi, v8qi)
+Generates the @code{paddb} machine instruction.
+@item v4hi __builtin_ia32_paddw (v4hi, v4hi)
+Generates the @code{paddw} machine instruction.
+@item v2si __builtin_ia32_paddd (v2si, v2si)
+Generates the @code{paddd} machine instruction.
+@item v8qi __builtin_ia32_psubb (v8qi, v8qi)
+Generates the @code{psubb} machine instruction.
+@item v4hi __builtin_ia32_psubw (v4hi, v4hi)
+Generates the @code{psubw} machine instruction.
+@item v2si __builtin_ia32_psubd (v2si, v2si)
+Generates the @code{psubd} machine instruction.
+
+@item v8qi __builtin_ia32_paddsb (v8qi, v8qi)
+Generates the @code{paddsb} machine instruction.
+@item v4hi __builtin_ia32_paddsw (v4hi, v4hi)
+Generates the @code{paddsw} machine instruction.
+@item v8qi __builtin_ia32_psubsb (v8qi, v8qi)
+Generates the @code{psubsb} machine instruction.
+@item v4hi __builtin_ia32_psubsw (v4hi, v4hi)
+Generates the @code{psubsw} machine instruction.
+
+@item v8qi __builtin_ia32_paddusb (v8qi, v8qi)
+Generates the @code{paddusb} machine instruction.
+@item v4hi __builtin_ia32_paddusw (v4hi, v4hi)
+Generates the @code{paddusw} machine instruction.
+@item v8qi __builtin_ia32_psubusb (v8qi, v8qi)
+Generates the @code{psubusb} machine instruction.
+@item v4hi __builtin_ia32_psubusw (v4hi, v4hi)
+Generates the @code{psubusw} machine instruction.
+
+@item v4hi __builtin_ia32_pmullw (v4hi, v4hi)
+Generates the @code{pmullw} machine instruction.
+@item v4hi __builtin_ia32_pmulhw (v4hi, v4hi)
+Generates the @code{pmulhw} machine instruction.
+
+@item di __builtin_ia32_pand (di, di)
+Generates the @code{pand} machine instruction.
+@item di __builtin_ia32_pandn (di, di)
+Generates the @code{pandn} machine instruction.
+@item di __builtin_ia32_por (di, di)
+Generates the @code{por} machine instruction.
+@item di __builtin_ia32_pxor (di, di)
+Generates the @code{pxor} machine instruction.
+
+@item v8qi __builtin_ia32_pcmpeqb (v8qi, v8qi)
+Generates the @code{pcmpeqb} machine instruction.
+@item v4hi __builtin_ia32_pcmpeqw (v4hi, v4hi)
+Generates the @code{pcmpeqw} machine instruction.
+@item v2si __builtin_ia32_pcmpeqd (v2si, v2si)
+Generates the @code{pcmpeqd} machine instruction.
+@item v8qi __builtin_ia32_pcmpgtb (v8qi, v8qi)
+Generates the @code{pcmpgtb} machine instruction.
+@item v4hi __builtin_ia32_pcmpgtw (v4hi, v4hi)
+Generates the @code{pcmpgtw} machine instruction.
+@item v2si __builtin_ia32_pcmpgtd (v2si, v2si)
+Generates the @code{pcmpgtd} machine instruction.
+
+@item v8qi __builtin_ia32_punpckhbw (v8qi, v8qi)
+Generates the @code{punpckhbw} machine instruction.
+@item v4hi __builtin_ia32_punpckhwd (v4hi, v4hi)
+Generates the @code{punpckhwd} machine instruction.
+@item v2si __builtin_ia32_punpckhdq (v2si, v2si)
+Generates the @code{punpckhdq} machine instruction.
+@item v8qi __builtin_ia32_punpcklbw (v8qi, v8qi)
+Generates the @code{punpcklbw} machine instruction.
+@item v4hi __builtin_ia32_punpcklwd (v4hi, v4hi)
+Generates the @code{punpcklwd} machine instruction.
+@item v2si __builtin_ia32_punpckldq (v2si, v2si)
+Generates the @code{punpckldq} machine instruction.
+
+@item v8qi __builtin_ia32_packsswb (v4hi, v4hi)
+Generates the @code{packsswb} machine instruction.
+@item v4hi __builtin_ia32_packssdw (v2si, v2si)
+Generates the @code{packssdw} machine instruction.
+@item v8qi __builtin_ia32_packuswb (v4hi, v4hi)
+Generates the @code{packuswb} machine instruction.
+
+@end table
+
+The following built-in functions are made available either with @option{-msse}, or
+with a combination of @option{-m3dnow} and @option{-march=athlon}.
+@table @code
+
+@item v4hi __builtin_ia32_pmulhuw (v4hi, v4hi)
+Generates the @code{pmulhuw} machine instruction.
+
+@item v8qi __builtin_ia32_pavgb (v8qi, v8qi)
+Generates the @code{pavgb} machine instruction.
+@item v4hi __builtin_ia32_pavgw (v4hi, v4hi)
+Generates the @code{pavgw} machine instruction.
+@item v4hi __builtin_ia32_psadbw (v8qi, v8qi)
+Generates the @code{psadbw} machine instruction.
+
+@item v8qi __builtin_ia32_pmaxub (v8qi, v8qi)
+Generates the @code{pmaxub} machine instruction.
+@item v4hi __builtin_ia32_pmaxsw (v4hi, v4hi)
+Generates the @code{pmaxsw} machine instruction.
+@item v8qi __builtin_ia32_pminub (v8qi, v8qi)
+Generates the @code{pminub} machine instruction.
+@item v4hi __builtin_ia32_pminsw (v4hi, v4hi)
+Generates the @code{pminsw} machine instruction.
+
+@item int __builtin_ia32_pextrw (v4hi, int)
+Generates the @code{pextrw} machine instruction.
+@item v4hi __builtin_ia32_pinsrw (v4hi, int, int)
+Generates the @code{pinsrw} machine instruction.
+
+@item int __builtin_ia32_pmovmskb (v8qi)
+Generates the @code{pmovmskb} machine instruction.
+@item void __builtin_ia32_maskmovq (v8qi, v8qi, char *)
+Generates the @code{maskmovq} machine instruction.
+@item void __builtin_ia32_movntq (di *, di)
+Generates the @code{movntq} machine instruction.
+@item void __builtin_ia32_sfence (void)
+Generates the @code{sfence} machine instruction.
+@item void __builtin_ia32_prefetch (char *, int selector)
+Generates a prefetch machine instruction, depending on the value of
+selector.  If @code{selector} is 0, it generates @code{prefetchnta}; for
+a value of 1, it generates @code{prefetcht0}; for a value of 2, it generates
+@code{prefetcht1}; and for a value of 3 it generates @code{prefetcht2}.
+
+@end table
+
+The following built-in functions are available when @option{-msse} is used.
+
+@table @code
+@item int __builtin_ia32_comieq (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs an equality
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_comineq (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs an inequality
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_comilt (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs a ``less than''
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_comile (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs a ``less or
+equal'' comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_comigt (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs a ``greater than''
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_comige (v4sf, v4sf)
+Generates the @code{comiss} machine instruction and performs a ``greater or
+equal'' comparison.  The return value is the truth value of that comparison.
+
+@item int __builtin_ia32_ucomieq (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs an equality
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_ucomineq (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs an inequality
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_ucomilt (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs a ``less than''
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_ucomile (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs a ``less or
+equal'' comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_ucomigt (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs a ``greater than''
+comparison.  The return value is the truth value of that comparison.
+@item int __builtin_ia32_ucomige (v4sf, v4sf)
+Generates the @code{ucomiss} machine instruction and performs a ``greater or
+equal'' comparison.  The return value is the truth value of that comparison.
+
+@item v4sf __builtin_ia32_addps (v4sf, v4sf)
+Generates the @code{addps} machine instruction.
+@item v4sf __builtin_ia32_addss (v4sf, v4sf)
+Generates the @code{addss} machine instruction.
+@item v4sf __builtin_ia32_subps (v4sf, v4sf)
+Generates the @code{subps} machine instruction.
+@item v4sf __builtin_ia32_subss (v4sf, v4sf)
+Generates the @code{subss} machine instruction.
+@item v4sf __builtin_ia32_mulps (v4sf, v4sf)
+Generates the @code{mulps} machine instruction.
+@item v4sf __builtin_ia32_mulss (v4sf, v4sf)
+Generates the @code{mulss} machine instruction.
+@item v4sf __builtin_ia32_divps (v4sf, v4sf)
+Generates the @code{divps} machine instruction.
+@item v4sf __builtin_ia32_divss (v4sf, v4sf)
+Generates the @code{divss} machine instruction.
+
+@item v4si __builtin_ia32_cmpeqps (v4sf, v4sf)
+Generates the @code{cmpeqps} machine instruction.
+@item v4si __builtin_ia32_cmpltps (v4sf, v4sf)
+Generates the @code{cmpltps} machine instruction.
+@item v4si __builtin_ia32_cmpleps (v4sf, v4sf)
+Generates the @code{cmpleps} machine instruction.
+@item v4si __builtin_ia32_cmpgtps (v4sf, v4sf)
+Generates the @code{cmpgtps} machine instruction.
+@item v4si __builtin_ia32_cmpgeps (v4sf, v4sf)
+Generates the @code{cmpgeps} machine instruction.
+@item v4si __builtin_ia32_cmpunordps (v4sf, v4sf)
+Generates the @code{cmpunordps} machine instruction.
+@item v4si __builtin_ia32_cmpneqps (v4sf, v4sf)
+Generates the @code{cmpneqps} machine instruction.
+@item v4si __builtin_ia32_cmpnltps (v4sf, v4sf)
+Generates the @code{cmpnltps} machine instruction.
+@item v4si __builtin_ia32_cmpnleps (v4sf, v4sf)
+Generates the @code{cmpnleps} machine instruction.
+@item v4si __builtin_ia32_cmpngtps (v4sf, v4sf)
+Generates the @code{cmpngtps} machine instruction.
+@item v4si __builtin_ia32_cmpngeps (v4sf, v4sf)
+Generates the @code{cmpngeps} machine instruction.
+@item v4si __builtin_ia32_cmpordps (v4sf, v4sf)
+Generates the @code{cmpordps} machine instruction.
+
+@item v4si __builtin_ia32_cmpeqss (v4sf, v4sf)
+Generates the @code{cmpeqss} machine instruction.
+@item v4si __builtin_ia32_cmpltss (v4sf, v4sf)
+Generates the @code{cmpltss} machine instruction.
+@item v4si __builtin_ia32_cmpless (v4sf, v4sf)
+Generates the @code{cmpless} machine instruction.
+@item v4si __builtin_ia32_cmpgtss (v4sf, v4sf)
+Generates the @code{cmpgtss} machine instruction.
+@item v4si __builtin_ia32_cmpgess (v4sf, v4sf)
+Generates the @code{cmpgess} machine instruction.
+@item v4si __builtin_ia32_cmpunordss (v4sf, v4sf)
+Generates the @code{cmpunordss} machine instruction.
+@item v4si __builtin_ia32_cmpneqss (v4sf, v4sf)
+Generates the @code{cmpneqss} machine instruction.
+@item v4si __builtin_ia32_cmpnltss (v4sf, v4sf)
+Generates the @code{cmpnltss} machine instruction.
+@item v4si __builtin_ia32_cmpnless (v4sf, v4sf)
+Generates the @code{cmpnless} machine instruction.
+@item v4si __builtin_ia32_cmpngtss (v4sf, v4sf)
+Generates the @code{cmpngtss} machine instruction.
+@item v4si __builtin_ia32_cmpngess (v4sf, v4sf)
+Generates the @code{cmpngess} machine instruction.
+@item v4si __builtin_ia32_cmpordss (v4sf, v4sf)
+Generates the @code{cmpordss} machine instruction.
+
+@item v4sf __builtin_ia32_maxps (v4sf, v4sf)
+Generates the @code{maxps} machine instruction.
+@item v4sf __builtin_ia32_maxss (v4sf, v4sf)
+Generates the @code{maxss} machine instruction.
+@item v4sf __builtin_ia32_minps (v4sf, v4sf)
+Generates the @code{minps} machine instruction.
+@item v4sf __builtin_ia32_minss (v4sf, v4sf)
+Generates the @code{minss} machine instruction.
+
+@item ti __builtin_ia32_andps (ti, ti)
+Generates the @code{andps} machine instruction.
+@item ti __builtin_ia32_andnps (ti, ti)
+Generates the @code{andnps} machine instruction.
+@item ti __builtin_ia32_orps (ti, ti)
+Generates the @code{orps} machine instruction.
+@item ti __builtin_ia32_xorps (ti, ti)
+Generates the @code{xorps} machine instruction.
+
+@item v4sf __builtin_ia32_movps (v4sf, v4sf)
+Generates the @code{movps} machine instruction.
+@item v4sf __builtin_ia32_movhlps (v4sf, v4sf)
+Generates the @code{movhlps} machine instruction.
+@item v4sf __builtin_ia32_movlhps (v4sf, v4sf)
+Generates the @code{movlhps} machine instruction.
+@item v4sf __builtin_ia32_unpckhps (v4sf, v4sf)
+Generates the @code{unpckhps} machine instruction.
+@item v4sf __builtin_ia32_unpcklps (v4sf, v4sf)
+Generates the @code{unpcklps} machine instruction.
+
+@item v4sf __builtin_ia32_cvtpi2ps (v4sf, v2si)
+Generates the @code{cvtpi2ps} machine instruction.
+@item v2si __builtin_ia32_cvtps2pi (v4sf)
+Generates the @code{cvtps2pi} machine instruction.
+@item v4sf __builtin_ia32_cvtsi2ss (v4sf, int)
+Generates the @code{cvtsi2ss} machine instruction.
+@item int __builtin_ia32_cvtss2si (v4sf)
+Generates the @code{cvtss2si} machine instruction.
+@item v2si __builtin_ia32_cvttps2pi (v4sf)
+Generates the @code{cvttps2pi} machine instruction.
+@item int __builtin_ia32_cvttss2si (v4sf)
+Generates the @code{cvttss2si} machine instruction.
+
+@item v4sf __builtin_ia32_rcpps (v4sf)
+Generates the @code{rcpps} machine instruction.
+@item v4sf __builtin_ia32_rsqrtps (v4sf)
+Generates the @code{rsqrtps} machine instruction.
+@item v4sf __builtin_ia32_sqrtps (v4sf)
+Generates the @code{sqrtps} machine instruction.
+@item v4sf __builtin_ia32_rcpss (v4sf)
+Generates the @code{rcpss} machine instruction.
+@item v4sf __builtin_ia32_rsqrtss (v4sf)
+Generates the @code{rsqrtss} machine instruction.
+@item v4sf __builtin_ia32_sqrtss (v4sf)
+Generates the @code{sqrtss} machine instruction.
+
+@item v4sf __builtin_ia32_shufps (v4sf, v4sf, int)
+Generates the @code{shufps} machine instruction.
+
+@item v4sf __builtin_ia32_loadaps (float *)
+Generates the @code{movaps} machine instruction as a load from memory.
+@item void __builtin_ia32_storeaps (float *, v4sf)
+Generates the @code{movaps} machine instruction as a store to memory.
+@item v4sf __builtin_ia32_loadups (float *)
+Generates the @code{movups} machine instruction as a load from memory.
+@item void __builtin_ia32_storeups (float *, v4sf)
+Generates the @code{movups} machine instruction as a store to memory.
+@item v4sf __builtin_ia32_loadss (float *)
+Generates the @code{movss} machine instruction as a load from memory.
+@item void __builtin_ia32_storess (float *, v4sf)
+Generates the @code{movss} machine instruction as a store to memory.
+
+@item v4sf __builtin_ia32_loadhps (v4sf, v2si *)
+Generates the @code{movhps} machine instruction as a load from memory.
+@item v4sf __builtin_ia32_loadlps (v4sf, v2si *)
+Generates the @code{movlps} machine instruction as a load from memory.
+@item void __builtin_ia32_storehps (v4sf, v2si *)
+Generates the @code{movhps} machine instruction as a store to memory.
+@item void __builtin_ia32_storelps (v4sf, v2si *)
+Generates the @code{movlps} machine instruction as a store to memory.
+
+@item void __builtin_ia32_movntps (float *, v4sf)
+Generates the @code{movntps} machine instruction.
+@item int __builtin_ia32_movmskps (v4sf)
+Generates the @code{movmskps} machine instruction.
+
+@item void __builtin_ia32_storeps1 (float *, v4sf)
+Generates the @code{movaps} machine instruction as a store to memory.
+Before storing, the value is modified with a @code{shufps} instruction
+so that the lowest of the four floating point elements is replicated
+across the entire vector that is stored.
+@item void __builtin_ia32_storerps (float *, v4sf)
+Generates the @code{movaps} machine instruction as a store to memory.
+Before storing, the value is modified with a @code{shufps} instruction
+so that the order of the four floating point elements in the vector is
+reversed.
+@item v4sf __builtin_ia32_loadps1 (float *)
+Generates a @code{movss} machine instruction to load a floating point
+value from memory, and a @code{shufps} instruction to replicate the
+loaded value across all four elements of the result vector.
+@item v4sf __builtin_ia32_loadrps (float *)
+Generates a @code{movaps} machine instruction to load a vector from
+memory, and a @code{shufps} instruction to reverse the order of the
+four floating point elements in the result vector.
+@item v4sf __builtin_ia32_setps (float, float, float, float)
+Constructs a vector from four single floating point values.  The return
+value is equal to the value that would result from storing the four
+arguments into consecutive memory locations and then executing a
+@code{movaps} to load the vector from memory.
+@item v4sf __builtin_ia32_setps1 (float)
+Constructs a vector from a single floating point value by replicating
+it across all four elements of the result vector.
+@end table
+
  @item -mpush-args
  @itemx -mno-push-args
  @opindex mpush-args
@@ -7658,6 +8032,29 @@ makes an extra register available in leaf functions.  The option
  which might make debugging harder.
  @end table
  
+These @samp{-m} switches are supported in addition to the above
+on AMD x86-64 processors in 64-bit environments.
+
+@table @gcctabopt
+@item -m32
+@itemx -m64
+@opindex m32
+@opindex m64
+Generate code for a 32-bit or 64-bit environment.
+The 32-bit environment sets int, long and pointer to 32 bits and
+generates code that runs on any i386 system.
+The 64-bit environment sets int to 32 bits and long and pointer
+to 64 bits and generates code for AMD's x86-64 architecture.
+
+@item -mno-red-zone
+@opindex mno-red-zone
+Do not use a so-called red zone for x86-64 code.  The red zone is mandated
+by the x86-64 ABI; it is a 128-byte area beyond the location of the
+stack pointer that will not be modified by signal or interrupt handlers
+and therefore can be used for temporary data without adjusting the stack
+pointer.  The flag @option{-mno-red-zone} disables this red zone.
+@end table
+
  @node HPPA Options
  @subsection HPPA Options
  @cindex HPPA Options