OSDN Git Service

PR target/28946
[pf3gnuchains/gcc-fork.git] / gcc / config / i386 / i386.md
index 923bdf9..f9be68f 100644 (file)
@@ -1,6 +1,6 @@
 ;; GCC machine description for IA-32 and x86-64.
 ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004
+;; 2001, 2002, 2003, 2004, 2005, 2006
 ;; Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
@@ -19,8 +19,8 @@
 ;;
 ;; You should have received a copy of the GNU General Public License
 ;; along with GCC; see the file COPYING.  If not, write to
-;; the Free Software Foundation, 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.  */
+;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.  */
 ;;
 ;; The original PO technology requires these to be ordered by speed,
 ;; so that assigner will pick the fastest.
    (UNSPEC_STACK_ALLOC         11)
    (UNSPEC_SET_GOT             12)
    (UNSPEC_SSE_PROLOGUE_SAVE   13)
+   (UNSPEC_REG_SAVE            14)
+   (UNSPEC_DEF_CFA             15)
 
    ; TLS support
-   (UNSPEC_TP                  15)
-   (UNSPEC_TLS_GD              16)
-   (UNSPEC_TLS_LD_BASE         17)
+   (UNSPEC_TP                  16)
+   (UNSPEC_TLS_GD              17)
+   (UNSPEC_TLS_LD_BASE         18)
+   (UNSPEC_TLSDESC             19)
 
    ; Other random patterns
    (UNSPEC_SCAS                        20)
-   (UNSPEC_SIN                 21)
-   (UNSPEC_COS                 22)
-   (UNSPEC_FNSTSW              24)
-   (UNSPEC_SAHF                        25)
-   (UNSPEC_FSTCW               26)
-   (UNSPEC_ADD_CARRY           27)
-   (UNSPEC_FLDCW               28)
+   (UNSPEC_FNSTSW              21)
+   (UNSPEC_SAHF                        22)
+   (UNSPEC_FSTCW               23)
+   (UNSPEC_ADD_CARRY           24)
+   (UNSPEC_FLDCW               25)
+   (UNSPEC_REP                 26)
+   (UNSPEC_EH_RETURN           27)
+   (UNSPEC_LD_MPIC             28)     ; load_macho_picbase
 
    ; For SSE/MMX support:
-   (UNSPEC_FIX                 30)
-   (UNSPEC_MASKMOV             32)
-   (UNSPEC_MOVMSK              33)
-   (UNSPEC_MOVNT               34)
-   (UNSPEC_MOVA                        38)
-   (UNSPEC_MOVU                        39)
-   (UNSPEC_SHUFFLE             41)
-   (UNSPEC_RCP                 42)
-   (UNSPEC_RSQRT               43)
-   (UNSPEC_SFENCE              44)
-   (UNSPEC_NOP                 45)     ; prevents combiner cleverness
-   (UNSPEC_PAVGUSB             49)
-   (UNSPEC_PFRCP               50)
-   (UNSPEC_PFRCPIT1            51)
-   (UNSPEC_PFRCPIT2            52)
-   (UNSPEC_PFRSQRT             53)
-   (UNSPEC_PFRSQIT1            54)
-   (UNSPEC_PSHUFLW             55)
-   (UNSPEC_PSHUFHW             56)
-   (UNSPEC_MFENCE              59)
-   (UNSPEC_LFENCE              60)
-   (UNSPEC_PSADBW              61)
-   (UNSPEC_ADDSUB              71)
-   (UNSPEC_HADD                        72)
-   (UNSPEC_HSUB                        73)
-   (UNSPEC_MOVSHDUP            74)
-   (UNSPEC_MOVSLDUP            75)
-   (UNSPEC_LDQQU               76)
-   (UNSPEC_MOVDDUP             77)
+   (UNSPEC_FIX_NOTRUNC         30)
+   (UNSPEC_MASKMOV             31)
+   (UNSPEC_MOVMSK              32)
+   (UNSPEC_MOVNT               33)
+   (UNSPEC_MOVU                        34)
+   (UNSPEC_RCP                 35)
+   (UNSPEC_RSQRT               36)
+   (UNSPEC_SFENCE              37)
+   (UNSPEC_NOP                 38)     ; prevents combiner cleverness
+   (UNSPEC_PFRCP               39)
+   (UNSPEC_PFRCPIT1            40)
+   (UNSPEC_PFRCPIT2            41)
+   (UNSPEC_PFRSQRT             42)
+   (UNSPEC_PFRSQIT1            43)
+   (UNSPEC_MFENCE              44)
+   (UNSPEC_LFENCE              45)
+   (UNSPEC_PSADBW              46)
+   (UNSPEC_LDQQU               47)
+
+   ; Generic math support
+   (UNSPEC_COPYSIGN            50)
+   (UNSPEC_IEEE_MIN            51)     ; not commutative
+   (UNSPEC_IEEE_MAX            52)     ; not commutative
 
    ; x87 Floating point
-   (UNSPEC_FPATAN              65)
-   (UNSPEC_FYL2X               66)
-   (UNSPEC_FYL2XP1             67)
-   (UNSPEC_FRNDINT             68)
-   (UNSPEC_F2XM1               69)
+   (UNSPEC_SIN                 60)
+   (UNSPEC_COS                 61)
+   (UNSPEC_FPATAN              62)
+   (UNSPEC_FYL2X               63)
+   (UNSPEC_FYL2XP1             64)
+   (UNSPEC_FRNDINT             65)
+   (UNSPEC_FIST                        66)
+   (UNSPEC_F2XM1               67)
+
+   ; x87 Rounding
+   (UNSPEC_FRNDINT_FLOOR       70)
+   (UNSPEC_FRNDINT_CEIL        71)
+   (UNSPEC_FRNDINT_TRUNC       72)
+   (UNSPEC_FRNDINT_MASK_PM     73)
+   (UNSPEC_FIST_FLOOR          74)
+   (UNSPEC_FIST_CEIL           75)
 
    ; x87 Double output FP
    (UNSPEC_SINCOS_COS          80)
    (UNSPEC_FPREM1_F            90)
    (UNSPEC_FPREM1_U            91)
 
-   ; x87 Rounding
-   (UNSPEC_FRNDINT_FLOOR       96)
-   (UNSPEC_FRNDINT_CEIL        97)
-   (UNSPEC_FRNDINT_TRUNC       98)
-   (UNSPEC_FRNDINT_MASK_PM     99)
-
-   ; REP instruction
-   (UNSPEC_REP                 75)
-
-   (UNSPEC_EH_RETURN           76)
+   ; SSP patterns
+   (UNSPEC_SP_SET              100)
+   (UNSPEC_SP_TEST             101)
+   (UNSPEC_SP_TLS_SET          102)
+   (UNSPEC_SP_TLS_TEST         103)
   ])
 
 (define_constants
+  ;; UNSPECV_* codes.  The values form one contiguous run, 0 through 13:
+  ;; the nine existing codes after UNSPECV_BLOCKAGE are renumbered 1-9 and
+  ;; UNSPECV_CMPXCHG_1, UNSPECV_CMPXCHG_2, UNSPECV_XCHG and UNSPECV_LOCK
+  ;; are appended as 10-13.
+  ;; NOTE(review): the patterns that use the four new codes are not in
+  ;; this part of the patch -- confirm each one has a user.
   [(UNSPECV_BLOCKAGE           0)
-   (UNSPECV_STACK_PROBE                10)
-   (UNSPECV_EMMS               31)
-   (UNSPECV_LDMXCSR            37)
-   (UNSPECV_STMXCSR            40)
-   (UNSPECV_FEMMS              46)
-   (UNSPECV_CLFLUSH            57)
-   (UNSPECV_ALIGN              68)
-   (UNSPECV_MONITOR            69)
-   (UNSPECV_MWAIT              70)
+   (UNSPECV_STACK_PROBE                1)
+   (UNSPECV_EMMS               2)
+   (UNSPECV_LDMXCSR            3)
+   (UNSPECV_STMXCSR            4)
+   (UNSPECV_FEMMS              5)
+   (UNSPECV_CLFLUSH            6)
+   (UNSPECV_ALIGN              7)
+   (UNSPECV_MONITOR            8)
+   (UNSPECV_MWAIT              9)
+   (UNSPECV_CMPXCHG_1          10)
+   (UNSPECV_CMPXCHG_2          11)
+   (UNSPECV_XCHG               12)
+   (UNSPECV_LOCK               13)
   ])
 
 ;; Registers by name.
 \f
 ;; Processor type.  This attribute must exactly match the processor_type
 ;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4,k8,nocona,generic32,generic64"
   (const (symbol_ref "ix86_tune")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,cld,
-   fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint,
-   sselog,sseiadd,sseishft,sseimul,
+   fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
+   sselog,sselog1,sseiadd,sseishft,sseimul,
    sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
 ;; The CPU unit operations uses.
 (define_attr "unit" "integer,i387,sse,mmx,unknown"
-  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,frndint")
+  (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "i387")
-        (eq_attr "type" "sselog,sseiadd,sseishft,sseimul,
+        (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
                          sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
           (const_string "sse")
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_int 1)
         ]
         (symbol_ref "/* Update immediate_length and other attributes! */
-                     abort(),1")))
+                     gcc_unreachable (),1")))
 
 ;; The (bounding maximum) length of an instruction address.
 (define_attr "length_address" ""
              (not (match_operand 0 "memory_operand" "")))
           (const_int 0)
         (and (eq_attr "type" "imov")
-             (and (match_operand 0 "register_operand" "")
-                  (match_operand 1 "immediate_operand" "")))
+             (ior (and (match_operand 0 "register_operand" "")
+                       (match_operand 1 "immediate_operand" ""))
+                  (ior (and (match_operand 0 "ax_reg_operand" "")
+                            (match_operand 1 "memory_displacement_only_operand" ""))
+                       (and (match_operand 0 "memory_displacement_only_operand" "")
+                            (match_operand 1 "ax_reg_operand" "")))))
           (const_int 0)
         (and (eq_attr "type" "call")
              (match_operand 0 "constant_call_address_operand" ""))
           (if_then_else (match_operand 1 "constant_call_address_operand" "")
             (const_string "none")
             (const_string "load"))
-        (and (eq_attr "type" "alu1,negnot,ishift1")
+        (and (eq_attr "type" "alu1,negnot,ishift1,sselog1")
              (match_operand 1 "memory_operand" ""))
           (const_string "both")
         (and (match_operand 0 "memory_operand" "")
                 "!alu1,negnot,ishift1,
                   imov,imovx,icmp,test,
                   fmov,fcmp,fsgn,
-                  sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,
+                  sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
                   mmx,mmxmov,mmxcmp,mmxcvt")
              (match_operand 2 "memory_operand" ""))
           (const_string "load")
 
 ;; Defines rounding mode of an FP operation.
 
-(define_attr "i387_cw" "floor,ceil,trunc,mask_pm,uninitialized,any"
+(define_attr "i387_cw" "trunc,floor,ceil,mask_pm,uninitialized,any"
   (const_string "any"))
 
 ;; Describe a user's asm statement.
 (define_asm_attributes
   [(set_attr "length" "128")
    (set_attr "type" "multi")])
+
+;; Mode macros.  Each define_mode_macro below gives a name to a list of
+;; machine modes.  A pattern refers to the macro in a match_operand mode
+;; position and uses the <mode>/<MODE> placeholders in its name, condition
+;; and attributes, as *cmpfp_<mode> does with X87MODEI12.
+
+;; All x87 floating point modes
+(define_mode_macro X87MODEF [SF DF XF])
+;; All integer modes handled by x87 fisttp operator.
+(define_mode_macro X87MODEI [HI SI DI])
+
+;; All integer modes handled by integer x87 operators.
+(define_mode_macro X87MODEI12 [HI SI])
+
+;; All SSE floating point modes
+(define_mode_macro SSEMODEF [SF DF])
+;; All integer modes handled by SSE cvtts?2si* operators.
+(define_mode_macro SSEMODEI24 [SI DI])
+
 \f
 ;; Scheduling descriptions
 
 (include "athlon.md")
 
 \f
-;; Operand and operator predicates
+;; Operand and operator predicates and constraints
 
 (include "predicates.md")
+(include "constraints.md")
 
 \f
 ;; Compare instructions.
 ;; actually generating RTL.  The bCOND or sCOND (emitted immediately
 ;; after the cmp) will actually emit the cmpM.
 
+;; TImode compare expander, enabled only for TARGET_64BIT.  It does not
+;; emit the compare itself: it records the two operands in
+;; ix86_compare_op0 / ix86_compare_op1 and ends with DONE.  If both
+;; operands are MEMs, operand 0 is first copied into a register with
+;; force_reg so that at most one memory operand remains.
+(define_expand "cmpti"
+  [(set (reg:CC FLAGS_REG)
+       (compare:CC (match_operand:TI 0 "nonimmediate_operand" "")
+                   (match_operand:TI 1 "x86_64_general_operand" "")))]
+  "TARGET_64BIT"
+{
+  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+    operands[0] = force_reg (TImode, operands[0]);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  DONE;
+})
+
 (define_expand "cmpdi"
   [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:DI 0 "nonimmediate_operand" "")
 
 (define_expand "cmpxf"
   [(set (reg:CC FLAGS_REG)
-       (compare:CC (match_operand:XF 0 "cmp_fp_expander_operand" "")
-                   (match_operand:XF 1 "cmp_fp_expander_operand" "")))]
+       (compare:CC (match_operand:XF 0 "nonmemory_operand" "")
+                   (match_operand:XF 1 "nonmemory_operand" "")))]
   "TARGET_80387"
 {
   ix86_compare_op0 = operands[0];
   [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "")
                    (match_operand:DF 1 "cmp_fp_expander_operand" "")))]
-  "TARGET_80387 || TARGET_SSE2"
+  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
 {
   ix86_compare_op0 = operands[0];
   ix86_compare_op1 = operands[1];
   [(set (reg:CC FLAGS_REG)
        (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "")
                    (match_operand:SF 1 "cmp_fp_expander_operand" "")))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
 {
   ix86_compare_op0 = operands[0];
   ix86_compare_op1 = operands[1];
 ;; We may not use "#" to split and emit these, since the REG_DEAD notes
 ;; used to manage the reg stack popping would not be preserved.
 
-(define_insn "*cmpfp_0_sf"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (unspec:HI
-         [(compare:CCFP
-            (match_operand:SF 1 "register_operand" "f")
-            (match_operand:SF 2 "const0_operand" "X"))]
-       UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, 0, 0);"
-  [(set_attr "type" "multi")
-   (set_attr "mode" "SF")])
-
-(define_insn "*cmpfp_0_df"
-  [(set (match_operand:HI 0 "register_operand" "=a")
-       (unspec:HI
-         [(compare:CCFP
-            (match_operand:DF 1 "register_operand" "f")
-            (match_operand:DF 2 "const0_operand" "X"))]
-       UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, 0, 0);"
-  [(set_attr "type" "multi")
-   (set_attr "mode" "DF")])
-
-(define_insn "*cmpfp_0_xf"
+;; Mode-generic replacement for the removed *cmpfp_0_sf, *cmpfp_0_df and
+;; *cmpfp_0_xf patterns.  Operands 1 and 2 carry no mode in the template;
+;; the insn condition instead requires operand 1 to have a float mode and
+;; both operands to share it.  The "mode" attribute is derived from
+;; operand 1: SF, DF, otherwise XF.
+(define_insn "*cmpfp_0"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
-            (match_operand:XF 1 "register_operand" "f")
-            (match_operand:XF 2 "const0_operand" "X"))]
+            (match_operand 1 "register_operand" "f")
+            (match_operand 2 "const0_operand" "X"))]
        UNSPEC_FNSTSW))]
-  "TARGET_80387"
+  "TARGET_80387
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
-   (set_attr "mode" "XF")])
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+             (const_string "SF")
+           (match_operand:DF 1 "" "")
+             (const_string "DF")
+          ]
+          (const_string "XF")))])
 
 (define_insn "*cmpfp_sf"
   [(set (match_operand:HI 0 "register_operand" "=a")
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
    (set_attr "mode" "SF")])
 
 (define_insn "*cmpfp_df"
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
    (set_attr "mode" "DF")])
 
 (define_insn "*cmpfp_xf"
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
    (set_attr "mode" "XF")])
 
 (define_insn "*cmpfp_u"
    && GET_MODE (operands[1]) == GET_MODE (operands[2])"
   "* return output_fp_compare (insn, operands, 0, 1);"
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
    (set (attr "mode")
      (cond [(match_operand:SF 1 "" "")
              (const_string "SF")
           ]
           (const_string "XF")))])
 
-(define_insn "*cmpfp_si"
+;; Compare x87 register operand 1 against an integer memory operand
+;; (operand 2) wrapped in float_operator (operand 3); the result goes to
+;; HImode operand 0 via UNSPEC_FNSTSW.  Written against X87MODEI12
+;; (HI, SI), replacing the SImode-only *cmpfp_si.  The condition tests
+;; TARGET_USE_<MODE>MODE_FIOP for the corresponding mode and requires
+;; operand 3 to have the same float mode as operand 1.
+(define_insn "*cmpfp_<mode>"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
             (match_operand 1 "register_operand" "f")
             (match_operator 3 "float_operator"
-              [(match_operand:SI 2 "memory_operand" "m")]))]
+              [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
          UNSPEC_FNSTSW))]
-  "TARGET_80387 && TARGET_USE_FIOP
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
    && FLOAT_MODE_P (GET_MODE (operands[1]))
    && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 ;; FP compares, step 2
 ;; Move the fpsw to ax.
 
 ;; Pentium Pro can do steps 1 through 3 in one go.
 
-(define_insn "*cmpfp_i"
+(define_insn "*cmpfp_i_mixed"
   [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_operand 0 "register_operand" "f")
-                     (match_operand 1 "register_operand" "f")))]
-  "TARGET_80387 && TARGET_CMOVE
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
-   && FLOAT_MODE_P (GET_MODE (operands[0]))
-   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
-  "* return output_fp_compare (insn, operands, 1, 0);"
-  [(set_attr "type" "fcmp")
-   (set (attr "mode")
-     (cond [(match_operand:SF 1 "" "")
-             (const_string "SF")
-           (match_operand:DF 1 "" "")
-             (const_string "DF")
-          ]
-          (const_string "XF")))
-   (set_attr "athlon_decode" "vector")])
-
-(define_insn "*cmpfp_i_sse"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_operand 0 "register_operand" "f#x,x#f")
-                     (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))]
-  "TARGET_80387
+       (compare:CCFP (match_operand 0 "register_operand" "f,x")
+                     (match_operand 1 "nonimmediate_operand" "f,xm")))]
+  "TARGET_MIX_SSE_I387
    && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 0);"
         (const_string "DF")))
    (set_attr "athlon_decode" "vector")])
 
-(define_insn "*cmpfp_i_sse_only"
+(define_insn "*cmpfp_i_sse"
   [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP (match_operand 0 "register_operand" "x")
                      (match_operand 1 "nonimmediate_operand" "xm")))]
-  "SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 0);"
   [(set_attr "type" "ssecomi")
         (const_string "DF")))
    (set_attr "athlon_decode" "vector")])
 
-(define_insn "*cmpfp_iu"
-  [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (match_operand 0 "register_operand" "f")
-                      (match_operand 1 "register_operand" "f")))]
+(define_insn "*cmpfp_i_i387"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP (match_operand 0 "register_operand" "f")
+                     (match_operand 1 "register_operand" "f")))]
   "TARGET_80387 && TARGET_CMOVE
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
    && FLOAT_MODE_P (GET_MODE (operands[0]))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
-  "* return output_fp_compare (insn, operands, 1, 1);"
+  "* return output_fp_compare (insn, operands, 1, 0);"
   [(set_attr "type" "fcmp")
    (set (attr "mode")
      (cond [(match_operand:SF 1 "" "")
           (const_string "XF")))
    (set_attr "athlon_decode" "vector")])
 
-(define_insn "*cmpfp_iu_sse"
+(define_insn "*cmpfp_iu_mixed"
   [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (match_operand 0 "register_operand" "f#x,x#f")
-                      (match_operand 1 "nonimmediate_operand" "f#x,xm#f")))]
-  "TARGET_80387
+       (compare:CCFPU (match_operand 0 "register_operand" "f,x")
+                      (match_operand 1 "nonimmediate_operand" "f,xm")))]
+  "TARGET_MIX_SSE_I387
    && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 1);"
         (const_string "DF")))
    (set_attr "athlon_decode" "vector")])
 
-(define_insn "*cmpfp_iu_sse_only"
+(define_insn "*cmpfp_iu_sse"
   [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU (match_operand 0 "register_operand" "x")
                       (match_operand 1 "nonimmediate_operand" "xm")))]
-  "SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+  "TARGET_SSE_MATH
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 1);"
   [(set_attr "type" "ssecomi")
         (const_string "SF")
         (const_string "DF")))
    (set_attr "athlon_decode" "vector")])
+
+;; CCFPU compare of two x87 ("f") registers that sets FLAGS_REG directly.
+;; Enabled for TARGET_80387 && TARGET_CMOVE on any float mode, except
+;; SSE float modes when TARGET_SSE_MATH is set.  Calls output_fp_compare
+;; with trailing arguments (1, 1), as the other CCFPU patterns here do;
+;; the CCFP patterns pass (1, 0).  The "mode" attribute follows
+;; operand 1: SF, DF, otherwise XF.
+;; NOTE(review): the CCFP sibling is spelled "*cmpfp_i_i387" while this
+;; one is "*cmpfp_iu_387" -- confirm whether the differing suffix is
+;; intended.
+(define_insn "*cmpfp_iu_387"
+  [(set (reg:CCFPU FLAGS_REG)
+       (compare:CCFPU (match_operand 0 "register_operand" "f")
+                      (match_operand 1 "register_operand" "f")))]
+  "TARGET_80387 && TARGET_CMOVE
+   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
+   && FLOAT_MODE_P (GET_MODE (operands[0]))
+   && GET_MODE (operands[0]) == GET_MODE (operands[1])"
+  "* return output_fp_compare (insn, operands, 1, 1);"
+  [(set_attr "type" "fcmp")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+             (const_string "SF")
+           (match_operand:DF 1 "" "")
+             (const_string "DF")
+          ]
+          (const_string "XF")))
+   (set_attr "athlon_decode" "vector")])
 \f
 ;; Move instructions.
 
 
 (define_insn "*movsi_1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-                       "=r  ,m  ,!*y,!rm,!*y,!*x,!rm,!*x")
+                       "=r  ,m  ,*y,*y,?rm,?*y,*x,*x,?r,m ,?*Y,*x")
        (match_operand:SI 1 "general_operand"
-                       "rinm,rin,*y ,*y ,rm ,*x ,*x ,rm"))]
-  "(TARGET_INTER_UNIT_MOVES || optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+                       "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Y,*x,r  ,m "))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
-    case TYPE_SSEMOV:
+    case TYPE_SSELOG1:
       if (get_attr_mode (insn) == MODE_TI)
-        return "movdqa\t{%1, %0|%0, %1}";
-      return "movd\t{%1, %0|%0, %1}";
-
-    case TYPE_MMXMOV:
-      if (get_attr_mode (insn) == MODE_DI)
-       return "movq\t{%1, %0|%0, %1}";
-      return "movd\t{%1, %0|%0, %1}";
-
-    case TYPE_LEA:
-      return "lea{l}\t{%1, %0|%0, %1}";
-
-    default:
-      if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
-       abort();
-      return "mov{l}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set (attr "type")
-     (cond [(eq_attr "alternative" "2,3,4")
-             (const_string "mmxmov")
-           (eq_attr "alternative" "5,6,7")
-             (const_string "ssemov")
-           (and (ne (symbol_ref "flag_pic") (const_int 0))
-                (match_operand:SI 1 "symbolic_operand" ""))
-             (const_string "lea")
-          ]
-          (const_string "imov")))
-   (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")])
+        return "pxor\t%0, %0";
+      return "xorps\t%0, %0";
 
-(define_insn "*movsi_1_nointernunit"
-  [(set (match_operand:SI 0 "nonimmediate_operand"
-                       "=r  ,m  ,!*y,!m,!*y,!*x,!m,!*x")
-       (match_operand:SI 1 "general_operand"
-                       "rinm,rin,*y ,*y,m  ,*x ,*x,m"))]
-  "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (get_attr_type (insn))
-    {
     case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == MODE_TI)
-        return "movdqa\t{%1, %0|%0, %1}";
-      return "movd\t{%1, %0|%0, %1}";
+      switch (get_attr_mode (insn))
+       {
+       case MODE_TI:
+         return "movdqa\t{%1, %0|%0, %1}";
+       case MODE_V4SF:
+         return "movaps\t{%1, %0|%0, %1}";
+       case MODE_SI:
+          return "movd\t{%1, %0|%0, %1}";
+       case MODE_SF:
+          return "movss\t{%1, %0|%0, %1}";
+       default:
+         gcc_unreachable ();
+       }
+
+    case TYPE_MMXADD:
+      return "pxor\t%0, %0";
 
     case TYPE_MMXMOV:
       if (get_attr_mode (insn) == MODE_DI)
       return "lea{l}\t{%1, %0|%0, %1}";
 
     default:
-      if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
-       abort();
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
       return "mov{l}\t{%1, %0|%0, %1}";
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "2,3,4")
+     (cond [(eq_attr "alternative" "2")
+             (const_string "mmxadd")
+           (eq_attr "alternative" "3,4,5")
              (const_string "mmxmov")
-           (eq_attr "alternative" "5,6,7")
+           (eq_attr "alternative" "6")
+             (const_string "sselog1")
+           (eq_attr "alternative" "7,8,9,10,11")
              (const_string "ssemov")
-           (and (ne (symbol_ref "flag_pic") (const_int 0))
-                (match_operand:SI 1 "symbolic_operand" ""))
+           (match_operand:DI 1 "pic_32bit_operand" "")
              (const_string "lea")
           ]
           (const_string "imov")))
-   (set_attr "mode" "SI,SI,DI,SI,SI,TI,SI,SI")])
+   (set (attr "mode")
+     (cond [(eq_attr "alternative" "2,3")
+             (const_string "DI")
+           (eq_attr "alternative" "6,7")
+             (if_then_else
+               (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+               (const_string "V4SF")
+               (const_string "TI"))
+           (and (eq_attr "alternative" "8,9,10,11")
+                (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+             (const_string "SF")
+          ]
+          (const_string "SI")))])
 
 ;; Stores and loads of ax to arbitrary constant address.
 ;; We fake an second form of instruction to force reload to load address
   "ix86_expand_move (HImode, operands); DONE;")
 
 (define_insn "*pushhi2"
+  ;; HImode push for !TARGET_64BIT.  After this change there is a single
+  ;; alternative: the source must satisfy nonmemory_no_elim_operand
+  ;; (constraint "rn"), the output is push{l} of the %k form of operand 1,
+  ;; and the insn is accounted as SImode rather than HImode.
-  [(set (match_operand:HI 0 "push_operand" "=<,<")
-       (match_operand:HI 1 "general_no_elim_operand" "n,r*m"))]
+  [(set (match_operand:HI 0 "push_operand" "=X")
+       (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
   "!TARGET_64BIT"
-  "@
-   push{w}\t{|WORD PTR }%1
-   push{w}\t%1"
+  "push{l}\t%k1"
   [(set_attr "type" "push")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "SI")])
 
 ;; For 64BIT abi we always round up to 8 bytes.
 (define_insn "*pushhi2_rex64"
   "TARGET_64BIT"
   "push{q}\t%q1"
   [(set_attr "type" "push")
-   (set_attr "mode" "QI")])
+   (set_attr "mode" "DI")])
 
 (define_insn "*movhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
   "ix86_expand_move (QImode, operands); DONE;")
 
 ;; emit_push_insn when it calls move_by_pieces requires an insn to
-;; "push a byte".  But actually we use pushw, which has the effect
-;; of rounding the amount pushed up to a halfword.
+;; "push a byte".  But actually we use pushl, which has the effect
+;; of rounding the amount pushed up to a word.
 
 (define_insn "*pushqi2"
+  ;; QImode push for !TARGET_64BIT.  The two alternatives ("n" and "r")
+  ;; are merged into one ("rn"); the output changes from push{w} to
+  ;; push{l} of the %k form of operand 1, and the "mode" attribute from
+  ;; HI to SI.
-  [(set (match_operand:QI 0 "push_operand" "=X,X")
-       (match_operand:QI 1 "nonmemory_no_elim_operand" "n,r"))]
+  [(set (match_operand:QI 0 "push_operand" "=X")
+       (match_operand:QI 1 "nonmemory_no_elim_operand" "rn"))]
   "!TARGET_64BIT"
-  "@
-   push{w}\t{|word ptr }%1
-   push{w}\t%w1"
+  "push{l}\t%k1"
   [(set_attr "type" "push")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "SI")])
 
 ;; For 64BIT abi we always round up to 8 bytes.
 (define_insn "*pushqi2_rex64"
   "TARGET_64BIT"
   "push{q}\t%q1"
   [(set_attr "type" "push")
-   (set_attr "mode" "QI")])
+   (set_attr "mode" "DI")])
 
 ;; Situation is quite tricky about when to choose full sized (SImode) move
 ;; over QImode moves.  For Q_REG -> Q_REG move we use full size only for
   switch (get_attr_type (insn))
     {
     case TYPE_IMOVX:
-      if (!ANY_QI_REG_P (operands[1]) && GET_CODE (operands[1]) != MEM)
-       abort ();
+      gcc_assert (ANY_QI_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM);
       return "movz{bl|x}\t{%1, %k0|%k0, %1}";
     default:
       if (get_attr_mode (insn) == MODE_SI)
     }
 }
   [(set (attr "type")
-     (cond [(ne (symbol_ref "optimize_size") (const_int 0))
+     (cond [(and (eq_attr "alternative" "5")
+                (not (match_operand:QI 1 "aligned_operand" "")))
+             (const_string "imovx")
+           (ne (symbol_ref "optimize_size") (const_int 0))
              (const_string "imov")
            (and (eq_attr "alternative" "3")
                 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
               (const_string "SI")
             (and (eq_attr "type" "imov")
                  (and (eq_attr "alternative" "0,1")
-                      (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
-                          (const_int 0))))
+                      (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
+                               (const_int 0))
+                           (and (eq (symbol_ref "optimize_size")
+                                    (const_int 0))
+                                (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
+                                    (const_int 0))))))
               (const_string "SI")
             ;; Avoid partial register stalls when not using QImode arithmetic
             (and (eq_attr "type" "imov")
   rtx op0, op1, op2;
   op0 = operands[0]; op1 = operands[1]; op2 = operands[2];
 
-  if (reg_overlap_mentioned_p (op2, op0))
-    abort ();
+  gcc_assert (!reg_overlap_mentioned_p (op2, op0));
   if (! q_regs_operand (op1, QImode))
     {
       emit_insn (gen_movqi (op2, op1));
    (set_attr "memory" "load")
    (set_attr "mode" "QI")])
 
+;; DImode variant of *movsi_extzv_1, enabled only for TARGET_64BIT.
+;; Matches a zero_extract of 8 bits at bit position 8 of operand 1 and
+;; outputs a movzb from the %h form of operand 1 into the %k form of
+;; operand 0.
+;; NOTE(review): the DImode result depends on the 32-bit destination
+;; write clearing the upper half of the 64-bit register.
+(define_insn "*movdi_extzv_1"
+  [(set (match_operand:DI 0 "register_operand" "=R")
+       (zero_extract:DI (match_operand 1 "ext_register_operand" "Q")
+                        (const_int 8)
+                        (const_int 8)))]
+  "TARGET_64BIT"
+  "movz{bl|x}\t{%h1, %k0|%k0, %h1}"
+  [(set_attr "type" "imovx")
+   (set_attr "mode" "DI")])
+
 (define_insn "*movsi_extzv_1"
   [(set (match_operand:SI 0 "register_operand" "=R")
        (zero_extract:SI (match_operand 1 "ext_register_operand" "Q")
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
         (match_operand:DI 1 "immediate_operand" ""))]
-  "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                   ? flow2_completed : reload_completed)
    && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 0) (match_dup 1))
    (set_attr "length_immediate" "1")])
 
 (define_insn "*movdi_2"
+  ;; DImode move for !TARGET_64BIT; a memory-to-memory move is rejected
+  ;; by the insn condition.  Alternatives after this change, as
+  ;; "dest <- source : template (type, mode)":
+  ;;    0  r   <- riFo : #       (*, DI)
+  ;;    1  o   <- riF  : #       (*, DI)
+  ;;    2  *y  <- C    : pxor    (mmx, DI)
+  ;;    3  m*y <- *y   : movq    (mmxmov, DI)
+  ;;    4  *y  <- m    : movq    (mmxmov, DI)
+  ;;    5  *Y  <- C    : pxor    (sselog1, TI)
+  ;;    6  m   <- *Y   : movq    (ssemov, DI)
+  ;;    7  *Y  <- *Y   : movdqa  (ssemov, TI)
+  ;;    8  *Y  <- m    : movq    (ssemov, DI)
+  ;;    9  *x  <- C    : xorps   (sselog1, V4SF)
+  ;;   10  m   <- *x   : movlps  (ssemov, V2SF)
+  ;;   11  *x  <- *x   : movaps  (ssemov, V4SF)
+  ;;   12  *x  <- m    : movlps  (ssemov, V2SF)
+  ;; The "C" alternatives output a pxor/xorps of the destination with
+  ;; itself, so "C" is presumably the zero constant -- confirm against
+  ;; constraints.md.
+  ;; NOTE(review): alternative 2 is typed "mmx", whereas *movsi_1 and
+  ;; *movdi_1_rex64 type their MMX pxor alternative "mmxadd" -- confirm
+  ;; which is intended.
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!m*y,!*y,!m,!*x,!*x")
-       (match_operand:DI 1 "general_operand" "riFo,riF,*y,m,*x,*x,m"))]
-  "!TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+                               "=r  ,o  ,*y,m*y,*y,*Y,m ,*Y,*Y,*x,m ,*x,*x")
+       (match_operand:DI 1 "general_operand"
+                               "riFo,riF,C ,*y ,m ,C ,*Y,*Y,m ,C ,*x,*x,m "))]
+  "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    #
    #
+   pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movq\t{%1, %0|%0, %1}
+   pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movdqa\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,*,mmx,mmx,ssemov,ssemov,ssemov")
-   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI")])
+   movq\t{%1, %0|%0, %1}
+   xorps\t%0, %0
+   movlps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}
+   movlps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
 
 (define_insn "*movdi_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-               "=r,r  ,r,mr,!mr,!*y,!rm,!*y,!*x,!rm,!*x,!*x,!*y")
+               "=r,r  ,r,m ,!m,*y,*y,?rm,?*y,*x,*x,?rm,?*x,?*x,?*y")
        (match_operand:DI 1 "general_operand"
-               "Z ,rem,i,re,n  ,*y ,*y ,rm ,*x ,*x ,rm ,*y ,*x"))]
-  "TARGET_64BIT
-   && (TARGET_INTER_UNIT_MOVES || optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+               "Z ,rem,i,re,n ,C ,*y,*y ,rm ,C ,*x,*x ,rm ,*y ,*x"))]
+  "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_SSECVT:
-      if (which_alternative == 11)
+      if (which_alternative == 13)
        return "movq2dq\t{%1, %0|%0, %1}";
       else
        return "movdq2q\t{%1, %0|%0, %1}";
       if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1]))
          return "movd\t{%1, %0|%0, %1}";
       return "movq\t{%1, %0|%0, %1}";
+    case TYPE_SSELOG1:
+    case TYPE_MMXADD:
+      return "pxor\t%0, %0";
     case TYPE_MULTI:
       return "#";
     case TYPE_LEA:
       return "lea{q}\t{%a1, %0|%0, %a1}";
     default:
-      if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
-       abort ();
+      gcc_assert (!flag_pic || LEGITIMATE_PIC_OPERAND_P (operands[1]));
       if (get_attr_mode (insn) == MODE_SI)
        return "mov{l}\t{%k1, %k0|%k0, %k1}";
       else if (which_alternative == 2)
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "5,6,7")
+     (cond [(eq_attr "alternative" "5")
+             (const_string "mmxadd")
+           (eq_attr "alternative" "6,7,8")
              (const_string "mmxmov")
-           (eq_attr "alternative" "8,9,10")
+           (eq_attr "alternative" "9")
+             (const_string "sselog1")
+           (eq_attr "alternative" "10,11,12")
              (const_string "ssemov")
-           (eq_attr "alternative" "11,12")
+           (eq_attr "alternative" "13,14")
              (const_string "ssecvt")
            (eq_attr "alternative" "4")
              (const_string "multi")
-           (and (ne (symbol_ref "flag_pic") (const_int 0))
-                (match_operand:DI 1 "symbolic_operand" ""))
+           (match_operand:DI 1 "pic_32bit_operand" "")
              (const_string "lea")
           ]
           (const_string "imov")))
-   (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI,DI,DI")])
-
-(define_insn "*movdi_1_rex64_nointerunit"
-  [(set (match_operand:DI 0 "nonimmediate_operand"
-               "=r,r ,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y")
-       (match_operand:DI 1 "general_operand"
-               "Z,rem,i,re,n  ,*y ,*y,m  ,*Y ,*Y,m"))]
-  "TARGET_64BIT
-   && (!TARGET_INTER_UNIT_MOVES && !optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (get_attr_type (insn))
-    {
-    case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == MODE_TI)
-         return "movdqa\t{%1, %0|%0, %1}";
-      /* FALLTHRU */
-    case TYPE_MMXMOV:
-      return "movq\t{%1, %0|%0, %1}";
-    case TYPE_MULTI:
-      return "#";
-    case TYPE_LEA:
-      return "lea{q}\t{%a1, %0|%0, %a1}";
-    default:
-      if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1]))
-       abort ();
-      if (get_attr_mode (insn) == MODE_SI)
-       return "mov{l}\t{%k1, %k0|%k0, %k1}";
-      else if (which_alternative == 2)
-       return "movabs{q}\t{%1, %0|%0, %1}";
-      else
-       return "mov{q}\t{%1, %0|%0, %1}";
-    }
-}
-  [(set (attr "type")
-     (cond [(eq_attr "alternative" "5,6,7")
-             (const_string "mmxmov")
-           (eq_attr "alternative" "8,9,10")
-             (const_string "ssemov")
-           (eq_attr "alternative" "4")
-             (const_string "multi")
-           (and (ne (symbol_ref "flag_pic") (const_int 0))
-                (match_operand:DI 1 "symbolic_operand" ""))
-             (const_string "lea")
-          ]
-          (const_string "imov")))
-   (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*")
-   (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*")
-   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")])
+   (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI")])
 
 ;; Stores and loads of ax to arbitrary constant address.
 ;; We fake a second form of instruction to force reload to load address
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
         (match_operand:DI 1 "immediate_operand" ""))]
-  "TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                   ? flow2_completed : reload_completed)
    && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 3))
    (set_attr "pent_pair" "np")
    (set_attr "athlon_decode" "vector")])
 
-(define_expand "movsf"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "")
-       (match_operand:SF 1 "general_operand" ""))]
-  ""
-  "ix86_expand_move (SFmode, operands); DONE;")
+(define_expand "movti"  ; TImode move expander: ix86_expand_move on 64-bit targets, ix86_expand_vector_move otherwise
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (match_operand:TI 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE || TARGET_64BIT"  ; enabled when either SSE or 64-bit mode is available
+{
+  if (TARGET_64BIT)
+    ix86_expand_move (TImode, operands);
+  else
+    ix86_expand_vector_move (TImode, operands);
+  DONE;
+})
 
-(define_insn "*pushsf"
-  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
-       (match_operand:SF 1 "general_no_elim_operand" "f#rx,rFm#fx,x#rf"))]
-  "!TARGET_64BIT"
+(define_insn "*movti_internal"  ; TImode move for SSE targets that are not 64-bit
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
+       (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"  ; memory-to-memory moves do not match
 {
   switch (which_alternative)
     {
+    case 0:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
     case 1:
-      return "push{l}\t%1";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")  ; alternative 0 is the zeroing xor, 1 and 2 are moves
+   (set (attr "mode")  ; the mode attribute picks between the xorps/movaps and pxor/movdqa forms in the output code
+       (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
+                   (ne (symbol_ref "optimize_size") (const_int 0)))
+                (const_string "V4SF")  ; TARGET_SSE2 is zero, or optimize_size is set
+              (and (eq_attr "alternative" "2")
+                   (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                       (const_int 0)))
+                (const_string "V4SF")]  ; alternative 2 when TARGET_SSE_TYPELESS_STORES is set
+             (const_string "TI")))])  ; every remaining case
 
+(define_insn "*movti_rex64"  ; TImode move for 64-bit targets
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+       (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"  ; memory-to-memory moves do not match
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 3:
+    case 4:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
     default:
-      /* This insn should be already split before reg-stack.  */
-      abort ();
+      gcc_unreachable ();
     }
 }
+  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")  ; alternatives 0 and 1 only emit the # placeholder
+   (set (attr "mode")  ; the mode attribute picks between the xorps/movaps and pxor/movdqa forms in the output code
+        (cond [(eq_attr "alternative" "2,3")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))  ; alternatives 2 and 3: V4SF only when optimize_size is set
+              (eq_attr "alternative" "4")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]  ; alternative 4: V4SF when TARGET_SSE_TYPELESS_STORES or optimize_size is set
+              (const_string "DI")))])  ; alternatives 0 and 1
+
+(define_split  ; splits TImode moves that involve no SSE register, once reload has completed
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+        (match_operand:TI 1 "general_operand" ""))]
+  "reload_completed && !SSE_REG_P (operands[0])
+   && !SSE_REG_P (operands[1])"  ; neither operand may be an SSE register
+  [(const_int 0)]  ; no replacement template; the preparation code below calls ix86_split_long_move and ends with DONE
+  "ix86_split_long_move (operands); DONE;")
+
+(define_expand "movsf"  ; SFmode move expander; all work is delegated to ix86_expand_move
+  [(set (match_operand:SF 0 "nonimmediate_operand" "")
+       (match_operand:SF 1 "general_operand" ""))]
+  ""  ; empty condition: no target restriction
+  "ix86_expand_move (SFmode, operands); DONE;")
+
+(define_insn "*pushsf"  ; SFmode push when TARGET_64BIT is off; the output code asserts that only alternative 1 reaches it
+  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+       (match_operand:SF 1 "general_no_elim_operand" "f,rFm,x"))]
+  "!TARGET_64BIT"
+{
+  /* Anything else should be already split before reg-stack.  */
+  gcc_assert (which_alternative == 1);
+  return "push{l}\t%1";
+}
   [(set_attr "type" "multi,push,multi")  ; only alternative 1 has type push
+   (set_attr "unit" "i387,*,*")  ; alternative 0 is assigned unit i387
    (set_attr "mode" "SF,SI,SF")])
 
 (define_insn "*pushsf_rex64"  ; SFmode push for TARGET_64BIT; the output code asserts that only alternative 1 reaches it
   [(set (match_operand:SF 0 "push_operand" "=X,X,X")
-       (match_operand:SF 1 "nonmemory_no_elim_operand" "f#rx,rF#fx,x#rf"))]
+       (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
   "TARGET_64BIT"
 {
-  switch (which_alternative)
-    {
-    case 1:
-      return "push{q}\t%q1";
-
-    default:
-      /* This insn should be already split before reg-stack.  */
-      abort ();
-    }
+  /* Anything else should be already split before reg-stack.  */
+  gcc_assert (which_alternative == 1);
+  return "push{q}\t%q1";
 }
   [(set_attr "type" "multi,push,multi")  ; only alternative 1 has type push
+   (set_attr "unit" "i387,*,*")  ; alternative 0 is assigned unit i387
    (set_attr "mode" "SF,DI,SF")])
 
 (define_split
        (match_operand:SF 1 "memory_operand" ""))]
   "reload_completed
    && GET_CODE (operands[1]) == MEM
-   && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
-   && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))"
+   && constant_pool_reference_p (operands[1])"
   [(set (match_dup 0)
        (match_dup 1))]
-  "operands[1] = get_pool_constant (XEXP (operands[1], 0));")
+  "operands[1] = avoid_constant_pool_reference (operands[1]);")
 
 
 ;; %%% Kill this when call knows how to work this out.
 
 (define_insn "*movsf_1"
   [(set (match_operand:SF 0 "nonimmediate_operand"
-         "=f#xr,m   ,f#xr,r#xf  ,m    ,x#rf,x#rf,x#rf ,m   ,!*y,!rm,!*y")
-       (match_operand:SF 1 "general_operand"
-         "fm#rx,f#rx,G   ,rmF#fx,Fr#fx,C   ,x   ,xm#rf,x#rf,rm ,*y ,*y"))]
-  "(TARGET_INTER_UNIT_MOVES || optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
-   && (reload_in_progress || reload_completed
-       || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], SFmode))" 
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-
-    case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
-
-    case 2:
-      return standard_80387_constant_opcode (operands[1]);
-
-    case 3:
-    case 4:
-      return "mov{l}\t{%1, %0|%0, %1}";
-    case 5:
-      if (get_attr_mode (insn) == MODE_TI)
-       return "pxor\t%0, %0";
-      else
-       return "xorps\t%0, %0";
-    case 6:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movss\t{%1, %0|%0, %1}";
-    case 7:
-    case 8:
-      return "movss\t{%1, %0|%0, %1}";
-
-    case 9:
-    case 10:
-      return "movd\t{%1, %0|%0, %1}";
-
-    case 11:
-      return "movq\t{%1, %0|%0, %1}";
-
-    default:
-      abort();
-    }
-}
-  [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "3,4,9,10")
-                (const_string "SI")
-              (eq_attr "alternative" "5")
-                (if_then_else
-                  (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-                                (const_int 0))
-                            (ne (symbol_ref "TARGET_SSE2")
-                                (const_int 0)))
-                       (eq (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "TI")
-                  (const_string "V4SF"))
-              /* For architectures resolving dependencies on
-                 whole SSE registers use APS move to break dependency
-                 chains, otherwise use short move to avoid extra work. 
-
-                 Do the same for architectures resolving dependencies on
-                 the parts.  While in DF mode it is better to always handle
-                 just register parts, the SF mode is different due to lack
-                 of instructions to load just part of the register.  It is
-                 better to maintain the whole registers in single format
-                 to avoid problems on using packed logical operations.  */
-              (eq_attr "alternative" "6")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
-                           (const_int 0))
-                       (ne (symbol_ref "TARGET_SSE_SPLIT_REGS")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "SF"))
-              (eq_attr "alternative" "11")
-                (const_string "DI")]
-              (const_string "SF")))])
-
-(define_insn "*movsf_1_nointerunit"
-  [(set (match_operand:SF 0 "nonimmediate_operand"
-         "=f#xr,m   ,f#xr,r#xf  ,m    ,x#rf,x#rf,x#rf ,m   ,!*y,!m,!*y")
+         "=f,m   ,f,r  ,m    ,x,x,x ,m   ,!*y,!rm,!*y")
        (match_operand:SF 1 "general_operand"
-         "fm#rx,f#rx,G   ,rmF#fx,Fr#fx,C   ,x   ,xm#rf,x#rf,m  ,*y,*y"))]
-  "(!TARGET_INTER_UNIT_MOVES && !optimize_size)
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
+         "fm,f,G   ,rmF,Fr,C   ,x   ,xm,x,rm ,*y ,*y"))]
+  "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
       return "movq\t{%1, %0|%0, %1}";
 
     default:
-      abort();
+      gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
+  [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov")
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4,9,10")
                 (const_string "SI")
 
 (define_insn "*pushdf_nointeger"  ; DFmode push; the output code is marked unreachable, so no alternative is ever printed directly
   [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f#Y,Fo#fY,*r#fY,Y#f"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y"))]
   "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"  ; condition is disjoint from that of *pushdf_integer
 {
   /* This insn should be already split before reg-stack.  */
-  abort ();
+  gcc_unreachable ();
 }
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*,*")  ; alternative 0 is assigned unit i387
    (set_attr "mode" "DF,SI,SI,DF")])
 
 (define_insn "*pushdf_integer"  ; DFmode push; the output code is marked unreachable, so no alternative is ever printed directly
   [(set (match_operand:DF 0 "push_operand" "=<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f#rY,rFo#fY,Y#rf"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y"))]
   "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"  ; condition is disjoint from that of *pushdf_nointeger
 {
   /* This insn should be already split before reg-stack.  */
-  abort ();
+  gcc_unreachable ();
 }
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")  ; alternative 0 is assigned unit i387
    (set_attr "mode" "DF,SI,DF")])
 
 ;; %%% Kill this when call knows how to work this out.
 
 (define_insn "*movdf_nointeger"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-                               "=f#x,m  ,f#x,*r  ,o  ,x#f,x#f,x#f  ,m")
+                       "=f,m,f,*r  ,o  ,Y*x,Y*x,Y*x ,m  ")
        (match_operand:DF 1 "general_operand"
-                               "fm#x,f#x,G  ,*roF,F*r,C  ,x#f,xHm#f,x#f"))]
+                       "fm,f,G,*roF,F*r,C  ,Y*x,mY*x,Y*x"))]
   "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
    && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
    && (reload_in_progress || reload_completed
        case MODE_TI:
          return "pxor\t%0, %0";
        default:
-         abort ();
+         gcc_unreachable ();
        }
     case 6:
     case 7:
          return "movsd\t{%1, %0|%0, %1}";
        case MODE_V1DF:
          return "movlpd\t{%1, %0|%0, %1}";
+       case MODE_V2SF:
+         return "movlps\t{%1, %0|%0, %1}";
        default:
-         abort ();
+         gcc_unreachable ();
        }
 
     default:
-      abort();
+      gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
    (set (attr "mode")
-        (cond [(eq_attr "alternative" "3,4")
+        (cond [(eq_attr "alternative" "0,1,2")
+                (const_string "DF")
+              (eq_attr "alternative" "3,4")
                 (const_string "SI")
 
               /* For SSE1, we have many fewer alternatives.  */
               (eq (symbol_ref "TARGET_SSE2") (const_int 0))
                 (cond [(eq_attr "alternative" "5,6")
-                         (if_then_else
-                           (ne (symbol_ref "optimize_size") (const_int 0))
-                           (const_string "V4SF")
-                           (const_string "TI"))
+                         (const_string "V4SF")
                       ]
-                  (const_string "DI"))
+                  (const_string "V2SF"))
 
               /* xorps is one byte shorter.  */
               (eq_attr "alternative" "5")
 
 (define_insn "*movdf_integer"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-                       "=f#Yr,m   ,f#Yr,r#Yf  ,o    ,Y#rf,Y#rf,Y#rf ,m")
+               "=f,m,f,r  ,o ,Y*x,Y*x,Y*x,m  ")
        (match_operand:DF 1 "general_operand"
-                       "fm#Yr,f#Yr,G   ,roF#Yf,Fr#Yf,C   ,Y#rf,Ym#rf,Y#rf"))]
+               "fm,f,G,roF,Fr,C  ,Y*x,m  ,Y*x"))]
   "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
    && ((!optimize_size && TARGET_INTEGER_DFMODE_MOVES) || TARGET_64BIT)
    && (reload_in_progress || reload_completed
        case MODE_TI:
          return "pxor\t%0, %0";
        default:
-         abort ();
+         gcc_unreachable ();
        }
     case 6:
     case 7:
          return "movsd\t{%1, %0|%0, %1}";
        case MODE_V1DF:
          return "movlpd\t{%1, %0|%0, %1}";
+       case MODE_V2SF:
+         return "movlps\t{%1, %0|%0, %1}";
        default:
-         abort ();
+         gcc_unreachable ();
        }
 
     default:
-      abort();
+      gcc_unreachable();
     }
 }
-  [(set_attr "type" "fmov,fmov,fmov,multi,multi,ssemov,ssemov,ssemov,ssemov")
+  [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
    (set (attr "mode")
-        (cond [(eq_attr "alternative" "3,4")
+        (cond [(eq_attr "alternative" "0,1,2")
+                (const_string "DF")
+              (eq_attr "alternative" "3,4")
                 (const_string "SI")
 
               /* For SSE1, we have many fewer alternatives.  */
               (eq (symbol_ref "TARGET_SSE2") (const_int 0))
                 (cond [(eq_attr "alternative" "5,6")
-                         (if_then_else
-                           (ne (symbol_ref "optimize_size") (const_int 0))
-                           (const_string "V4SF")
-                           (const_string "TI"))
+                         (const_string "V4SF")
                       ]
-                  (const_string "DI"))
+                  (const_string "V2SF"))
 
               /* xorps is one byte shorter.  */
               (eq_attr "alternative" "5")
   "optimize_size"
 {
   /* This insn should be already split before reg-stack.  */
-  abort ();
+  gcc_unreachable ();
 }
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*,*")
    (set_attr "mode" "XF,SI,SI")])
 
 (define_insn "*pushxf_integer"  ; XFmode push used when not optimizing for size; the output code is marked unreachable
   [(set (match_operand:XF 0 "push_operand" "=<,<")
-       (match_operand:XF 1 "general_no_elim_operand" "f#r,ro#f"))]
+       (match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
   "!optimize_size"
 {
   /* This insn should be already split before reg-stack.  */
-  abort ();
+  gcc_unreachable ();
 }
   [(set_attr "type" "multi")
+   (set_attr "unit" "i387,*")  ; alternative 0 is assigned unit i387
    (set_attr "mode" "XF,SI")])
 
 (define_split
 
     case 3: case 4:
       return "#";
+    default:
+      gcc_unreachable ();
     }
-  abort();
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
 (define_insn "*movxf_integer"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o")
-       (match_operand:XF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))]
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
+       (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
   "!optimize_size
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
    && (reload_in_progress || reload_completed
 
     case 3: case 4:
       return "#";
+
+    default:
+      gcc_unreachable ();
     }
-  abort();
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
    && GET_CODE (operands[1]) == MEM
    && (GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == SFmode || GET_MODE (operands[0]) == DFmode)
-   && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
-   && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))"
+   && constant_pool_reference_p (operands[1])"
   [(set (match_dup 0) (match_dup 1))]
 {
-  rtx c = get_pool_constant (XEXP (operands[1], 0));
+  rtx c = avoid_constant_pool_reference (operands[1]);
   rtx r = operands[0];
 
   if (GET_CODE (r) == SUBREG)
 }
   [(set_attr "type" "fxch")
    (set_attr "mode" "XF")])
+
+(define_expand "movtf"  ; TFmode move expander; all work is delegated to ix86_expand_move
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+       (match_operand:TF 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT"  ; only available on 64-bit targets
+{
+  ix86_expand_move (TFmode, operands);
+  DONE;
+})
+
+(define_insn "*movtf_internal"  ; TFmode move for 64-bit targets; same alternatives and output code as *movti_rex64
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
+       (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
+  "TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"  ; memory-to-memory moves do not match
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      return "#";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 3:
+    case 4:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")  ; alternatives 0 and 1 only emit the # placeholder
+   (set (attr "mode")  ; the mode attribute picks between the xorps/movaps and pxor/movdqa forms in the output code
+        (cond [(eq_attr "alternative" "2,3")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))  ; alternatives 2 and 3: V4SF only when optimize_size is set
+              (eq_attr "alternative" "4")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]  ; alternative 4: V4SF when TARGET_SSE_TYPELESS_STORES or optimize_size is set
+              (const_string "DI")))])  ; alternatives 0 and 1
+
+(define_split  ; splits TFmode moves that involve no SSE register, once reload has completed
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+        (match_operand:TF 1 "general_operand" ""))]
+  "reload_completed && !SSE_REG_P (operands[0])
+   && !SSE_REG_P (operands[1])"  ; neither operand may be an SSE register
+  [(const_int 0)]  ; no replacement template; the preparation code below calls ix86_split_long_move and ends with DONE
+  "ix86_split_long_move (operands); DONE;")
 \f
 ;; Zero extension instructions
 
   [(set_attr "type" "imovx,alu1")
    (set_attr "mode" "HI")])
 
-(define_insn "*zero_extendqihi2_movzbw"
+; zero extend to SImode here to avoid partial register stalls
+(define_insn "*zero_extendqihi2_movzbl"  ; QImode to HImode zero extension, matched only after reload
   [(set (match_operand:HI 0 "register_operand" "=r")
      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
   "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed"
-  "movz{bw|x}\t{%1, %0|%0, %1}"
+  "movz{bl|x}\t{%1, %k0|%k0, %1}"  ; only the destination takes the k modifier; the byte source is printed as plain %1 in both dialects, matching zero_extendqidi2
   [(set_attr "type" "imovx")
-   (set_attr "mode" "HI")])
+   (set_attr "mode" "SI")])  ; SI because the destination is written through the k modifier
 
 ;; For the movzbw case strip only the clobber
 (define_split
   ")
 
 (define_insn "zero_extendsidi2_32"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y")
-       (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,m,m")))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && !TARGET_INTER_UNIT_MOVES"
-  "@
-   #
-   #
-   #
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "mode" "SI,SI,SI,DI,TI")
-   (set_attr "type" "multi,multi,multi,mmxmov,ssemov")])
-
-(define_insn "*zero_extendsidi2_32_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,!?y,!?Y")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?*o,?*y,?*Y")
        (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "0,rm,r,rm,rm")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_INTER_UNIT_MOVES"
+  "!TARGET_64BIT"
   "@
    #
    #
    (set_attr "type" "multi,multi,multi,mmxmov,ssemov")])
 
 (define_insn "zero_extendsidi2_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!?Y")
-     (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,m,m")))]
-  "TARGET_64BIT && !TARGET_INTER_UNIT_MOVES"
-  "@
-   mov\t{%k1, %k0|%k0, %k1}
-   #
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imovx,imov,mmxmov,ssemov")
-   (set_attr "mode" "SI,DI,DI,TI")])
-
-(define_insn "*zero_extendsidi2_rex64_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,!?y,!*?")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*y,?*Y")
      (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm,0,rm,rm")))]
-  "TARGET_64BIT && TARGET_INTER_UNIT_MOVES"
+  "TARGET_64BIT"
   "@
    mov\t{%k1, %k0|%k0, %k1}
    #
   "split_di (&operands[0], 1, &operands[3], &operands[4]);")
 
 (define_insn "zero_extendhidi2"  ; HImode to DImode zero extension, 64-bit targets only
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-     (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "r,m")))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+     (zero_extend:DI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
   "TARGET_64BIT"
-  "@
-   movz{wl|x}\t{%1, %k0|%k0, %1}
-   movz{wq|x}\t{%1, %0|%0, %1}"
+  "movz{wl|x}\t{%1, %k0|%k0, %1}"  ; single alternative; the destination is printed through the k modifier
   [(set_attr "type" "imovx")
-   (set_attr "mode" "SI,DI")])
+   (set_attr "mode" "DI")])
 
 (define_insn "zero_extendqidi2"  ; QImode to DImode zero extension, 64-bit targets only
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-     (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "Q,m")))]
+  [(set (match_operand:DI 0 "register_operand" "=r")
+     (zero_extend:DI (match_operand:QI 1 "nonimmediate_operand" "rm")))]
   "TARGET_64BIT"
-  "@
-   movz{bl|x}\t{%1, %k0|%k0, %1}
-   movz{bq|x}\t{%1, %0|%0, %1}"
+  "movz{bl|x}\t{%1, %k0|%k0, %1}"  ; single alternative; the destination is printed through the k modifier
   [(set_attr "type" "imovx")
-   (set_attr "mode" "SI,DI")])
+   (set_attr "mode" "DI")])
 \f
 ;; Sign extension instructions
 
   /* ??? Needed for compress_float_constant since all fp constants
      are LEGITIMATE_CONSTANT_P.  */
   if (GET_CODE (operands[1]) == CONST_DOUBLE)
-    operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
-  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
-    operands[1] = force_reg (SFmode, operands[1]);
-})
+    {
+      if ((!TARGET_SSE2 || TARGET_MIX_SSE_I387)
+         && standard_80387_constant_p (operands[1]) > 0)
+       {
+         operands[1] = simplify_const_unary_operation
+           (FLOAT_EXTEND, DFmode, operands[1], SFmode);
+         emit_move_insn_1 (operands[0], operands[1]);
+         DONE;
+       }
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
+  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+    operands[1] = force_reg (SFmode, operands[1]);
+})
 
 (define_insn "*extendsfdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f#Y,m#fY,Y#f")
-        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm#Y,f#Y,mY#f")))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,Y")
+        (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f,mY")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
 {
       return "cvtss2sd\t{%1, %0|%0, %1}";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,fmov,ssecvt")
    (set_attr "mode" "SF,XF,DF")])
 
 (define_insn "*extendsfdf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=Y")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=Y")
         (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "mY")))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
         return "fst%z0\t%y0";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov")
   /* ??? Needed for compress_float_constant since all fp constants
      are LEGITIMATE_CONSTANT_P.  */
   if (GET_CODE (operands[1]) == CONST_DOUBLE)
-    operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    {
+      if (standard_80387_constant_p (operands[1]) > 0)
+       {
+         operands[1] = simplify_const_unary_operation
+           (FLOAT_EXTEND, XFmode, operands[1], SFmode);
+         emit_move_insn_1 (operands[0], operands[1]);
+         DONE;
+       }
+      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
+    }
   if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
     operands[1] = force_reg (SFmode, operands[1]);
 })
         return "fstp%z0\t%y0\n\tfld%z0\t%y0";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov")
   /* ??? Needed for compress_float_constant since all fp constants
      are LEGITIMATE_CONSTANT_P.  */
   if (GET_CODE (operands[1]) == CONST_DOUBLE)
-    operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
+    {
+      if (standard_80387_constant_p (operands[1]) > 0)
+       {
+         operands[1] = simplify_const_unary_operation
+           (FLOAT_EXTEND, XFmode, operands[1], DFmode);
+         emit_move_insn_1 (operands[0], operands[1]);
+         DONE;
+       }
+      operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
+    }
   if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
     operands[1] = force_reg (DFmode, operands[1]);
 })
         return "fstp%z0\t%y0";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov")
     ;
   else
     {
-      rtx temp = assign_386_stack_local (SFmode, 0);
+      rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP);
       emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
       DONE;
     }
     case 2:
       return "cvtsd2ss\t{%1, %0|%0, %1}";
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,fmov,ssecvt")
     case 2:
       return "cvtsd2ss\t{%1, %0|%0, %1}";
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,multi,ssecvt")
+   (set_attr "unit" "*,i387,*")
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_i387"
     case 1:
       return "#";
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,multi")
+   (set_attr "unit" "*,i387")
+   (set_attr "mode" "SF")])
+
+(define_insn "*truncdfsf2_i387_1"  ; DFmode to SFmode truncation stored directly to memory; emits fstp when operand 1 carries a REG_DEAD note, fst otherwise
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+       (float_truncate:SF
+         (match_operand:DF 1 "register_operand" "f")))]
+  "TARGET_80387
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)
+   && !TARGET_MIX_SSE_I387"  ; needs TARGET_80387, with SSE2 math and mixed SSE/i387 both off
+{
+  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+    return "fstp%z0\t%y0";
+  else
+    return "fst%z0\t%y0";
+}
+  [(set_attr "type" "fmov")
    (set_attr "mode" "SF")])
 
 (define_split
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (SFmode, 0);
+    operands[2] = assign_386_stack_local (SFmode, SLOT_TEMP);
 })
 
 (define_insn "*truncxfsf2_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#rx,?r#fx,?x#rf")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x")
        (float_truncate:SF
         (match_operand:XF 1 "register_operand" "f,f,f,f")))
    (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
   "TARGET_MIX_SSE_I387"
 {
-  switch (which_alternative)
-    {
-    case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
-    default:
-      abort();
-    }
+  /* Only alternative 0 (the "=m" memory destination) may reach output;
+     the other alternatives have type "multi" and are presumably split
+     beforehand -- TODO confirm against the matching define_split.  */
+  gcc_assert (!which_alternative);
+  /* Popping store when operand 1 has a REG_DEAD note on this insn,
+     non-popping store otherwise.  */
+  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+    return "fstp%z0\t%y0";
+  else
+    return "fst%z0\t%y0";
 }
   [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
    (set_attr "mode" "SF")])
 
 (define_insn "truncxfsf2_i387_noop"
    (set_attr "mode" "SF")])
 
 (define_insn "*truncxfsf2_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r")
        (float_truncate:SF
         (match_operand:XF 1 "register_operand" "f,f,f")))
    (clobber (match_operand:SF 2 "memory_operand" "=X,m,m"))]
   "TARGET_80387"
 {
-  switch (which_alternative)
-    {
-    case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
-    default:
-      abort ();
-    }
+  gcc_assert (!which_alternative);
+  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+    return "fstp%z0\t%y0";
+   else
+     return "fst%z0\t%y0";
 }
   [(set_attr "type" "fmov,multi,multi")
+   (set_attr "unit" "*,i387,i387")
    (set_attr "mode" "SF")])
 
 (define_insn "*truncxfsf2_i387_1"
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (DFmode, 0);
+    operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP);
 })
 
 (define_insn "*truncxfdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#rY,?r#fY,?Y#rf")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Y")
        (float_truncate:DF
         (match_operand:XF 1 "register_operand" "f,f,f,f")))
    (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387"
 {
-  switch (which_alternative)
-    {
-    case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
-    default:
-      abort();
-    }
-  abort ();
+  /* Only alternative 0 (the "=m" memory destination) may reach output;
+     the other alternatives have type "multi" and are presumably split
+     beforehand -- TODO confirm against the matching define_split.  */
+  gcc_assert (!which_alternative);
+  /* Popping store when operand 1 has a REG_DEAD note on this insn,
+     non-popping store otherwise.  */
+  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+    return "fstp%z0\t%y0";
+  else
+    return "fst%z0\t%y0";
 }
   [(set_attr "type" "fmov,multi,multi,multi")
+   (set_attr "unit" "*,i387,i387,i387")
    (set_attr "mode" "DF")])
 
 (define_insn "truncxfdf2_i387_noop"
    (set_attr "mode" "DF")])
 
 (define_insn "*truncxfdf2_i387"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f#r,?r#f")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r")
        (float_truncate:DF
         (match_operand:XF 1 "register_operand" "f,f,f")))
    (clobber (match_operand:DF 2 "memory_operand" "=X,m,m"))]
   "TARGET_80387"
 {
-  switch (which_alternative)
-    {
-    case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
-    default:
-      abort ();
-    }
+  /* Only alternative 0 (the "=m" memory destination) may reach output;
+     the other alternatives have type "multi" and are presumably split
+     beforehand -- TODO confirm against the matching define_split.  */
+  gcc_assert (!which_alternative);
+  /* Popping store when operand 1 has a REG_DEAD note on this insn,
+     non-popping store otherwise.  */
+  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
+    return "fstp%z0\t%y0";
+  else
+    return "fst%z0\t%y0";
 }
   [(set_attr "type" "fmov,multi,multi")
+   (set_attr "unit" "*,i387,i387")
    (set_attr "mode" "DF")])
 
 (define_insn "*truncxfdf2_i387_1"
   [(set (match_dup 0) (float_truncate:DF (match_dup 1)))]
   "")
 \f
-;; %%% Break up all these bad boys.
-
 ;; Signed conversion to DImode.
 
 (define_expand "fix_truncxfdi2"
                    (fix:DI (match_operand:XF 1 "register_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
   "TARGET_80387"
-  "")
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
 
-(define_expand "fix_truncdfdi2"
+(define_expand "fix_trunc<mode>di2"
   [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
-                   (fix:DI (match_operand:DF 1 "register_operand" "")))
+                   (fix:DI (match_operand:SSEMODEF 1 "register_operand" "")))
               (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE2)"
+  "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
 {
-  if (TARGET_64BIT && TARGET_SSE2)
+  /* With TARGET_FISTTP, emit the fisttp pattern unless this is a 64-bit
+     conversion of an SSE float mode under TARGET_SSE_MATH.  */
+  if (TARGET_FISTTP
+      && !(TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncdi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  /* Otherwise, on 64-bit targets with an SSE float mode, use the SSE
+     conversion.  It is emitted with a register destination, so a fresh
+     pseudo is used and copied to operand 0 when operand 0 is not a
+     register.  If neither branch is taken, the parallel above (with the
+     FLAGS_REG clobber) is what gets emitted.  */
+  if (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))
    {
      rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
-     emit_insn (gen_fix_truncdfdi_sse (out, operands[1]));
+     emit_insn (gen_fix_trunc<mode>di_sse (out, operands[1]));
      if (out != operands[0])
        emit_move_insn (operands[0], out);
      DONE;
    }
 })
 
-(define_expand "fix_truncsfdi2"
-  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
-                  (fix:DI (match_operand:SF 1 "register_operand" "")))
-              (clobber (reg:CC FLAGS_REG))])] 
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE)"
+;; Signed conversion to SImode.
+
+;; XFmode -> SImode.  With TARGET_FISTTP the fisttp pattern is emitted
+;; directly; otherwise the parallel below (including its FLAGS_REG
+;; clobber) is emitted unchanged.
+(define_expand "fix_truncxfsi2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+                   (fix:SI (match_operand:XF 1 "register_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387"
 {
-  if (TARGET_64BIT && TARGET_SSE)
+  if (TARGET_FISTTP)
    {
-     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DImode);
-     emit_insn (gen_fix_truncsfdi_sse (out, operands[1]));
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; SSEMODEF -> SImode.  Selects between the fisttp pattern, the SSE
+;; conversion insn, and (when neither branch below fires) the parallel
+;; with the FLAGS_REG clobber.
+(define_expand "fix_trunc<mode>si2"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
+                  (fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
+{
+  /* With TARGET_FISTTP, emit the fisttp pattern unless the source is an
+     SSE float mode under TARGET_SSE_MATH.  */
+  if (TARGET_FISTTP
+      && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
+   {
+     emit_insn (gen_fix_truncsi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+  /* SSE conversion: emitted with a register destination, then copied to
+     operand 0 when operand 0 is not itself a register.  */
+  if (SSE_FLOAT_MODE_P (<MODE>mode))
+   {
+     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
+     emit_insn (gen_fix_trunc<mode>si_sse (out, operands[1]));
      if (out != operands[0])
        emit_move_insn (operands[0], out);
      DONE;
    }
 })
 
+;; Signed conversion to HImode.
+
+;; X87MODEF -> HImode.  Only an x87 path exists here: the expander is
+;; disabled for SSE float modes unless TARGET_FISTTP is set and
+;; TARGET_SSE_MATH is not.  With TARGET_FISTTP the fisttp pattern is
+;; emitted; otherwise the parallel below is emitted unchanged.
+(define_expand "fix_trunc<mode>hi2"
+  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
+                  (fix:HI (match_operand:X87MODEF 1 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_80387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && (!TARGET_FISTTP || TARGET_SSE_MATH))"
+{
+  if (TARGET_FISTTP)
+   {
+     emit_insn (gen_fix_trunchi_fisttp_i387_1 (operands[0], operands[1]));
+     DONE;
+   }
+})
+
+;; When SSE is available, it is always faster to use it!
+;; SFmode -> DImode truncating conversion via cvttss2si{q}.  64-bit only;
+;; not available when TARGET_FISTTP is set without TARGET_SSE_MATH.
+;; Alternative 0 takes a register source, alternative 1 register or memory.
+(define_insn "fix_truncsfdi_sse"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
+  "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvttss2si{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; DFmode -> DImode truncating conversion via cvttsd2si{q}.  64-bit only,
+;; requires SSE2; not available when TARGET_FISTTP is set without
+;; TARGET_SSE_MATH.
+(define_insn "fix_truncdfdi_sse"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+       (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))]
+  "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvttsd2si{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; SFmode -> SImode truncating conversion via cvttss2si.  Not available
+;; when TARGET_FISTTP is set without TARGET_SSE_MATH.  The source operand
+;; is single precision, so the "mode" attribute is SF, as in
+;; fix_truncsfdi_sse.
+(define_insn "fix_truncsfsi_sse"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
+  "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvttss2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; DFmode -> SImode truncating conversion via cvttsd2si.  Requires SSE2;
+;; not available when TARGET_FISTTP is set without TARGET_SSE_MATH.
+(define_insn "fix_truncdfsi_sse"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+       (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))]
+  "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvttsd2si\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "double,vector")])
+
+;; Avoid vector decoded forms of the instruction.
+;; DFmode source in memory: load it into a scratch "Y" register first and
+;; convert from the register.  Applies on TARGET_K8 or TARGET_GENERIC64
+;; when not optimizing for size.
+(define_peephole2
+  [(match_scratch:DF 2 "Y")
+   (set (match_operand:SSEMODEI24 0 "register_operand" "")
+       (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+  "")
+
+;; SFmode source in memory: load it into a scratch "x" register first and
+;; convert from the register.  Applies on TARGET_K8 or TARGET_GENERIC64
+;; when not optimizing for size.
+(define_peephole2
+  [(match_scratch:SF 2 "x")
+   (set (match_operand:SSEMODEI24 0 "register_operand" "")
+       (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
+  "")
+
+;; Pre-reload placeholder for a fisttp-based float -> integer truncation.
+;; It always splits ("&& 1") into one of the two real fisttp patterns,
+;; chosen by whether the destination is memory.
+(define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+       (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))]
+  "TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+        && (TARGET_64BIT || <MODE>mode != DImode))
+       && TARGET_SSE_MATH)
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  /* A non-memory destination gets a <MODE>mode stack temporary
+     (SLOT_TEMP) passed as operand 2; a memory destination is handed to
+     the plain fisttp pattern directly.  */
+  if (!memory_operand (operands[0], VOIDmode))
+    {
+      operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_fisttp_with_temp
+                (operands[0], operands[1], operands[2]));
+    }
+  else
+    emit_insn (gen_fix_trunc<mode>_i387_fisttp (operands[0], operands[1]));
+  DONE;
+}
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; fisttp store of an x87 register to integer memory.  Output is produced
+;; by output_fix_trunc with a third argument of 1 (the non-fisttp
+;; patterns pass 0) -- presumably the fisttp selector; confirm in i386.c.
+;; Clobbers an XFmode scratch.
+(define_insn "fix_trunc<mode>_i387_fisttp"
+  [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
+       (fix:X87MODEI (match_operand 1 "register_operand" "f")))
+   (clobber (match_scratch:XF 2 "=&1f"))]
+  "TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+        && (TARGET_64BIT || <MODE>mode != DImode))
+       && TARGET_SSE_MATH)"
+  "* return output_fix_trunc (insn, operands, 1);"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; fisttp truncation carrying a memory temporary (operand 2) so that a
+;; register destination can be handled.  Never output directly ("#");
+;; it is split after reload by the two define_splits that match this shape.
+(define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+       (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
+  "TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+       && (TARGET_64BIT || <MODE>mode != DImode))
+       && TARGET_SSE_MATH)"
+  "#"
+  [(set_attr "type" "fisttp")
+   (set_attr "mode" "<MODE>")])
+
+;; Register destination, after reload: do the truncation into the memory
+;; temporary (operand 2), keeping the scratch clobber, then load the
+;; temporary into the destination register.
+(define_split
+  [(set (match_operand:X87MODEI 0 "register_operand" "")
+       (fix:X87MODEI (match_operand 1 "register_operand" "")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (fix:X87MODEI (match_dup 1)))
+             (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
+
+;; Memory destination, after reload: the temporary (operand 2) is not
+;; needed, so its clobber is dropped and the store goes straight to
+;; operand 0.
+(define_split
+  [(set (match_operand:X87MODEI 0 "memory_operand" "")
+       (fix:X87MODEI (match_operand 1 "register_operand" "")))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (fix:X87MODEI (match_dup 1)))
+             (clobber (match_dup 3))])]
+  "")
+
 ;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
-;; of the machinery.
-(define_insn_and_split "*fix_truncdi_i387"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
-       (fix:DI (match_operand 1 "register_operand" "f,f")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !reload_completed && !reload_in_progress
-   && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
+;; of the machinery.  Please note the clobber of FLAGS_REG: the i387 control
+;; word calculation (inserted by LCM in the mode switching pass) may use
+;; insns that clobber FLAGS_REG.  See the emit_i387_cw_initialization ()
+;; function in i386.c.
+(define_insn_and_split "*fix_trunc<mode>_i387_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+       (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_80387 && !TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+        && (TARGET_64BIT || <MODE>mode != DImode))
+   && !(reload_completed || reload_in_progress)"
   "#"
   "&& 1"
   [(const_int 0)]
 {
-  ix86_optimize_mode_switching = 1;
-  operands[2] = assign_386_stack_local (HImode, 1);
-  operands[3] = assign_386_stack_local (HImode, 2);
+  /* Flag the I387_TRUNC entry of ix86_optimize_mode_switching.  */
+  ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+  /* Operands 2 and 3 are the HImode stack slots SLOT_CW_STORED and
+     SLOT_CW_TRUNC; both emitted patterns take them as "use" operands.  */
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
   if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fix_truncdi_memory (operands[0], operands[1],
-                                      operands[2], operands[3]));
+    emit_insn (gen_fix_trunc<mode>_i387 (operands[0], operands[1],
+                                        operands[2], operands[3]));
   else
     {
-      operands[4] = assign_386_stack_local (DImode, 0);
-      emit_insn (gen_fix_truncdi_nomemory (operands[0], operands[1],
-                                          operands[2], operands[3],
-                                          operands[4]));
+      /* Non-memory destination: add a <MODE>mode stack temporary
+         (SLOT_TEMP) as operand 4.  */
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fix_trunc<mode>_i387_with_temp (operands[0], operands[1],
+                                                    operands[2], operands[3],
+                                                    operands[4]));
     }
   DONE;
 }
   [(set_attr "type" "fistp")
    (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
+
+;; Non-fisttp x87 truncation to DImode memory.  Operands 2 and 3 are
+;; HImode memory operands that are only "use"d (the pre-reload splitter
+;; passes the SLOT_CW_STORED and SLOT_CW_TRUNC stack slots).  Unlike the
+;; HI/SI variant, this pattern also clobbers an XFmode scratch.
+(define_insn "fix_truncdi_i387"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (fix:DI (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_80387 && !TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
    (set_attr "mode" "DI")])
 
-(define_insn "fix_truncdi_nomemory"
+;; Non-fisttp x87 truncation to DImode with a DImode memory temporary
+;; (operand 4), allowing a register destination.  Never output directly
+;; ("#").  The scratch (operand 5) is XFmode.
+(define_insn "fix_truncdi_i387_with_temp"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
        (fix:DI (match_operand 1 "register_operand" "f,f")))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
    (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
-   (clobber (match_scratch:DF 5 "=&1f,&1f"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_80387 && !TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
   "#"
   [(set_attr "type" "fistp")
    (set_attr "i387_cw" "trunc")
    (set_attr "mode" "DI")])
 
-(define_insn "fix_truncdi_memory"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-       (fix:DI (match_operand 1 "register_operand" "f")))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))
-   (clobber (match_scratch:DF 4 "=&1f"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && (!SSE_FLOAT_MODE_P (GET_MODE (operands[1])) || !TARGET_64BIT)"
-  "* return output_fix_trunc (insn, operands);"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "DI")])
-
 (define_split 
   [(set (match_operand:DI 0 "register_operand" "")
        (fix:DI (match_operand 1 "register_operand" "")))
              (clobber (match_dup 5))])]
   "")
 
-;; When SSE available, it is always faster to use it!
-(define_insn "fix_truncsfdi_sse"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_64BIT && TARGET_SSE"
-  "cvttss2si{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
-
-;; Avoid vector decoded form of the instruction.
-(define_peephole2
-  [(match_scratch:SF 2 "x")
-   (set (match_operand:DI 0 "register_operand" "")
-       (fix:DI (match_operand:SF 1 "memory_operand" "")))]
-  "TARGET_K8 && !optimize_size"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:DI (match_dup 2)))]
-  "")
+;; Non-fisttp x87 truncation to X87MODEI12 memory.  Operands 2 and 3 are
+;; HImode memory operands that are only "use"d (the pre-reload splitter
+;; passes the SLOT_CW_STORED and SLOT_CW_TRUNC stack slots).  Not
+;; available for SSE float source modes; no scratch is clobbered.
+(define_insn "fix_trunc<mode>_i387"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+       (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_80387 && !TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "fix_truncdfdi_sse"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))]
-  "TARGET_64BIT && TARGET_SSE2"
-  "cvttsd2si{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt,sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
+;; Non-fisttp x87 truncation to X87MODEI12 with a memory temporary
+;; (operand 4), allowing a register destination.  Never output directly
+;; ("#"); it is split after reload.
+(define_insn "fix_trunc<mode>_i387_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+       (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+  "TARGET_80387 && !TARGET_FISTTP
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "<MODE>")])
 
-;; Avoid vector decoded form of the instruction.
-(define_peephole2
-  [(match_scratch:DF 2 "Y")
-   (set (match_operand:DI 0 "register_operand" "")
-       (fix:DI (match_operand:DF 1 "memory_operand" "")))]
-  "TARGET_K8 && !optimize_size"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:DI (match_dup 2)))]
+;; Register destination, after reload: truncate into the memory temporary
+;; (operand 4), keeping both "use" operands, then load the temporary into
+;; the destination register.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+       (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (fix:X87MODEI12 (match_dup 1)))
+             (use (match_dup 2))
+             (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
   "")
 
-;; Signed conversion to SImode.
-
-(define_expand "fix_truncxfsi2"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (fix:SI (match_operand:XF 1 "register_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_80387"
-  "")
-
-(define_expand "fix_truncdfsi2"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (fix:SI (match_operand:DF 1 "register_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_80387 || TARGET_SSE2"
-{
-  if (TARGET_SSE2)
-   {
-     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
-     emit_insn (gen_fix_truncdfsi_sse (out, operands[1]));
-     if (out != operands[0])
-       emit_move_insn (operands[0], out);
-     DONE;
-   }
-})
-
-(define_expand "fix_truncsfsi2"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (fix:SI (match_operand:SF 1 "register_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])] 
-  "TARGET_80387 || TARGET_SSE"
-{
-  if (TARGET_SSE)
-   {
-     rtx out = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SImode);
-     emit_insn (gen_fix_truncsfsi_sse (out, operands[1]));
-     if (out != operands[0])
-       emit_move_insn (operands[0], out);
-     DONE;
-   }
-})
-
-;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
-;; of the machinery.
-(define_insn_and_split "*fix_truncsi_i387"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
-       (fix:SI (match_operand 1 "register_operand" "f,f")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !reload_completed && !reload_in_progress
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  ix86_optimize_mode_switching = 1;
-  operands[2] = assign_386_stack_local (HImode, 1);
-  operands[3] = assign_386_stack_local (HImode, 2);
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fix_truncsi_memory (operands[0], operands[1],
-                                      operands[2], operands[3]));
-  else
-    {
-      operands[4] = assign_386_stack_local (SImode, 0);
-      emit_insn (gen_fix_truncsi_nomemory (operands[0], operands[1],
-                                          operands[2], operands[3],
-                                          operands[4]));
-    }
-  DONE;
-}
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "SI")])
-
-(define_insn "fix_truncsi_nomemory"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=m,?r")
-       (fix:SI (match_operand 1 "register_operand" "f,f")))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:SI 4 "memory_operand" "=m,m"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "SI")])
-
-(define_insn "fix_truncsi_memory"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-       (fix:SI (match_operand 1 "register_operand" "f")))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "* return output_fix_trunc (insn, operands);"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "SI")])
-
-;; When SSE available, it is always faster to use it!
-(define_insn "fix_truncsfsi_sse"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_SSE"
-  "cvttss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
-
-;; Avoid vector decoded form of the instruction.
-(define_peephole2
-  [(match_scratch:SF 2 "x")
-   (set (match_operand:SI 0 "register_operand" "")
-       (fix:SI (match_operand:SF 1 "memory_operand" "")))]
-  "TARGET_K8 && !optimize_size"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SI (match_dup 2)))]
-  "")
-
-(define_insn "fix_truncdfsi_sse"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (match_operand:DF 1 "nonimmediate_operand" "Y,Ym")))]
-  "TARGET_SSE2"
-  "cvttsd2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
-
-;; Avoid vector decoded form of the instruction.
-(define_peephole2
-  [(match_scratch:DF 2 "Y")
-   (set (match_operand:SI 0 "register_operand" "")
-       (fix:SI (match_operand:DF 1 "memory_operand" "")))]
-  "TARGET_K8 && !optimize_size"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SI (match_dup 2)))]
-  "")
-
-(define_split 
-  [(set (match_operand:SI 0 "register_operand" "")
-       (fix:SI (match_operand 1 "register_operand" "")))
-   (use (match_operand:HI 2 "memory_operand" ""))
-   (use (match_operand:HI 3 "memory_operand" ""))
-   (clobber (match_operand:SI 4 "memory_operand" ""))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4) (fix:SI (match_dup 1)))
-             (use (match_dup 2))
-             (use (match_dup 3))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
-
-(define_split 
-  [(set (match_operand:SI 0 "memory_operand" "")
-       (fix:SI (match_operand 1 "register_operand" "")))
-   (use (match_operand:HI 2 "memory_operand" ""))
-   (use (match_operand:HI 3 "memory_operand" ""))
-   (clobber (match_operand:SI 4 "memory_operand" ""))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (fix:SI (match_dup 1)))
-             (use (match_dup 2))
-             (use (match_dup 3))])]
-  "")
-
-;; Signed conversion to HImode.
-
-(define_expand "fix_truncxfhi2"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                   (fix:HI (match_operand:XF 1 "register_operand" "")))
-              (clobber (reg:CC FLAGS_REG))])] 
-  "TARGET_80387"
-  "")
-
-(define_expand "fix_truncdfhi2"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                  (fix:HI (match_operand:DF 1 "register_operand" "")))
-              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_80387 && !TARGET_SSE2"
-  "")
-
-(define_expand "fix_truncsfhi2"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                  (fix:HI (match_operand:SF 1 "register_operand" "")))
-               (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_80387 && !TARGET_SSE"
-  "")
-
-;; See the comments in i386.h near OPTIMIZE_MODE_SWITCHING for the description
-;; of the machinery.
-(define_insn_and_split "*fix_trunchi_i387"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
-       (fix:HI (match_operand 1 "register_operand" "f,f")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !reload_completed && !reload_in_progress
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "#"
-  "&& 1"
-  [(const_int 0)]
-{
-  ix86_optimize_mode_switching = 1;
-  operands[2] = assign_386_stack_local (HImode, 1);
-  operands[3] = assign_386_stack_local (HImode, 2);
-  if (memory_operand (operands[0], VOIDmode))
-    emit_insn (gen_fix_trunchi_memory (operands[0], operands[1],
-                                      operands[2], operands[3]));
-  else
-    {
-      operands[4] = assign_386_stack_local (HImode, 0);
-      emit_insn (gen_fix_trunchi_nomemory (operands[0], operands[1],
-                                          operands[2], operands[3],
-                                          operands[4]));
-    }
-  DONE;
-}
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "HI")])
-
-(define_insn "fix_trunchi_nomemory"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=m,?r")
-       (fix:HI (match_operand 1 "register_operand" "f,f")))
-   (use (match_operand:HI 2 "memory_operand" "m,m"))
-   (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:HI 4 "memory_operand" "=m,m"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "#"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "HI")])
-
-(define_insn "fix_trunchi_memory"
-  [(set (match_operand:HI 0 "memory_operand" "=m")
-       (fix:HI (match_operand 1 "register_operand" "f")))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_80387 && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
-  "* return output_fix_trunc (insn, operands);"
-  [(set_attr "type" "fistp")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "HI")])
-
 (define_split 
-  [(set (match_operand:HI 0 "memory_operand" "")
-       (fix:HI (match_operand 1 "register_operand" "")))
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+       (fix:X87MODEI12 (match_operand 1 "register_operand" "")))
    (use (match_operand:HI 2 "memory_operand" ""))
    (use (match_operand:HI 3 "memory_operand" ""))
-   (clobber (match_operand:HI 4 "memory_operand" ""))]
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  ;; Memory destination, after reload: the temporary (operand 4) is not
+  ;; needed, so its clobber is dropped and the store goes to operand 0.
   "reload_completed"
-  [(parallel [(set (match_dup 0) (fix:HI (match_dup 1)))
+  [(parallel [(set (match_dup 0) (fix:X87MODEI12 (match_dup 1)))
              (use (match_dup 2))
              (use (match_dup 3))])]
   "")
 
-(define_split 
-  [(set (match_operand:HI 0 "register_operand" "")
-       (fix:HI (match_operand 1 "register_operand" "")))
-   (use (match_operand:HI 2 "memory_operand" ""))
-   (use (match_operand:HI 3 "memory_operand" ""))
-   (clobber (match_operand:HI 4 "memory_operand" ""))]
-  "reload_completed"
-  [(parallel [(set (match_dup 4) (fix:HI (match_dup 1)))
-             (use (match_dup 2))
-             (use (match_dup 3))
-             (clobber (match_dup 4))])
-   (set (match_dup 0) (match_dup 4))]
-  "")
-
 (define_insn "x86_fnstcw_1"
   [(set (match_operand:HI 0 "memory_operand" "=m")
        (unspec:HI [(reg:HI FPSR_REG)] UNSPEC_FSTCW))]
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "SF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floatsisf2"
   "")
 
 (define_insn "*floatsisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f")
+  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
        (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
   "TARGET_MIX_SSE_I387"
   "@
    cvtsi2ss\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
    (set_attr "mode" "SF")
+   (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "SF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floatdisf2"
   "")
 
 (define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f#x,?f#x,x#f,x#f")
+  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
        (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
   "TARGET_64BIT && TARGET_MIX_SSE_I387"
   "@
    cvtsi2ss{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
    (set_attr "mode" "SF")
+   (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "SF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floathidf2"
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "DF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floatsidf2"
   "")
 
 (define_insn "*floatsidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,?f,Y,Y")
        (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387"
   "@
    cvtsi2sd\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
    (set_attr "mode" "DF")
+   (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,double,direct")
    (set_attr "fp_int_src" "true")])
 
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "DF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floatdidf2"
   "")
 
 (define_insn "*floatdidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f#Y,?f#Y,Y#f,Y#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,?f,Y,Y")
        (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
   "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
   "@
    cvtsi2sd{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
    (set_attr "mode" "DF")
+   (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,double,direct")
    (set_attr "fp_int_src" "true")])
 
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "DF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "floathixf2"
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "XF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "floatsixf2"
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "XF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "floatdixf2"
    #"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "XF")
+   (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
 ;; %%% Kill these when reload knows how to do it.
 \f
 ;; SSE extract/set expanders
 
-(define_expand "vec_setv2df"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand:DF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE2"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse2_loadlpd (operands[0], operands[0], operands[1]));
-      break;
-    case 1:
-      emit_insn (gen_sse2_loadhpd (operands[0], operands[0], operands[1]));
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_extractv2df"
-  [(match_operand:DF 0 "register_operand" "")
-   (match_operand:V2DF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE2"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse2_storelpd (operands[0], operands[1]));
-      break;
-    case 1:
-      emit_insn (gen_sse2_storehpd (operands[0], operands[1]));
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
-
-(define_expand "vec_initv2df"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand 1 "" "")]
-  "TARGET_SSE2"
-{
-  ix86_expand_vector_init (operands[0], operands[1]);
-  DONE;
-})
-
-(define_expand "vec_setv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_insn (gen_sse_movss (operands[0], operands[0],
-                               simplify_gen_subreg (V4SFmode, operands[1],
-                                                    SFmode, 0)));
-      break;
-    case 1:
-      {
-       rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[0]);
-       emit_insn (gen_sse_unpcklps (operands[0], operands[0], operands[0]));
-       emit_insn (gen_sse_movss (operands[0], operands[0], op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (1 + (0<<2) + (2<<4) + (3<<6))));
-      }
-      break;
-    case 2:
-      {
-        rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-        rtx tmp = gen_reg_rtx (V4SFmode);
+\f
+;; Add instructions
 
-        emit_move_insn (tmp, operands[0]);
-        emit_insn (gen_sse_movss (tmp, tmp, op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (0 + (1<<2) + (0<<4) + (3<<6))));
-      }
-      break;
-    case 3:
-      {
-        rtx op1 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-        rtx tmp = gen_reg_rtx (V4SFmode);
+;; %%% splits for addditi3
 
-        emit_move_insn (tmp, operands[0]);
-        emit_insn (gen_sse_movss (tmp, tmp, op1));
-        emit_insn (gen_sse_shufps (operands[0], operands[0], tmp,
-                                   GEN_INT (0 + (1<<2) + (2<<4) + (0<<6))));
-      }
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
+;; Expander for the standard name "addti3": TImode addition,
+;; operand 0 = operand 1 + operand 2, with FLAGS_REG clobbered.
+;; Enabled only when TARGET_64BIT.  The preparation statement passes the
+;; operands to ix86_expand_binary_operator (PLUS, TImode, ...) and ends
+;; with DONE, so that helper generates the insns rather than the RTL
+;; template written here.
+(define_expand "addti3"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+                (match_operand:TI 2 "x86_64_general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
 
-(define_expand "vec_extractv4sf"
-  [(match_operand:SF 0 "register_operand" "")
-   (match_operand:V4SF 1 "register_operand" "")
-   (match_operand 2 "const_int_operand" "")]
-  "TARGET_SSE"
-{
-  switch (INTVAL (operands[2]))
-    {
-    case 0:
-      emit_move_insn (operands[0], gen_lowpart (SFmode, operands[1]));
-      break;
-    case 1:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_shufps (op0, tmp, tmp,
-                                   const1_rtx));
-      }
-      break;
-    case 2:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_unpckhps (op0, tmp, tmp));
-      }
-      break;
-    case 3:
-      {
-       rtx op0 = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
-       rtx tmp = gen_reg_rtx (V4SFmode);
-        emit_move_insn (tmp, operands[1]);
-        emit_insn (gen_sse_shufps (op0, tmp, tmp,
-                                   GEN_INT (3)));
-      }
-      break;
-    default:
-      abort ();
-    }
-  DONE;
-})
+;; Matcher for a TImode add that clobbers FLAGS_REG.
+;; Two alternatives: destination in a register ("=r") or in offsettable
+;; memory ("=o").  Operand 1 is tied to the destination ("0"); the "%"
+;; marks operands 1 and 2 as commutative.
+;; Requires TARGET_64BIT and ix86_binary_operator_ok (PLUS, TImode, ...).
+;; The output template is "#": no assembly is printed for this insn
+;; itself, it has to be split into other insns first.
+(define_insn "*addti3_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+       (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
+                (match_operand:TI 2 "general_operand" "roiF,riF")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
+  "#")
 
-(define_expand "vec_initv4sf"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand 1 "" "")]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_init (operands[0], operands[1]);
-  DONE;
-})
-\f
-;; Add instructions
+;; Split a TImode add (with FLAGS_REG clobber) into two DImode insns.
+;; Applies only when TARGET_64BIT and after reload has completed.
+;;   1st insn: operand 0 = operand 1 + operand 2 in DImode, and
+;;             FLAGS_REG is set via UNSPEC_ADD_CARRY of the same inputs.
+;;   2nd insn: operand 3 = (ltu FLAGS_REG 0) + operand 4 + operand 5,
+;;             i.e. the other halves plus the carry from the 1st insn;
+;;             FLAGS_REG is clobbered.
+;; The preparation code calls split_ti once per original operand: each
+;; call overwrites operands[N] with one DImode half and stores the other
+;; half in operands[N+3].  Since the carry flows from the first insn to
+;; the second, operands 0-2 act as the low halves and 3-5 as the high
+;; halves.
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+                (match_operand:TI 2 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
+                                         UNSPEC_ADD_CARRY))
+             (set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+                  (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+                                    (match_dup 4))
+                           (match_dup 5)))
+             (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (operands+0, 1, operands+0, operands+3);
+   split_ti (operands+1, 1, operands+1, operands+4);
+   split_ti (operands+2, 1, operands+2, operands+5);")
 
 ;; %%% splits for addsidi3
 ;  [(set (match_operand:DI 0 "nonimmediate_operand" "")
       return "lea{q}\t{%a2, %0|%0, %a2}";
 
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{q}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{q}\t%0";
       else
-       abort ();
-
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+       }
+
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
 
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{q}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{q}\t%0";
       else
-       abort ();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* ???? We ought to handle there the 32bit case too
         - do we need new constraint?  */
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{q}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{q}\t%0";
       else
-       abort ();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* ???? We ought to handle there the 32bit case too
         - do we need new constraint?  */
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
         return "inc{q}\t%0";
-      else if (operands[2] == const1_rtx)
-        return "dec{q}\t%0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == const1_rtx);
+          return "dec{q}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if ((INTVAL (operands[2]) == -128
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{q}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{q}\t%0";
       else
-       abort();
+        {
+          gcc_assert (operands[2] == constm1_rtx);
+          return "dec{q}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if (GET_CODE (operands[2]) == CONST_INT
       return "lea{l}\t{%a2, %0|%0, %a2}";
 
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{l}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%0";
       else
-       abort();
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
 
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
         return "inc{l}\t%k0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%k0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{l}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if (GET_CODE (operands[2]) == CONST_INT
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
         return "inc{l}\t%k0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%k0";
       else
-       abort();
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{l}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if (GET_CODE (operands[2]) == CONST_INT
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
         return "inc{l}\t%k0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%k0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%k0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
         return "inc{l}\t%0";
-      else if (operands[2] == const1_rtx)
-        return "dec{l}\t%0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == const1_rtx);
+          return "dec{l}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if ((INTVAL (operands[2]) == -128
   switch (get_attr_type (insn))
     {
     case TYPE_INCDEC:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (operands[2] == const1_rtx)
         return "inc{l}\t%0";
-      else if (operands[2] == constm1_rtx)
-        return "dec{l}\t%0";
       else
-       abort();
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+          return "dec{l}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if (GET_CODE (operands[2]) == CONST_INT
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{w}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return "dec{w}\t%0";
-      abort();
+      else
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+         return "dec{w}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{w}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return "dec{w}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+         return "dec{w}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{w}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return "dec{w}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+         return "dec{w}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{w}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return "dec{w}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx);
+         return "dec{w}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
        (const_string "alu")))
    (set_attr "mode" "HI")])
 
-; See comments above addsi_3_imm for details.
+; See comments above addsi_4 for details.
 (define_insn "*addhi_4"
   [(set (reg FLAGS_REG)
        (compare (match_operand:HI 1 "nonimmediate_operand" "0")
     case TYPE_INCDEC:
       if (operands[2] == constm1_rtx)
         return "inc{w}\t%0";
-      else if (operands[2] == const1_rtx)
-        return "dec{w}\t%0";
       else
-       abort();
+       {
+         gcc_assert (operands[2] == const1_rtx);
+          return "dec{w}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
         Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
       if ((INTVAL (operands[2]) == -128
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{w}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return "dec{w}\t%0";
-      abort();
+      else
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+         return "dec{w}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
-      abort();
+      else
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+         return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return widen ? "inc{l}\t%k0" : "inc{b}\t%0";
-      else if (operands[2] == constm1_rtx)
-       return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
-      abort();
+      else
+       {
+         gcc_assert (operands[2] == constm1_rtx);
+         return widen ? "dec{l}\t%k0" : "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.
     case TYPE_INCDEC:
       if (operands[1] == const1_rtx)
        return "inc{b}\t%0";
-      else if (operands[1] == constm1_rtx)
-       return "dec{b}\t%0";
-      abort();
+      else
+       {
+         gcc_assert (operands[1] == constm1_rtx);
+         return "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subl $4,%eax' rather than `addl $-4, %eax'.  */
      (if_then_else (match_operand:QI 1 "incdec_operand" "")
        (const_string "incdec")
        (const_string "alu1")))
+   (set (attr "memory")
+     (if_then_else (match_operand 1 "memory_operand" "")
+        (const_string "load")
+        (const_string "none")))
    (set_attr "mode" "QI")])
 
 (define_insn "*addqi_2"
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{b}\t%0";
-      else if (operands[2] == constm1_rtx
-              || (GET_CODE (operands[2]) == CONST_INT
-                  && INTVAL (operands[2]) == 255))
-       return "dec{b}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx
+                     || (GET_CODE (operands[2]) == CONST_INT
+                         && INTVAL (operands[2]) == 255));
+         return "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{b}\t%0";
-      else if (operands[2] == constm1_rtx
-              || (GET_CODE (operands[2]) == CONST_INT
-                  && INTVAL (operands[2]) == 255))
-       return "dec{b}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx
+                     || (GET_CODE (operands[2]) == CONST_INT
+                         && INTVAL (operands[2]) == 255));
+         return "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
        (const_string "alu")))
    (set_attr "mode" "QI")])
 
-; See comments above addsi_3_imm for details.
+; See comments above addsi_4 for details.
 (define_insn "*addqi_4"
   [(set (reg FLAGS_REG)
        (compare (match_operand:QI 1 "nonimmediate_operand" "0")
          || (GET_CODE (operands[2]) == CONST_INT
              && INTVAL (operands[2]) == 255))
         return "inc{b}\t%0";
-      else if (operands[2] == const1_rtx)
-        return "dec{b}\t%0";
       else
-       abort();
+       {
+         gcc_assert (operands[2] == const1_rtx);
+          return "dec{b}\t%0";
+       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (INTVAL (operands[2]) < 0)
         {
           operands[2] = GEN_INT (-INTVAL (operands[2]));
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{b}\t%0";
-      else if (operands[2] == constm1_rtx
-              || (GET_CODE (operands[2]) == CONST_INT
-                  && INTVAL (operands[2]) == 255))
-       return "dec{b}\t%0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx
+                     || (GET_CODE (operands[2]) == CONST_INT
+                         && INTVAL (operands[2]) == 255));
+         return "dec{b}\t%0";
+       }
 
     default:
       /* Make things pretty and `subb $4,%al' rather than `addb $-4, %al'.  */
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{b}\t%h0";
-      else if (operands[2] == constm1_rtx
-              || (GET_CODE (operands[2]) == CONST_INT
-                  && INTVAL (operands[2]) == 255))
-       return "dec{b}\t%h0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx
+                     || (GET_CODE (operands[2]) == CONST_INT
+                         && INTVAL (operands[2]) == 255));
+          return "dec{b}\t%h0";
+       }
 
     default:
       return "add{b}\t{%2, %h0|%h0, %2}";
     case TYPE_INCDEC:
       if (operands[2] == const1_rtx)
        return "inc{b}\t%h0";
-      else if (operands[2] == constm1_rtx
-              || (GET_CODE (operands[2]) == CONST_INT
-                  && INTVAL (operands[2]) == 255))
-       return "dec{b}\t%h0";
-      abort();
+      else
+        {
+         gcc_assert (operands[2] == constm1_rtx
+                     || (GET_CODE (operands[2]) == CONST_INT
+                         && INTVAL (operands[2]) == 255));
+          return "dec{b}\t%h0";
+        }
 
     default:
       return "add{b}\t{%2, %h0|%h0, %2}";
 \f
 ;; Subtract instructions
 
+;; %%% splits for subditi3
+
+;; Expander for the standard name "subti3": TImode subtraction,
+;; operand 0 = operand 1 - operand 2, with FLAGS_REG clobbered.
+;; Enabled only when TARGET_64BIT.  The preparation statement passes the
+;; operands to ix86_expand_binary_operator (MINUS, TImode, ...) and ends
+;; with DONE, so that helper generates the insns rather than the RTL
+;; template written here.
+(define_expand "subti3"
+  [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
+                  (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+                            (match_operand:TI 2 "x86_64_general_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
+
+;; Matcher for a TImode subtract that clobbers FLAGS_REG.
+;; Two alternatives: destination in a register ("=r") or in offsettable
+;; memory ("=o").  Operand 1 is tied to the destination ("0") in both;
+;; there is no "%" modifier here because subtraction is not commutative.
+;; Requires TARGET_64BIT and ix86_binary_operator_ok (MINUS, TImode, ...).
+;; The output template is "#": no assembly is printed for this insn
+;; itself, it has to be split into other insns first.
+(define_insn "*subti3_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
+       (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
+                 (match_operand:TI 2 "general_operand" "roiF,riF")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
+  "#")
+
+;; Split a TImode subtract (with FLAGS_REG clobber) into two DImode insns.
+;; Applies only when TARGET_64BIT and after reload has completed.
+;;   1st insn: operand 0 = operand 1 - operand 2 in DImode, and
+;;             FLAGS_REG is set from comparing operand 1 with operand 2.
+;;   2nd insn: operand 3 = operand 4 - ((ltu FLAGS_REG 0) + operand 5),
+;;             i.e. the other halves minus the borrow from the 1st insn;
+;;             FLAGS_REG is clobbered.
+;; The preparation code calls split_ti once per original operand: each
+;; call overwrites operands[N] with one DImode half and stores the other
+;; half in operands[N+3].  Since the borrow flows from the first insn to
+;; the second, operands 0-2 act as the low halves and 3-5 as the high
+;; halves.
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+                 (match_operand:TI 2 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
+             (set (match_dup 0) (minus:DI (match_dup 1) (match_dup 2)))])
+   (parallel [(set (match_dup 3)
+                  (minus:DI (match_dup 4)
+                            (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+                                     (match_dup 5))))
+             (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (operands+0, 1, operands+0, operands+3);
+   split_ti (operands+1, 1, operands+1, operands+4);
+   split_ti (operands+2, 1, operands+2, operands+5);")
+
 ;; %%% splits for subsidi3
 
 (define_expand "subdi3"
                    (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
    && ix86_binary_operator_ok (MINUS, SImode, operands)"
-  "sub{q}\t{%2, %0|%0, %2}"
+  "sub{l}\t{%2, %1|%1, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "DI")])
 
     }
   else
     {
-      if (true_regnum (operands[1]))
-       abort();
+      gcc_assert (!true_regnum (operands[1]));
       operands[4] = operands[1];
     }
 })
     }
   else
     {
-      if (true_regnum (operands[1]))
-       abort();
+      gcc_assert (!true_regnum (operands[1]));
       operands[4] = operands[1];
     }
 })
                   (match_operand:SI 2 "const_int_operand" ""))
                 (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)
+   && INTVAL (operands[1]) > 0
+   && INTVAL (operands[2]) >= 0
+   && INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
    && (GET_MODE (operands[0]) == SImode
        || (TARGET_64BIT && GET_MODE (operands[0]) == DImode)
        || GET_MODE (operands[0]) == HImode
                 (const_int 0)))]
   "TARGET_64BIT
    && ix86_match_ccmode (insn, CCNOmode)
-   /* The code below cannot deal with constants outside HOST_WIDE_INT.  */
-   && INTVAL (operands[1]) + INTVAL (operands[2]) < HOST_BITS_PER_WIDE_INT
+   && INTVAL (operands[1]) > 0
+   && INTVAL (operands[2]) >= 0
    /* Ensure that resulting mask is zero or sign extended operand.  */
    && (INTVAL (operands[1]) + INTVAL (operands[2]) <= 32
        || (INTVAL (operands[1]) + INTVAL (operands[2]) == 64
       val = gen_lowpart (QImode, val);
     }
 
-  mask  = ((HOST_WIDE_INT)1 << (pos + len)) - 1;
-  mask &= ~(((HOST_WIDE_INT)1 << pos) - 1);
+  if (len == HOST_BITS_PER_WIDE_INT)
+    mask = -1;
+  else
+    mask = ((HOST_WIDE_INT)1 << len) - 1;
+  mask <<= pos;
 
   operands[2] = gen_rtx_AND (mode, val, gen_int_mode (mask, mode));
 })
       {
        enum machine_mode mode;
 
-       if (GET_CODE (operands[2]) != CONST_INT)
-         abort ();
+       gcc_assert (GET_CODE (operands[2]) == CONST_INT);
         if (INTVAL (operands[2]) == 0xff)
          mode = QImode;
-       else if (INTVAL (operands[2]) == 0xffff)
-         mode = HImode;
        else
-         abort ();
+         {
+           gcc_assert (INTVAL (operands[2]) == 0xffff);
+           mode = HImode;
+         }
        
        operands[1] = gen_lowpart (mode, operands[1]);
        if (mode == QImode)
       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       if (get_attr_mode (insn) == MODE_SI)
        return "and{l}\t{%k2, %k0|%k0, %k2}";
       else
       {
        enum machine_mode mode;
 
-       if (GET_CODE (operands[2]) != CONST_INT)
-         abort ();
+       gcc_assert (GET_CODE (operands[2]) == CONST_INT);
         if (INTVAL (operands[2]) == 0xff)
          mode = QImode;
-       else if (INTVAL (operands[2]) == 0xffff)
-         mode = HImode;
        else
-         abort ();
+         {
+           gcc_assert (INTVAL (operands[2]) == 0xffff);
+           mode = HImode;
+         }
        
        operands[1] = gen_lowpart (mode, operands[1]);
        if (mode == QImode)
       }
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       return "and{l}\t{%2, %0|%0, %2}";
     }
 }
   switch (get_attr_type (insn))
     {
     case TYPE_IMOVX:
-      if (GET_CODE (operands[2]) != CONST_INT)
-       abort ();
-      if (INTVAL (operands[2]) == 0xff)
-       return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
-      abort ();
+      gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+      gcc_assert (INTVAL (operands[2]) == 0xff);
+      return "movz{bl|x}\t{%b1, %k0|%k0, %b1}";
 
     default:
-      if (! rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
 
       return "and{w}\t{%2, %0|%0, %2}";
     }
 \f
 ;; Negation instructions
 
+;; Expander for the standard name "negti2": TImode negation,
+;; operand 0 = -operand 1, with FLAGS_REG clobbered.
+;; Enabled only when TARGET_64BIT.  The preparation statement passes the
+;; operands to ix86_expand_unary_operator (NEG, TImode, ...) and ends
+;; with DONE, so that helper generates the insns rather than the RTL
+;; template written here.
+(define_expand "negti2"
+  [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
+                  (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+  "ix86_expand_unary_operator (NEG, TImode, operands); DONE;")
+
+;; Matcher for a TImode negate that clobbers FLAGS_REG.
+;; Single alternative: destination in a register or offsettable memory
+;; ("=ro"), with the source tied to the destination ("0").
+;; Requires TARGET_64BIT and ix86_unary_operator_ok (NEG, TImode, ...).
+;; The output template is "#": no assembly is printed for this insn
+;; itself, it has to be split into other insns first.
+(define_insn "*negti2_1"
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=ro")
+       (neg:TI (match_operand:TI 1 "general_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_unary_operator_ok (NEG, TImode, operands)"
+  "#")
+
+;; After reload, split a TImode negation into three DImode steps:
+;;   1. negate operand 2 into operand 0, also setting the flags (CCZ);
+;;   2. add the carry (ltu on the flags) and operand 3 into operand 1;
+;;   3. negate operand 1.
+;; The preparation code calls split_ti twice: first to derive operands 2
+;; and 3 from the TImode source, then to replace operands 0 and 1 with the
+;; two parts of the TImode destination.
+;; NOTE(review): presumably operands 0/2 are the low halves and 1/3 the
+;; high halves -- confirm against split_ti.
+(define_split
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (neg:TI (match_operand:TI 1 "general_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(parallel
+    [(set (reg:CCZ FLAGS_REG)
+         (compare:CCZ (neg:DI (match_dup 2)) (const_int 0)))
+     (set (match_dup 0) (neg:DI (match_dup 2)))])
+   (parallel
+    [(set (match_dup 1)
+         (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
+                           (match_dup 3))
+                  (const_int 0)))
+     (clobber (reg:CC FLAGS_REG))])
+   (parallel
+    [(set (match_dup 1)
+         (neg:DI (match_dup 1)))
+     (clobber (reg:CC FLAGS_REG))])]
+  "split_ti (operands+1, 1, operands+2, operands+3);
+   split_ti (operands+0, 1, operands+0, operands+1);")
+
 (define_expand "negdi2"
   [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
                   (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))
   "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;")
 
+;; SFmode abs/neg (operator 3 is matched by absneg_operator) for the
+;; configuration where both TARGET_SSE_MATH and TARGET_MIX_SSE_I387 hold.
+;; Operand 2 is a V4SF value that is only `use'd; the flags are clobbered.
+;; Output is "#", so the insn must be split later.  This change drops the
+;; "#..." suffixes from the constraint alternatives.
+;; NOTE(review): operand 2 is presumably the sign-bit mask -- confirm.
 (define_insn "*absnegsf2_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x#fr,x#fr,f#xr,rm#xf")
+  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x  ,x,f,rm")
        (match_operator:SF 3 "absneg_operator"
-         [(match_operand:SF 1 "nonimmediate_operand" "0    ,x#fr,0   ,0")]))
-   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm   ,0   ,X   ,X"))
+         [(match_operand:SF 1 "nonimmediate_operand" "0   ,x,0,0 ")]))
+   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm  ,0,X,X "))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE_MATH && TARGET_MIX_SSE_I387
    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
   "#")
 
+;; SFmode abs/neg (operator 3 is matched by absneg_operator) under
+;; TARGET_SSE_MATH.  Three alternatives: two with an SSE ("x") destination
+;; and one with an integer register or memory ("rm") destination.  Operand
+;; 2 is a V4SF value that is only `use'd.  Output is "#", so the insn must
+;; be split later.  This change drops the "#..." constraint suffixes.
 (define_insn "*absnegsf2_sse"
-  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x#r,x#r,rm#x")
+  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x,x,rm")
        (match_operator:SF 3 "absneg_operator"
-         [(match_operand:SF 1 "nonimmediate_operand" "0   ,x#r,0")]))
-   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm  ,0  ,X"))
+         [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")]))
+   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm,0,X"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE_MATH
    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
   "#")
 
 (define_insn "*absnegsf2_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=f#r,rm#f")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm")
        (match_operator:SF 3 "absneg_operator"
          [(match_operand:SF 1 "nonimmediate_operand" "0,0")]))
    (use (match_operand 2 "" ""))
    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
   "#")
 
+;; Expander for SFmode copysign, available under TARGET_SSE_MATH.
+;; Operands 0 and 2 must be registers; operand 1 may be any non-memory
+;; operand.  All of the work is done by ix86_expand_copysign.
+(define_expand "copysignsf3"
+  [(match_operand:SF 0 "register_operand" "")
+   (match_operand:SF 1 "nonmemory_operand" "")
+   (match_operand:SF 2 "register_operand" "")]
+  "TARGET_SSE_MATH"
+{
+  ix86_expand_copysign (operands);
+  DONE;
+})
+
+;; SFmode copysign represented as an UNSPEC_COPYSIGN over a V4SF operand 1,
+;; an SF register operand 2 (tied to the destination by constraint "0") and
+;; a V4SF operand 3.  Emits "#" and is split after reload by
+;; ix86_split_copysign_const.
+;; NOTE(review): judging by the name this handles a constant first
+;; argument, with operand 3 presumably a mask -- confirm in i386.c.
+(define_insn_and_split "copysignsf3_const"
+  [(set (match_operand:SF 0 "register_operand"          "=x")
+       (unspec:SF
+         [(match_operand:V4SF 1 "vector_move_operand"  "xmC")
+          (match_operand:SF 2 "register_operand"       "0")
+          (match_operand:V4SF 3 "nonimmediate_operand" "xm")]
+         UNSPEC_COPYSIGN))]
+  "TARGET_SSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_const (operands);
+  DONE;
+})
+
+;; SFmode copysign with two SF register inputs (operands 2 and 3) and two
+;; V4SF inputs (operands 4 and 5), wrapped in UNSPEC_COPYSIGN.  Operand 1
+;; is a V4SF scratch that is clobbered.  Five constraint alternatives
+;; describe which operands may share registers.  Output is "#", so the
+;; insn must be split later.
+;; NOTE(review): operands 4 and 5 are presumably bit masks -- confirm.
+(define_insn "copysignsf3_var"
+  [(set (match_operand:SF 0 "register_operand"          "=x, x, x, x,x")
+       (unspec:SF
+         [(match_operand:SF 2 "register_operand"       " x, 0, 0, x,x")
+          (match_operand:SF 3 "register_operand"       " 1, 1, x, 1,x")
+          (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
+          (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:V4SF 1                      "=x, x, x, x,x"))]
+  "TARGET_SSE_MATH"
+  "#")
+
+;; After reload, split the SFmode UNSPEC_COPYSIGN form that carries a V4SF
+;; scratch (operand 1).  The replacement sequence is generated entirely by
+;; ix86_split_copysign_var.
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+       (unspec:SF
+         [(match_operand:SF 2 "register_operand" "")
+          (match_operand:SF 3 "register_operand" "")
+          (match_operand:V4SF 4 "" "")
+          (match_operand:V4SF 5 "" "")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:V4SF 1 ""))]
+  "TARGET_SSE_MATH && reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_var (operands);
+  DONE;
+})
+
 (define_expand "negdf2"
   [(set (match_operand:DF 0 "nonimmediate_operand" "")
        (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
   "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;")
 
+;; DFmode abs/neg (operator 3 is matched by absneg_operator) for the
+;; configuration where TARGET_SSE2, TARGET_SSE_MATH and
+;; TARGET_MIX_SSE_I387 all hold.  Operand 2 is a V2DF value that is only
+;; `use'd; the flags are clobbered.  Output is "#", so the insn must be
+;; split later.  This change drops the "#..." constraint suffixes.
 (define_insn "*absnegdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y#fr,Y#fr,f#Yr,rm#Yf")
+  [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y,Y,f,rm")
        (match_operator:DF 3 "absneg_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0    ,Y#fr,0   ,0")]))
-   (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym   ,0   ,X   ,X"))
+         [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y,0,0")]))
+   (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym,0,X,X"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
   "#")
 
+;; DFmode abs/neg (operator 3 is matched by absneg_operator) under
+;; TARGET_SSE2 && TARGET_SSE_MATH.  Operand 2 is a V2DF value that is only
+;; `use'd.  Output is "#", so the insn must be split later.  This change
+;; drops the "#..." constraint suffixes.
 (define_insn "*absnegdf2_sse"
-  [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y#r,Y#r,rm#Y")
+  [(set (match_operand:DF 0 "nonimmediate_operand"    "=Y,Y,rm")
        (match_operator:DF 3 "absneg_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0   ,Y#r,0")]))
-   (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym  ,0  ,X"))
+         [(match_operand:DF 1 "nonimmediate_operand" "0 ,Y,0 ")]))
+   (use (match_operand:V2DF 2 "nonimmediate_operand"  "Ym,0,X "))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_SSE2 && TARGET_SSE_MATH
    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
   "#")
 
 (define_insn "*absnegdf2_i387"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,rm#f")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm")
        (match_operator:DF 3 "absneg_operator"
          [(match_operand:DF 1 "nonimmediate_operand" "0,0")]))
    (use (match_operand 2 "" ""))
    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
   "#")
 
+;; Expander for DFmode copysign, available under
+;; TARGET_SSE2 && TARGET_SSE_MATH.  Operands 0 and 2 must be registers;
+;; operand 1 may be any non-memory operand.  All of the work is done by
+;; ix86_expand_copysign.
+(define_expand "copysigndf3"
+  [(match_operand:DF 0 "register_operand" "")
+   (match_operand:DF 1 "nonmemory_operand" "")
+   (match_operand:DF 2 "register_operand" "")]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+{
+  ix86_expand_copysign (operands);
+  DONE;
+})
+
+;; DFmode copysign represented as an UNSPEC_COPYSIGN over a V2DF operand 1,
+;; a DF register operand 2 (tied to the destination by constraint "0") and
+;; a V2DF operand 3.  Emits "#" and is split after reload by
+;; ix86_split_copysign_const.
+;; NOTE(review): judging by the name this handles a constant first
+;; argument, with operand 3 presumably a mask -- confirm in i386.c.
+(define_insn_and_split "copysigndf3_const"
+  [(set (match_operand:DF 0 "register_operand"          "=x")
+       (unspec:DF
+         [(match_operand:V2DF 1 "vector_move_operand"  "xmC")
+          (match_operand:DF 2 "register_operand"       "0")
+          (match_operand:V2DF 3 "nonimmediate_operand" "xm")]
+         UNSPEC_COPYSIGN))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_const (operands);
+  DONE;
+})
+
+;; DFmode copysign with two DF register inputs (operands 2 and 3) and two
+;; V2DF inputs (operands 4 and 5), wrapped in UNSPEC_COPYSIGN.  Operand 1
+;; is a V2DF scratch that is clobbered.  Five constraint alternatives
+;; describe which operands may share registers.  Output is "#", so the
+;; insn must be split later.
+;; NOTE(review): operands 4 and 5 are presumably bit masks -- confirm.
+(define_insn "copysigndf3_var"
+  [(set (match_operand:DF 0 "register_operand"          "=x, x, x, x,x")
+       (unspec:DF
+         [(match_operand:DF 2 "register_operand"       " x, 0, 0, x,x")
+          (match_operand:DF 3 "register_operand"       " 1, 1, x, 1,x")
+          (match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
+          (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:V2DF 1                      "=x, x, x, x,x"))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "#")
+
+;; After reload, split the DFmode UNSPEC_COPYSIGN form that carries a V2DF
+;; scratch (operand 1).  The replacement sequence is generated entirely by
+;; ix86_split_copysign_var.
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+       (unspec:DF
+         [(match_operand:DF 2 "register_operand" "")
+          (match_operand:DF 3 "register_operand" "")
+          (match_operand:V2DF 4 "" "")
+          (match_operand:V2DF 5 "" "")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:V2DF 1 ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_var (operands);
+  DONE;
+})
+
 (define_expand "negxf2"
   [(set (match_operand:XF 0 "nonimmediate_operand" "")
        (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
 
+;; Expander for XFmode absolute value, available under TARGET_80387.
+;; This change corrects the RTL template from neg:XF to abs:XF so that it
+;; agrees with the ABS code passed to ix86_expand_fp_absneg_operator.
 (define_expand "absxf2"
   [(set (match_operand:XF 0 "nonimmediate_operand" "")
-       (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
+       (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
   "TARGET_80387"
   "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;")
 
 (define_insn "*absnegxf2_i387"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f#r,rm#f")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm")
        (match_operator:XF 3 "absneg_operator"
          [(match_operand:XF 1 "nonimmediate_operand" "0,0")]))
    (use (match_operand 2 "" ""))
+;; SFmode negation on an x87 register, emitted as "fchs".  With this
+;; change the insn is recognized after reload as before, and additionally
+;; before reload whenever TARGET_SSE_MATH is off.
 (define_insn "*negsf2_1"
   [(set (match_operand:SF 0 "register_operand" "=f")
        (neg:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
   "fchs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "SF")])
+;; DFmode negation on an x87 register, emitted as "fchs".  With this
+;; change the insn is recognized after reload as before, and additionally
+;; before reload unless both TARGET_SSE2 and TARGET_SSE_MATH are on.
 (define_insn "*negdf2_1"
   [(set (match_operand:DF 0 "register_operand" "=f")
        (neg:DF (match_operand:DF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
   "fchs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "DF")])
+;; XFmode negation on an x87 register, emitted as "fchs".  This change
+;; removes the reload_completed requirement, leaving TARGET_80387 as the
+;; only condition.
 (define_insn "*negxf2_1"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (neg:XF (match_operand:XF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387"
   "fchs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
+;; SFmode absolute value on an x87 register, emitted as "fabs".  With this
+;; change the insn is recognized after reload as before, and additionally
+;; before reload whenever TARGET_SSE_MATH is off.
 (define_insn "*abssf2_1"
   [(set (match_operand:SF 0 "register_operand" "=f")
        (abs:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
   "fabs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "SF")])
+;; DFmode absolute value on an x87 register, emitted as "fabs".  With this
+;; change the insn is recognized after reload as before, and additionally
+;; before reload unless both TARGET_SSE2 and TARGET_SSE_MATH are on.
 (define_insn "*absdf2_1"
   [(set (match_operand:DF 0 "register_operand" "=f")
        (abs:DF (match_operand:DF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
   "fabs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "DF")])
+;; XFmode absolute value on an x87 register, emitted as "fabs".  This
+;; change removes the reload_completed requirement, leaving TARGET_80387
+;; as the only condition.  The "mode" attribute is also corrected from
+;; "DF" to "XF": the insn operates on XFmode operands, and the sibling
+;; pattern *negxf2_1 already declares mode "XF".
 (define_insn "*absxf2_1"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (abs:XF (match_operand:XF 1 "register_operand" "0")))]
-  "TARGET_80387 && reload_completed"
+  "TARGET_80387"
   "fabs"
   [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "XF")])
 ;; shift pair, instead using moves and sign extension for counts greater
 ;; than 31.
 
+;; Expander for TImode left shift, enabled only under TARGET_64BIT.
+;; When the shift count is not an immediate, the ashlti3_1 insn is emitted
+;; directly; otherwise expansion is delegated to
+;; ix86_expand_binary_operator.
+(define_expand "ashlti3"
+  [(parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (ashift:TI (match_operand:TI 1 "register_operand" "")
+                             (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+{
+  if (! immediate_operand (operands[2], QImode))
+    {
+      emit_insn (gen_ashlti3_1 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  ix86_expand_binary_operator (ASHIFT, TImode, operands);
+  DONE;
+})
+
+;; TImode left shift by a count held in a register (constraint "c").
+;; Operand 1 is tied to the destination; an early-clobbered DImode scratch
+;; (operand 3) and the flags are clobbered.  Output is "#", so the insn
+;; must be split before final output.
+(define_insn "ashlti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (ashift:TI (match_operand:TI 1 "register_operand" "0")
+                  (match_operand:QI 2 "register_operand" "c")))
+   (clobber (match_scratch:DI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; TImode left shift by an immediate count (constraint "O").  Operand 1 is
+;; tied to the destination and the flags are clobbered; no scratch is
+;; needed.  Output is "#", so the insn must be split before final output.
+;; NOTE(review): the range accepted by constraint "O" is defined
+;; elsewhere -- confirm it covers the intended TImode shift counts.
+(define_insn "*ashlti3_2"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (ashift:TI (match_operand:TI 1 "register_operand" "0")
+                  (match_operand:QI 2 "immediate_operand" "O")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; After reload, split a TImode left shift by a register count.  The
+;; DImode scratch (operand 3) is passed on to ix86_split_ashl, which
+;; generates the replacement sequence.
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+                  (match_operand:QI 2 "register_operand" "")))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, operands[3], TImode); DONE;")
+
+;; After reload, split a TImode left shift by an immediate count.  No
+;; scratch register is available here, so NULL_RTX is passed to
+;; ix86_split_ashl.
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashift:TI (match_operand:TI 1 "register_operand" "")
+                  (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
+
+;; 64-bit double-precision left shift (shld{q}).  Operand 0 is read and
+;; written ("+"): it is shifted left by operand 2 and OR-ed with operand 1
+;; logically shifted right by (64 - operand 2).  The count is either a
+;; constant matching constraint "J" or a register matching "c", giving the
+;; two output alternatives.  The flags are clobbered.
+;; NOTE(review): "%s2" in the second template is presumably the operand
+;; modifier that prints the shift-double count plus delimiter -- confirm
+;; in print_operand.
+(define_insn "x86_64_shld"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+                 (match_operand:QI 2 "nonmemory_operand" "J,c"))
+               (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r")
+                 (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "@
+   shld{q}\t{%2, %1, %0|%0, %1, %2}
+   shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")])
+
+;; Expands to three insns under TARGET_64BIT:
+;;   1. set the flags (CCZ) from (operand 2 AND 64) compared with zero;
+;;   2. conditionally replace operand 0 with operand 1 when the result
+;;      is nonzero;
+;;   3. conditionally replace operand 1 with operand 3 when the result
+;;      is nonzero.
+;; NOTE(review): presumably used to fix up double-word shifts whose count
+;; has bit 6 set -- confirm against the callers in i386.c.
+(define_expand "x86_64_shift_adj"
+  [(set (reg:CCZ FLAGS_REG)
+       (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
+                            (const_int 64))
+                    (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "")
+        (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                        (match_operand:DI 1 "register_operand" "")
+                        (match_dup 0)))
+   (set (match_dup 1)
+       (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                        (match_operand:DI 3 "register_operand" "r")
+                        (match_dup 1)))]
+  "TARGET_64BIT"
+  "")
+
 (define_expand "ashldi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
        (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
-      if (!rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       return "add{q}\t{%0, %0|%0, %0}";
 
     case TYPE_LEA:
-      if (GET_CODE (operands[2]) != CONST_INT
-         || (unsigned HOST_WIDE_INT) INTVAL (operands[2]) > 3)
-       abort ();
+      gcc_assert (GET_CODE (operands[2]) == CONST_INT);
+      gcc_assert ((unsigned HOST_WIDE_INT) INTVAL (operands[2]) <= 3);
       operands[1] = gen_rtx_MULT (DImode, operands[1],
                                  GEN_INT (1 << INTVAL (operands[2])));
       return "lea{q}\t{%a1, %0|%0, %a1}";
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{q}\t{%0, %0|%0, %0}";
+
+    default:
+      if (REG_P (operands[2]))
+       return "sal{q}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+              && (TARGET_SHIFT1 || optimize_size))
+       return "sal{q}\t%0";
+      else
+       return "sal{q}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+                         (const_int 0))
+                     (match_operand 0 "register_operand" ""))
+                (match_operand 2 "const1_operand" ""))
+             (const_string "alu")
+          ]
+          (const_string "ishift")))
+   (set_attr "mode" "DI")])
+
+(define_insn "*ashldi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                    (match_operand:QI 2 "immediate_operand" "e"))
+         (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
       return "add{q}\t{%0, %0|%0, %0}";
 
     default:
    (match_dup 3)]
   "!TARGET_64BIT && TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_ashldi (operands, operands[3]); DONE;")
+  "ix86_split_ashl (operands, operands[3], DImode); DONE;")
 
+;; 32-bit-only split of a DImode left shift without a scratch register.
+;; With this change the split waits for flow2_completed only when
+;; optimizing (optimize > 0) with flag_peephole2 set; otherwise it fires
+;; once reload has completed.  The call is also switched from
+;; ix86_split_ashldi to ix86_split_ashl, which takes the mode explicitly.
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
        (ashift:DI (match_operand:DI 1 "nonmemory_operand" "")
                   (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                    ? flow2_completed : reload_completed)"
   [(const_int 0)]
-  "ix86_split_ashldi (operands, NULL_RTX); DONE;")
+  "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
 
 (define_insn "x86_shld_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
-      if (!rtx_equal_p (operands[0], operands[1]))
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
       return "add{l}\t{%0, %0|%0, %0}";
 
     case TYPE_LEA:
   [(const_int 0)]
 {
   rtx pat, clob;
-  emit_move_insn (operands[1], operands[0]);
+  emit_move_insn (operands[0], operands[1]);
   pat = gen_rtx_SET (VOIDmode, operands[0],
                     gen_rtx_ASHIFT (GET_MODE (operands[0]),
                                     operands[0], operands[2]));
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       return "add{l}\t{%k0, %k0|%k0, %k0}";
 
     case TYPE_LEA:
   [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashift:SI (match_dup 1) (match_dup 2)))]
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       return "add{l}\t{%0, %0|%0, %0}";
 
     default:
           (const_string "ishift")))
    (set_attr "mode" "SI")])
 
-(define_insn "*ashlsi3_cmp_zext"
+;; SImode left shift where only the flags result is wanted: the shifted
+;; value goes to a scratch register (operand 0) that is clobbered.
+;; Requires the flags user to be satisfied by CCGOCmode.  When the "type"
+;; attribute is "alu" (TARGET_DOUBLE_WITH_ADD, register destination, count
+;; of 1) the shift is emitted as an add of the register to itself;
+;; otherwise a "sal" form is chosen from the count.
+;; NOTE(review): operand 2 uses the const_1_to_31_operand predicate, so
+;; the REG_P (operands[2]) branch looks unreachable here -- confirm.
+(define_insn "*ashlsi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{l}\t{%0, %0|%0, %0}";
+
+    default:
+      if (REG_P (operands[2]))
+       return "sal{l}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+              && (TARGET_SHIFT1 || optimize_size))
+       return "sal{l}\t%0";
+      else
+       return "sal{l}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+                         (const_int 0))
+                     (match_operand 0 "register_operand" ""))
+                (match_operand 2 "const1_operand" ""))
+             (const_string "alu")
+          ]
+          (const_string "ishift")))
+   (set_attr "mode" "SI")])
+
+(define_insn "*ashlsi3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
          (ashift:SI (match_operand:SI 1 "register_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       return "add{l}\t{%k0, %k0|%k0, %k0}";
 
     default:
     case TYPE_LEA:
       return "#";
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       return "add{w}\t{%0, %0|%0, %0}";
 
     default:
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       return "add{w}\t{%0, %0|%0, %0}";
 
     default:
   [(set (reg FLAGS_REG)
        (compare
          (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashift:HI (match_dup 1) (match_dup 2)))]
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t{%0, %0|%0, %0}";
+
+    default:
+      if (REG_P (operands[2]))
+       return "sal{w}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+              && (TARGET_SHIFT1 || optimize_size))
+       return "sal{w}\t%0";
+      else
+       return "sal{w}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+                         (const_int 0))
+                     (match_operand 0 "register_operand" ""))
+                (match_operand 2 "const1_operand" ""))
+             (const_string "alu")
+          ]
+          (const_string "ishift")))
+   (set_attr "mode" "HI")])
+
+(define_insn "*ashlhi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
       return "add{w}\t{%0, %0|%0, %0}";
 
     default:
     case TYPE_LEA:
       return "#";
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
         return "add{l}\t{%k0, %k0|%k0, %k0}";
       else
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
       if (REG_P (operands[1]) && !ANY_QI_REG_P (operands[1]))
         return "add{l}\t{%k0, %k0|%k0, %k0}";
       else
   [(set (reg FLAGS_REG)
        (compare
          (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashift:QI (match_dup 1) (match_dup 2)))]
   switch (get_attr_type (insn))
     {
     case TYPE_ALU:
-      if (operands[2] != const1_rtx)
-       abort ();
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{b}\t{%0, %0|%0, %0}";
+
+    default:
+      if (REG_P (operands[2]))
+       return "sal{b}\t{%b2, %0|%0, %b2}";
+      else if (operands[2] == const1_rtx
+              && (TARGET_SHIFT1 || optimize_size))
+       return "sal{b}\t%0";
+      else
+       return "sal{b}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (cond [(and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
+                         (const_int 0))
+                     (match_operand 0 "register_operand" ""))
+                (match_operand 2 "const1_operand" ""))
+             (const_string "alu")
+          ]
+          (const_string "ishift")))
+   (set_attr "mode" "QI")])
+
+(define_insn "*ashlqi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
       return "add{b}\t{%0, %0|%0, %0}";
 
     default:
 
 ;; See comment above `ashldi3' about how this works.
 
+;; Expander for TImode arithmetic right shift, enabled only under
+;; TARGET_64BIT.  When the shift count is not an immediate, the ashrti3_1
+;; insn is emitted directly; otherwise expansion is delegated to
+;; ix86_expand_binary_operator.
+(define_expand "ashrti3"
+  [(parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+                               (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+{
+  if (! immediate_operand (operands[2], QImode))
+    {
+      emit_insn (gen_ashrti3_1 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  ix86_expand_binary_operator (ASHIFTRT, TImode, operands);
+  DONE;
+})
+
+;; TImode arithmetic right shift by a count held in a register
+;; (constraint "c").  Operand 1 is tied to the destination; an
+;; early-clobbered DImode scratch (operand 3) and the flags are clobbered.
+;; Output is "#", so the insn must be split before final output.
+(define_insn "ashrti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:QI 2 "register_operand" "c")))
+   (clobber (match_scratch:DI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; TImode arithmetic right shift by an immediate count (constraint "O").
+;; Operand 1 is tied to the destination and the flags are clobbered; no
+;; scratch is needed.  Output is "#", so the insn must be split before
+;; final output.
+;; NOTE(review): the range accepted by constraint "O" is defined
+;; elsewhere -- confirm it covers the intended TImode shift counts.
+(define_insn "*ashrti3_2"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:QI 2 "immediate_operand" "O")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; After reload, split a TImode arithmetic right shift by a register
+;; count.  The DImode scratch (operand 3) is passed on to ix86_split_ashr,
+;; which generates the replacement sequence.
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "register_operand" "")))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, operands[3], TImode); DONE;")
+
+;; After reload, split a TImode arithmetic right shift by an immediate
+;; count.  No scratch register is available here, so NULL_RTX is passed to
+;; ix86_split_ashr.
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
+
+;; 64-bit double-precision right shift (shrd{q}).  Operand 0 is read and
+;; written ("+"): it is shifted right by operand 2 and OR-ed with operand 1
+;; shifted left by (64 - operand 2).  The count is either a constant
+;; matching constraint "J" or a register matching "c", giving the two
+;; output alternatives.  The flags are clobbered.
+;;
+;; The shift of operand 0 is described as a logical shift (lshiftrt).  The
+;; SHRD instruction fills the vacated high bits of the destination from
+;; the source operand, never with copies of the sign bit; describing it as
+;; ashiftrt would make the OR produce all-ones in those bits whenever the
+;; destination is negative.  This mirrors x86_64_shld, which already uses
+;; lshiftrt for the bits shifted in from operand 1.
+(define_insn "x86_64_shrd"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+                 (match_operand:QI 2 "nonmemory_operand" "J,c"))
+               (ashift:DI (match_operand:DI 1 "register_operand" "r,r")
+                 (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "@
+   shrd{q}\t{%2, %1, %0|%0, %1, %2}
+   shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")])
+
 (define_expand "ashrdi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
        (ashiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
        (const_string "2")
        (const_string "*")))])
 
+;; 64-bit arithmetic right shift by one where only the flags result is
+;; wanted: the shifted value goes to a clobbered scratch (operand 0).
+;; Used only when TARGET_SHIFT1 or optimize_size selects the one-operand
+;; "sar" form, and the flags user must be satisfied by CCGOCmode.
+;; NOTE(review): unlike *ashrdi3_cconly_rex64 this sets "length" and no
+;; "mode" attribute; confirm that a length of 2 accounts for the REX
+;; prefix of the {q} form.
+(define_insn "*ashrdi3_one_bit_cconly_rex64"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+;; 64-bit arithmetic right shift by a constant count where only the flags
+;; result is wanted: the shifted value goes to a clobbered scratch
+;; (operand 0).  The flags user must be satisfied by CCGOCmode.
+(define_insn "*ashrdi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_int_operand" "n"))
+         (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
+  "sar{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
 (define_insn "*ashrdi3_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (ashiftrt:DI (match_operand:DI 1 "register_operand" "0")
    (match_dup 3)]
   "!TARGET_64BIT && TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_ashrdi (operands, operands[3]); DONE;")
+  "ix86_split_ashr (operands, operands[3], DImode); DONE;")
 
+;; 32-bit-only split of a DImode arithmetic right shift without a scratch
+;; register.  With this change the split waits for flow2_completed only
+;; when optimizing (optimize > 0) with flag_peephole2 set; otherwise it
+;; fires once reload has completed.  The call is also switched from
+;; ix86_split_ashrdi to ix86_split_ashr, which takes the mode explicitly.
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
        (ashiftrt:DI (match_operand:DI 1 "register_operand" "")
                     (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                    ? flow2_completed : reload_completed)"
   [(const_int 0)]
-  "ix86_split_ashrdi (operands, NULL_RTX); DONE;")
+  "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
 
 (define_insn "x86_shrd_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
        (const_string "2")
        (const_string "*")))])
 
+;; SImode arithmetic right shift by one where only the flags result is
+;; wanted: the shifted value goes to a clobbered scratch (operand 0).
+;; Used only when TARGET_SHIFT1 or optimize_size selects the one-operand
+;; "sar" form, and the flags user must be satisfied by CCGOCmode.
+(define_insn "*ashrsi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 (define_insn "*ashrsi3_one_bit_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
+;; SImode arithmetic right shift by a constant count where only the flags
+;; result is wanted: the shifted value goes to a clobbered scratch
+;; (operand 0).  The count must satisfy const_1_to_31_operand, and the
+;; flags user must be satisfied by CCGOCmode.
+(define_insn "*ashrsi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
+  "sar{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashrsi3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only HImode arithmetic right shift by one.  Only FLAGS_REG is
+;; set (comparison of the shifted value with zero); the result register
+;; is a clobbered scratch (operand 0) tied to operand 1.  Enabled only
+;; under TARGET_SHIFT1 or optimize_size.
+(define_insn "*ashrhi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
 
+;; Flags-only HImode arithmetic right shift by an immediate count accepted
+;; by const_1_to_31_operand.  The shifted value goes to a clobbered
+;; scratch (operand 0) tied to operand 1; only FLAGS_REG is set.
+(define_insn "*ashrhi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
+  "sar{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
 (define_expand "ashrqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only QImode arithmetic right shift by one.  Only FLAGS_REG is
+;; set (comparison of the shifted value with zero); the result goes to a
+;; clobbered scratch (operand 0, constraint "q") tied to operand 1.
+;; Enabled only under TARGET_SHIFT1 or optimize_size.
+(define_insn "*ashrqi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
   "sar{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
+
+;; Flags-only QImode arithmetic right shift by an immediate count accepted
+;; by const_1_to_31_operand.  The shifted value goes to a clobbered
+;; scratch (operand 0, constraint "q") tied to operand 1; only FLAGS_REG
+;; is set.
+(define_insn "*ashrqi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
+  "sar{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
+
 \f
 ;; Logical shift instructions
 
 ;; See comment above `ashldi3' about how this works.
 
-(define_expand "lshrdi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-       (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+;; Expander for the TImode logical right shift; available only when
+;; TARGET_64BIT.  A count that is not an immediate is emitted through
+;; gen_lshrti3_1; an immediate count goes through
+;; ix86_expand_binary_operator.
+(define_expand "lshrti3"
+  [(parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+                               (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT"
+{
+  /* Non-immediate shift count: use the pattern that carries a scratch.  */
+  if (! immediate_operand (operands[2], QImode))
+    {
+      emit_insn (gen_lshrti3_1 (operands[0], operands[1], operands[2]));
+      DONE;
+    }
+  ix86_expand_binary_operator (LSHIFTRT, TImode, operands);
+  DONE;
+})
 
-(define_insn "*lshrdi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-       (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const1_operand" "")))
+;; TImode logical right shift by a count held in a register (constraint
+;; "c").  Carries an early-clobbered DImode scratch (operand 3) and
+;; clobbers the flags.  The output template is "#", so no assembly is
+;; emitted directly from this pattern; a define_split of the same shape
+;; lowers it through ix86_split_lshr.
+(define_insn "lshrti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:QI 2 "register_operand" "c")))
+   (clobber (match_scratch:DI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; TImode logical right shift by an immediate count (constraint "O").
+;; Needs no scratch register; clobbers the flags.  The output template
+;; is "#", so the pattern is lowered by a define_split rather than
+;; printed directly.
+(define_insn "*lshrti3_2"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:QI 2 "immediate_operand" "O")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; Post-reload split of the register-count TImode logical right shift:
+;; hands the operands plus the DImode scratch (operand 3) to
+;; ix86_split_lshr, which emits the replacement sequence.
+(define_split 
+  [(set (match_operand:TI 0 "register_operand" "")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "register_operand" "")))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, operands[3], TImode); DONE;")
+
+;; Post-reload split of the immediate-count TImode logical right shift:
+;; calls ix86_split_lshr with NULL_RTX in place of a scratch register.
+(define_split 
+  [(set (match_operand:TI 0 "register_operand" "")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
+
+;; Expander for the DImode logical right shift.  It has an empty
+;; condition and always delegates to ix86_expand_binary_operator.
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+       (lshiftrt:DI (match_operand:DI 1 "shiftdi_operand" "")
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (LSHIFTRT, DImode, operands); DONE;")
+
+(define_insn "*lshrdi3_1_one_bit_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+       (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                    (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
    && (TARGET_SHIFT1 || optimize_size)"
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only DImode logical right shift by one (TARGET_64BIT only).
+;; FLAGS_REG receives the comparison of the shifted value with zero; the
+;; shifted value goes to a clobbered scratch (operand 0) tied to
+;; operand 1.  Enabled only under TARGET_SHIFT1 or optimize_size.
+;; The mode passed to ix86_binary_operator_ok is the mode of the shifted
+;; operands (DImode).
+(define_insn "*lshrdi3_cconly_one_bit_rex64"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
 
+;; Flags-only DImode logical right shift by a constant count
+;; (TARGET_64BIT only).  The shifted value goes to a clobbered scratch
+;; (operand 0) tied to operand 1; only FLAGS_REG is set.  The mode passed
+;; to ix86_binary_operator_ok is the mode of the shifted operands
+;; (DImode).
+(define_insn "*lshrdi3_cconly_rex64"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_int_operand" "e"))
+         (const_int 0)))
+   (clobber (match_scratch:DI 0 "=r"))]
+  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, DImode, operands)"
+  "shr{q}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "DI")])
+
 (define_insn "*lshrdi3_1"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
    (match_dup 3)]
   "!TARGET_64BIT && TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_lshrdi (operands, operands[3]); DONE;")
+  "ix86_split_lshr (operands, operands[3], DImode); DONE;")
 
 (define_split 
   [(set (match_operand:DI 0 "register_operand" "")
        (lshiftrt:DI (match_operand:DI 1 "register_operand" "")
                     (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && (flag_peephole2 ? flow2_completed : reload_completed)"
+  "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                    ? flow2_completed : reload_completed)"
   [(const_int 0)]
-  "ix86_split_lshrdi (operands, NULL_RTX); DONE;")
+  "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
 
 (define_expand "lshrsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only SImode logical right shift by one.  FLAGS_REG receives the
+;; comparison of the shifted value with zero; the shifted value goes to a
+;; clobbered scratch (operand 0) tied to operand 1.  Enabled only under
+;; TARGET_SHIFT1 or optimize_size.  The mode passed to
+;; ix86_binary_operator_ok is the mode of the shifted operands (SImode),
+;; as in the corresponding ashiftrt pattern.
+(define_insn "*lshrsi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 (define_insn "*lshrsi3_cmp_one_bit_zext"
   [(set (reg FLAGS_REG)
        (compare
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
 
+;; Flags-only SImode logical right shift by an immediate count accepted
+;; by const_1_to_31_operand.  The shifted value goes to a clobbered
+;; scratch (operand 0) tied to operand 1; only FLAGS_REG is set.  The
+;; mode passed to ix86_binary_operator_ok is the mode of the shifted
+;; operands (SImode), as in the corresponding ashiftrt pattern.
+(define_insn "*lshrsi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:SI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, SImode, operands)"
+  "shr{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "SI")])
+
 (define_insn "*lshrsi3_cmp_zext"
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:SI (match_operand:SI 1 "register_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only HImode logical right shift by one.  Only FLAGS_REG is set
+;; (comparison of the shifted value with zero); the result goes to a
+;; clobbered scratch (operand 0) tied to operand 1.  Enabled only under
+;; TARGET_SHIFT1 or optimize_size.
+(define_insn "*lshrhi3_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
 
+;; Flags-only HImode logical right shift by an immediate count accepted
+;; by const_1_to_31_operand.  The shifted value goes to a clobbered
+;; scratch (operand 0) tied to operand 1; only FLAGS_REG is set.
+(define_insn "*lshrhi3_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:HI 0 "=r"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+  "shr{w}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "HI")])
+
 (define_expand "lshrqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
        (const_string "2")
        (const_string "*")))])
 
+;; Flags-only QImode logical right shift by one.  Only FLAGS_REG is set
+;; (comparison of the shifted value with zero); the result goes to a
+;; clobbered scratch (operand 0, constraint "q") tied to operand 1.
+;; Enabled only under TARGET_SHIFT1 or optimize_size.
+(define_insn "*lshrqi2_one_bit_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const1_operand" ""))
+         (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t%0"
+  [(set_attr "type" "ishift")
+   (set_attr "length" "2")])
+
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_1_31_operand" "I"))
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
   "shr{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
+
+;; Flags-only QImode logical right shift by an immediate count accepted
+;; by const_1_to_31_operand.  The shifted value goes to a clobbered
+;; scratch (operand 0, constraint "q") tied to operand 1; only FLAGS_REG
+;; is set.
+(define_insn "*lshrqi2_cconly"
+  [(set (reg FLAGS_REG)
+       (compare
+         (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
+                      (match_operand:QI 2 "const_1_to_31_operand" "I"))
+         (const_int 0)))
+   (clobber (match_scratch:QI 0 "=q"))]
+  "ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+  "shr{b}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "QI")])
 \f
 ;; Rotate instructions
 
+;; Expander for the DImode left rotate.  Under TARGET_64BIT it delegates
+;; to ix86_expand_binary_operator.  Otherwise only counts accepted by
+;; const_1_to_31_operand are handled, through gen_ix86_rotldi3; any other
+;; count makes the expander FAIL.
 (define_expand "rotldi3"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "")
-       (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "")
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+       (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
                   (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ROTATE, DImode, operands); DONE;")
+ ""
+{
+  if (TARGET_64BIT)
+    {
+      ix86_expand_binary_operator (ROTATE, DImode, operands);
+      DONE;
+    }
+  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    FAIL;
+  emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
 
+;; Implement the DImode left rotate for !TARGET_64BIT using two
+;; double-precision shift instructions and an SImode scratch register.
+;; The count must satisfy const_1_to_31_operand.  After reload the insn
+;; is split into three steps: operand 4 is copied into the scratch
+;; (operand 3), then operands 4 and 5 are each rewritten as
+;;   (ior (ashift self count) (lshiftrt other (32 - count)))
+;; where "other" is operand 5 for the first set and the saved copy of
+;; operand 4 for the second.  Operands 4 and 5 are filled in by split_di
+;; (presumably the two SImode halves of operand 0 -- confirm against
+;; split_di).
+(define_insn_and_split "ix86_rotldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (rotate:DI (match_operand:DI 1 "register_operand" "0")
+                  (match_operand:QI 2 "const_1_to_31_operand" "I")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ "" 
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+         (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
+                 (lshiftrt:SI (match_dup 5)
+                              (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+         (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
+                 (lshiftrt:SI (match_dup 3)
+                              (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands, 1, operands + 4, operands + 5);")
 (define_insn "*rotlsi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
    (set_attr "mode" "QI")])
 
+;; Expander for the DImode right rotate.  Under TARGET_64BIT it delegates
+;; to ix86_expand_binary_operator.  Otherwise only counts accepted by
+;; const_1_to_31_operand are handled, through gen_ix86_rotrdi3; any other
+;; count makes the expander FAIL.  The RTL template uses rotatert so that
+;; it describes the same operation as the ROTATERT code passed to
+;; ix86_expand_binary_operator and as the ix86_rotrdi3 pattern.
 (define_expand "rotrdi3"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "")
-       (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
+  [(set (match_operand:DI 0 "shiftdi_operand" "")
+       (rotatert:DI (match_operand:DI 1 "shiftdi_operand" "")
+                    (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ROTATERT, DImode, operands); DONE;")
+ ""
+{
+  if (TARGET_64BIT)
+    {
+      ix86_expand_binary_operator (ROTATERT, DImode, operands);
+      DONE;
+    }
+  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    FAIL;
+  emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
+  DONE;
+})
+  
+;; Implement the DImode right rotate for !TARGET_64BIT using two
+;; double-precision shift instructions and an SImode scratch register.
+;; The count must satisfy const_1_to_31_operand.  After reload the insn
+;; is split into three steps: operand 4 is copied into the scratch
+;; (operand 3), then operands 4 and 5 are each rewritten as
+;;   (ior (ashiftrt self count) (ashift other (32 - count)))
+;; where "other" is operand 5 for the first set and the saved copy of
+;; operand 4 for the second.  Operands 4 and 5 are filled in by split_di
+;; (presumably the two SImode halves of operand 0 -- confirm against
+;; split_di).
+(define_insn_and_split "ix86_rotrdi3"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+       (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+                    (match_operand:QI 2 "const_1_to_31_operand" "I")))
+  (clobber (reg:CC FLAGS_REG))
+  (clobber (match_scratch:SI 3 "=&r"))]
+ "!TARGET_64BIT"
+ ""
+ "&& reload_completed"
+ [(set (match_dup 3) (match_dup 4))
+  (parallel
+   [(set (match_dup 4)
+         (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
+                 (ashift:SI (match_dup 5)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])
+  (parallel
+   [(set (match_dup 5)
+         (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
+                 (ashift:SI (match_dup 3)
+                            (minus:QI (const_int 32) (match_dup 2)))))
+    (clobber (reg:CC FLAGS_REG))])]
+ "split_di (operands, 1, operands + 4, operands + 5);")
 
 (define_insn "*rotrdi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 (define_expand "extv"
   [(set (match_operand:SI 0 "register_operand" "")
        (sign_extract:SI (match_operand:SI 1 "register_operand" "")
-                        (match_operand:SI 2 "immediate_operand" "")
-                        (match_operand:SI 3 "immediate_operand" "")))]
+                        (match_operand:SI 2 "const8_operand" "")
+                        (match_operand:SI 3 "const8_operand" "")))]
   ""
 {
   /* Handle extractions from %ah et al.  */
 (define_expand "extzv"
   [(set (match_operand:SI 0 "register_operand" "")
        (zero_extract:SI (match_operand 1 "ext_register_operand" "")
-                        (match_operand:SI 2 "immediate_operand" "")
-                        (match_operand:SI 3 "immediate_operand" "")))]
+                        (match_operand:SI 2 "const8_operand" "")
+                        (match_operand:SI 3 "const8_operand" "")))]
   ""
 {
   /* Handle extractions from %ah et al.  */
 
 (define_expand "insv"
   [(set (zero_extract (match_operand 0 "ext_register_operand" "")
-                     (match_operand 1 "immediate_operand" "")
-                     (match_operand 2 "immediate_operand" ""))
+                     (match_operand 1 "const8_operand" "")
+                     (match_operand 2 "const8_operand" ""))
         (match_operand 3 "register_operand" ""))]
   ""
 {
-  /* Handle extractions from %ah et al.  */
+  /* Handle insertions to %ah et al.  */
   if (INTVAL (operands[1]) != 8 || INTVAL (operands[2]) != 8)
     FAIL;
 
 ;; The SSE store flag instructions saves 0 or 0xffffffff to the result.
 ;; subsequent logical operations are used to imitate conditional moves.
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
-;; it directly.  Further holding this value in pseudo register might bring
-;; problem in implicit normalization in spill code.
-;; So we don't define FLOAT_STORE_FLAG_VALUE and create these
-;; instructions after reload by splitting the conditional move patterns.
+;; it directly.
 
 (define_insn "*sse_setccsf"
   [(set (match_operand:SF 0 "register_operand" "=x")
        (match_operator:SF 1 "sse_comparison_operator"
          [(match_operand:SF 2 "register_operand" "0")
           (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && reload_completed"
+  "TARGET_SSE"
   "cmp%D1ss\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "SF")])
        (match_operator:DF 1 "sse_comparison_operator"
          [(match_operand:DF 2 "register_operand" "0")
           (match_operand:DF 3 "nonimmediate_operand" "Ym")]))]
-  "TARGET_SSE2 && reload_completed"
+  "TARGET_SSE2"
   "cmp%D1sd\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "DF")])
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNORDERED, operands[0]); DONE;")
 
 (define_expand "bordered"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (ORDERED, operands[0]); DONE;")
 
 (define_expand "buneq"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNEQ, operands[0]); DONE;")
 
 (define_expand "bunge"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNGE, operands[0]); DONE;")
 
 (define_expand "bungt"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNGT, operands[0]); DONE;")
 
 (define_expand "bunle"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNLE, operands[0]); DONE;")
 
 (define_expand "bunlt"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (UNLT, operands[0]); DONE;")
 
 (define_expand "bltgt"
        (if_then_else (match_dup 1)
                      (label_ref (match_operand 0 "" ""))
                      (pc)))]
-  "TARGET_80387 || TARGET_SSE"
+  "TARGET_80387 || TARGET_SSE_MATH"
   "ix86_expand_branch (LTGT, operands[0]); DONE;")
 
 (define_insn "*jcc_1"
 ;; during early optimization.  Splitting the operation apart early makes
 ;; for bad code when we want to reverse the operation.
 
-(define_insn "*fp_jcc_1"
+(define_insn "*fp_jcc_1_mixed"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "f")
-                        (match_operand 2 "register_operand" "f")])
+                       [(match_operand 1 "register_operand" "f,x")
+                        (match_operand 2 "nonimmediate_operand" "f,xm")])
          (label_ref (match_operand 3 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_CMOVE && TARGET_80387
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 (define_insn "*fp_jcc_1_sse"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "f#x,x#f")
-                        (match_operand 2 "nonimmediate_operand" "f#x,xm#f")])
+                       [(match_operand 1 "register_operand" "x")
+                        (match_operand 2 "nonimmediate_operand" "xm")])
          (label_ref (match_operand 3 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_80387
+  "TARGET_SSE_MATH
    && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_1_sse_only"
+(define_insn "*fp_jcc_1_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "x")
-                        (match_operand 2 "nonimmediate_operand" "xm")])
+                       [(match_operand 1 "register_operand" "f")
+                        (match_operand 2 "register_operand" "f")])
          (label_ref (match_operand 3 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+  "TARGET_CMOVE && TARGET_80387
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_2"
+(define_insn "*fp_jcc_2_mixed"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "f")
-                        (match_operand 2 "register_operand" "f")])
+                       [(match_operand 1 "register_operand" "f,x")
+                        (match_operand 2 "nonimmediate_operand" "f,xm")])
          (pc)
          (label_ref (match_operand 3 "" ""))))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_CMOVE && TARGET_80387
-   && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "TARGET_MIX_SSE_I387
+   && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 (define_insn "*fp_jcc_2_sse"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "f#x,x#f")
-                        (match_operand 2 "nonimmediate_operand" "f#x,xm#f")])
+                       [(match_operand 1 "register_operand" "x")
+                        (match_operand 2 "nonimmediate_operand" "xm")])
          (pc)
          (label_ref (match_operand 3 "" ""))))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_80387
+  "TARGET_SSE_MATH
    && SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_2_sse_only"
+(define_insn "*fp_jcc_2_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
-                       [(match_operand 1 "register_operand" "x")
-                        (match_operand 2 "nonimmediate_operand" "xm")])
+                       [(match_operand 1 "register_operand" "f")
+                        (match_operand 2 "register_operand" "f")])
          (pc)
          (label_ref (match_operand 3 "" ""))))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
+  "TARGET_CMOVE && TARGET_80387
+   && FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_3"
+(define_insn "*fp_jcc_3_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_4"
+(define_insn "*fp_jcc_4_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_5"
+(define_insn "*fp_jcc_5_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_6"
+(define_insn "*fp_jcc_6_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-(define_insn "*fp_jcc_7"
+(define_insn "*fp_jcc_7_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
-                        (match_operand 2 "const_double_operand" "C")])
+                        (match_operand 2 "const0_operand" "X")])
          (label_ref (match_operand 3 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (match_scratch:HI 4 "=a"))]
   "TARGET_80387
    && FLOAT_MODE_P (GET_MODE (operands[1]))
-   && operands[2] == CONST0_RTX (GET_MODE (operands[1]))
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
    && SELECT_CC_MODE (GET_CODE (operands[0]),
                      operands[1], operands[2]) == CCFPmode
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
 
-;; The order of operands in *fp_jcc_8 is forced by combine in
+;; The order of operands in *fp_jcc_8_387 is forced by combine in
 ;; simplify_comparison () function. Float operator is treated as RTX_OBJ
 ;; with a precedence over other operators and is always put in the first
 ;; place. Swap condition and operands to match ficom instruction.
 
-(define_insn "*fp_jcc_8"
+(define_insn "*fp_jcc_8<mode>_387"
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operator 1 "float_operator"
-                          [(match_operand:SI 2 "nonimmediate_operand" "m,?r")])
+                          [(match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r")])
                           (match_operand 3 "register_operand" "f,f")])
          (label_ref (match_operand 4 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 5 "=a,a"))]
-  "TARGET_80387 && TARGET_USE_FIOP
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
    && FLOAT_MODE_P (GET_MODE (operands[3]))
    && GET_MODE (operands[1]) == GET_MODE (operands[3])
    && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operator 1 "float_operator"
-                          [(match_operand:SI 2 "memory_operand" "")])
+                          [(match_operand:X87MODEI12 2 "memory_operand" "")])
                           (match_operand 3 "register_operand" "")])
          (match_operand 4 "" "")
          (match_operand 5 "" "")))
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operator 1 "float_operator"
-                          [(match_operand:SI 2 "register_operand" "")])
+                          [(match_operand:X87MODEI12 2 "register_operand" "")])
                           (match_operand 3 "register_operand" "")])
          (match_operand 4 "" "")
          (match_operand 5 "" "")))
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")])
 \f
-;; Loop instruction
-;;
-;; This is all complicated by the fact that since this is a jump insn
-;; we must handle our own reloads.
-
-(define_expand "doloop_end"
-  [(use (match_operand 0 "" ""))        ; loop pseudo
-   (use (match_operand 1 "" ""))        ; iterations; zero if unknown
-   (use (match_operand 2 "" ""))        ; max iterations
-   (use (match_operand 3 "" ""))        ; loop level 
-   (use (match_operand 4 "" ""))]       ; label
-  "!TARGET_64BIT && TARGET_USE_LOOP"
-  "                                 
-{
-  /* Only use cloop on innermost loops.  */
-  if (INTVAL (operands[3]) > 1)
-    FAIL;
-  if (GET_MODE (operands[0]) != SImode)
-    FAIL;
-  emit_jump_insn (gen_doloop_end_internal (operands[4], operands[0],
-                                          operands[0]));
-  DONE;
-}")
-
-(define_insn "doloop_end_internal"
-  [(set (pc)
-       (if_then_else (ne (match_operand:SI 1 "register_operand" "c,?*r,?*r")
-                         (const_int 1))
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))
-   (set (match_operand:SI 2 "nonimmediate_operand" "=1,1,*m*r")
-       (plus:SI (match_dup 1)
-                (const_int -1)))
-   (clobber (match_scratch:SI 3 "=X,X,r"))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_USE_LOOP
-   && (reload_in_progress || reload_completed
-       || register_operand (operands[2], VOIDmode))"
-{
-  if (which_alternative != 0)
-    return "#";
-  if (get_attr_length (insn) == 2)
-    return "%+loop\t%l0";
-  else
-    return "dec{l}\t%1\;%+jne\t%l0";
-}
-  [(set (attr "length")
-       (if_then_else (and (eq_attr "alternative" "0")
-                          (and (ge (minus (match_dup 0) (pc))
-                                   (const_int -126))
-                               (lt (minus (match_dup 0) (pc))
-                                   (const_int 128))))
-                     (const_int 2)
-                     (const_int 16)))
-   ;; We don't know the type before shorten branches.  Optimistically expect
-   ;; the loop instruction to match.
-   (set (attr "type") (const_string "ibr"))])
-
-(define_split
-  [(set (pc)
-       (if_then_else (ne (match_operand:SI 1 "register_operand" "")
-                         (const_int 1))
-                     (match_operand 0 "" "")
-                     (pc)))
-   (set (match_dup 1)
-       (plus:SI (match_dup 1)
-                (const_int -1)))
-   (clobber (match_scratch:SI 2 ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_USE_LOOP
-   && reload_completed
-   && REGNO (operands[1]) != 2"
-  [(parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (plus:SI (match_dup 1) (const_int -1))
-                                (const_int 0)))
-             (set (match_dup 1) (plus:SI (match_dup 1) (const_int -1)))])
-   (set (pc) (if_then_else (ne (reg:CCZ FLAGS_REG) (const_int 0))
-                          (match_dup 0)
-                          (pc)))]
-  "")
-  
-(define_split
-  [(set (pc)
-       (if_then_else (ne (match_operand:SI 1 "register_operand" "")
-                         (const_int 1))
-                     (match_operand 0 "" "")
-                     (pc)))
-   (set (match_operand:SI 2 "nonimmediate_operand" "")
-       (plus:SI (match_dup 1)
-                (const_int -1)))
-   (clobber (match_scratch:SI 3 ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_64BIT && TARGET_USE_LOOP
-   && reload_completed
-   && (! REG_P (operands[2])
-       || ! rtx_equal_p (operands[1], operands[2]))"
-  [(set (match_dup 3) (match_dup 1))
-   (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (plus:SI (match_dup 3) (const_int -1))
-                               (const_int 0)))
-             (set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))])
-   (set (match_dup 2) (match_dup 3))
-   (set (pc) (if_then_else (ne (reg:CCZ FLAGS_REG) (const_int 0))
-                          (match_dup 0)
-                          (pc)))]
-  "")
-
 ;; Convert setcc + movzbl to xor + setcc if operands don't overlap.
 
 (define_peephole2
    (set (strict_low_part (match_dup 5))
        (match_dup 2))]
 {
-  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17);
+  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
   operands[5] = gen_lowpart (QImode, operands[3]);
   ix86_expand_clear (operands[3]);
 })
    (set (strict_low_part (match_dup 5))
        (match_dup 2))]
 {
-  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), 17);
+  operands[4] = gen_rtx_REG (GET_MODE (operands[0]), FLAGS_REG);
   operands[5] = gen_lowpart (QImode, operands[3]);
   ix86_expand_clear (operands[3]);
 })
 #else
   /* It is tempting to use ASM_OUTPUT_ALIGN here, but we don't want to do that.
      The align insn is used to avoid 3 jump instructions in the row to improve
-     branch prediction and the benefits hardly outweight the cost of extra 8
+     branch prediction and the benefits hardly outweigh the cost of extra 8
      nops on the average inserted by full alignment pseudo operation.  */
 #endif
   return "";
        (unspec:SI [(const_int 0)] UNSPEC_SET_GOT))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  { return output_set_got (operands[0]); }
+  { return output_set_got (operands[0], NULL_RTX); }
+  [(set_attr "type" "multi")
+   (set_attr "length" "12")])
+
+(define_insn "set_got_labelled"        ; 32-bit UNSPEC_SET_GOT pattern that carries a label operand
+  [(set (match_operand:SI 0 "register_operand" "=r")   ; operand 0: SImode destination register
+       (unspec:SI [(label_ref (match_operand 1 "" ""))]        ; operand 1: label, passed on to output_set_got
+        UNSPEC_SET_GOT))
+   (clobber (reg:CC FLAGS_REG))]       ; the emitted sequence is declared to clobber the flags
+  "!TARGET_64BIT"
+  { return output_set_got (operands[0], operands[1]); }
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
+(define_insn "set_got_rex64"           ; 64-bit UNSPEC_SET_GOT: a single RIP-relative lea
+  [(set (match_operand:DI 0 "register_operand" "=r")   ; operand 0: DImode destination register
+       (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
+  "TARGET_64BIT"
+  "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0"
+  [(set_attr "type" "lea")
+   (set_attr "length" "7")])           ; REX.W + opcode + ModRM + disp32 = 7 bytes, same as *tls_dynamic_lea_64
+
 (define_expand "epilogue"
   [(const_int 1)]
   ""
       operands[2] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[2]));
     }
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32
+                 (operands[0], operands[1], operands[2]));
+       DONE;
+    }
   operands[3] = ix86_tls_get_addr ();
 })
 
 (define_insn "*tls_global_dynamic_64"
   [(set (match_operand:DI 0 "register_operand" "=a")
-       (call (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
-                     (match_operand:DI 3 "" "")))
+       (call:DI (mem:QI (match_operand:DI 2 "call_insn_operand" ""))
+                (match_operand:DI 3 "" "")))
    (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
              UNSPEC_TLS_GD)]
   "TARGET_64BIT"
 
 (define_expand "tls_global_dynamic_64"
   [(parallel [(set (match_operand:DI 0 "register_operand" "")
-                  (call (mem:QI (match_dup 2)) (const_int 0)))
+                  (call:DI (mem:QI (match_dup 2)) (const_int 0)))     ; call rtx is explicitly DImode; operand 2 is filled in by the preparation code.  Under TARGET_GNU2_TLS the preparation code emits tls_dynamic_gnu2_64 and ends with DONE, so this call pattern is not generated.
              (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
                         UNSPEC_TLS_GD)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64
+                 (operands[0], operands[1]));
+       DONE;
+    }
   operands[2] = ix86_tls_get_addr ();
 })
 
       operands[1] = gen_reg_rtx (Pmode);
       emit_insn (gen_set_got (operands[1]));
     }
-  operands[2] = ix86_tls_get_addr ();
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_32
+                 (operands[0], ix86_tls_module_base (), operands[1]));
+       DONE;
+    }
+  operands[2] = ix86_tls_get_addr ();
 })
 
 (define_insn "*tls_local_dynamic_base_64"
   [(set (match_operand:DI 0 "register_operand" "=a")
-       (call (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
-                     (match_operand:DI 2 "" "")))
+       (call:DI (mem:QI (match_operand:DI 1 "call_insn_operand" ""))
+                (match_operand:DI 2 "" "")))
    (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
   "TARGET_64BIT"
   "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1"
 
 (define_expand "tls_local_dynamic_base_64"
   [(parallel [(set (match_operand:DI 0 "register_operand" "")
-                  (call (mem:QI (match_dup 1)) (const_int 0)))
+                  (call:DI (mem:QI (match_dup 1)) (const_int 0)))     ; call rtx is explicitly DImode; operand 1 is filled in by the preparation code.  Under TARGET_GNU2_TLS the preparation code emits tls_dynamic_gnu2_64 on ix86_tls_module_base () and ends with DONE, so this call pattern is not generated.
              (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])]
   ""
 {
+  if (TARGET_GNU2_TLS)
+    {
+       emit_insn (gen_tls_dynamic_gnu2_64
+                 (operands[0], ix86_tls_module_base ()));
+       DONE;
+    }
   operands[1] = ix86_tls_get_addr ();
 })
 
    (set_attr "length" "7")
    (set_attr "memory" "load")
    (set_attr "imm_disp" "false")])
+
+;; GNU2 TLS patterns can be split.
+
+(define_expand "tls_dynamic_gnu2_32"   ; 32-bit GNU2 TLS: emits two insns, an address add followed by the TLSDESC unspec
+  [(set (match_dup 3)                  ; operand 3: temporary picked by the preparation code (operand 0 when no_new_pseudos, else a new Pmode pseudo)
+       (plus:SI (match_operand:SI 2 "register_operand" "")     ; operand 2: register added to the TLSDESC constant
+                (const:SI
+                 (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")]   ; operand 1: the TLS symbol
+                            UNSPEC_TLSDESC))))
+   (parallel
+    [(set (match_operand:SI 0 "register_operand" "")   ; operand 0: result register
+         (unspec:SI [(match_dup 1) (match_dup 3)       ; inputs: symbol, temporary from the first set,
+                     (match_dup 2) (reg:SI SP_REG)]    ; operand 2 again, and the stack pointer
+                     UNSPEC_TLSDESC))
+     (clobber (reg:CC FLAGS_REG))])]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"   ; the preparation code also sets ix86_tls_descriptor_calls_expanded_in_cfun
+{
+  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_32"     ; matches the first set produced by tls_dynamic_gnu2_32
+  [(set (match_operand:SI 0 "register_operand" "=r")   ; operand 0: any general register
+       (plus:SI (match_operand:SI 1 "register_operand" "b")    ; operand 1: restricted to the "b" register class
+                (const:SI
+                 (unspec:SI [(match_operand:SI 2 "tls_symbolic_operand" "")]   ; operand 2: the TLS symbol
+                             UNSPEC_TLSDESC))))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{l}\t{%a2@TLSDESC(%1), %0|%0, %a2@TLSDESC[%1]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "SI")
+   (set_attr "length" "6")             ; total insn length in bytes
+   (set_attr "length_address" "4")])   ; of which 4 bytes are the address displacement
+
+(define_insn "*tls_dynamic_call_32"    ; matches the parallel produced by tls_dynamic_gnu2_32
+  [(set (match_operand:SI 0 "register_operand" "=a")   ; operand 0: result, restricted to the "a" register class
+       (unspec:SI [(match_operand:SI 1 "tls_symbolic_operand" "")      ; operand 1: the TLS symbol
+                   (match_operand:SI 2 "register_operand" "0") ; operand 2: tied to operand 0; base of the indirect call address
+                   ;; we have to make sure %ebx still points to the GOT
+                   (match_operand:SI 3 "register_operand" "b") ; operand 3: only constrained, not printed in the template
+                   (reg:SI SP_REG)]
+                  UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_32" ; TLSDESC unspec on a module-base operand plus a DTPOFF constant; always split
+  [(set (match_operand:SI 0 "register_operand" "=&a")  ; operand 0: earlyclobber result in the "a" register class
+       (plus:SI
+        (unspec:SI [(match_operand:SI 3 "tls_modbase_operand" "")      ; operand 3: matched, not used by the replacement
+                    (match_operand:SI 4 "" "")                         ; operand 4: matched, not used by the replacement
+                    (match_operand:SI 2 "register_operand" "b")        ; operand 2: forwarded to tls_dynamic_gnu2_32
+                    (reg:SI SP_REG)]
+                   UNSPEC_TLSDESC)
+        (const:SI (unspec:SI
+                   [(match_operand:SI 1 "tls_symbolic_operand" "")]    ; operand 1: the symbol whose DTPOFF is added
+                   UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"                                  ; no assembly of its own
+  ""                                   ; split condition left empty
+  [(set (match_dup 0) (match_dup 5))]  ; copies the temporary (operand 5) into operand 0 after the preparation code has emitted tls_dynamic_gnu2_32 on (operand 5, operand 1, operand 2)
+{
+  operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
+})
+
+(define_expand "tls_dynamic_gnu2_64"   ; 64-bit GNU2 TLS: emits two insns, a TLSDESC unspec load followed by the TLSDESC call unspec
+  [(set (match_dup 2)                  ; operand 2: temporary picked by the preparation code (operand 0 when no_new_pseudos, else a new Pmode pseudo)
+       (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]     ; operand 1: the TLS symbol
+                  UNSPEC_TLSDESC))
+   (parallel
+    [(set (match_operand:DI 0 "register_operand" "")   ; operand 0: result register
+         (unspec:DI [(match_dup 1) (match_dup 2) (reg:DI SP_REG)]      ; inputs: symbol, temporary, stack pointer
+                    UNSPEC_TLSDESC))
+     (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_64BIT && TARGET_GNU2_TLS"    ; the preparation code also sets ix86_tls_descriptor_calls_expanded_in_cfun
+{
+  operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  ix86_tls_descriptor_calls_expanded_in_cfun = true;
+})
+
+(define_insn "*tls_dynamic_lea_64"     ; matches the first set produced by tls_dynamic_gnu2_64
+  [(set (match_operand:DI 0 "register_operand" "=r")   ; operand 0: any general register
+       (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]     ; operand 1: the TLS symbol
+                  UNSPEC_TLSDESC))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+  [(set_attr "type" "lea")
+   (set_attr "mode" "DI")
+   (set_attr "length" "7")             ; total insn length in bytes
+   (set_attr "length_address" "4")])   ; of which 4 bytes are the address displacement
+
+(define_insn "*tls_dynamic_call_64"    ; matches the parallel produced by tls_dynamic_gnu2_64
+  [(set (match_operand:DI 0 "register_operand" "=a")   ; operand 0: result, restricted to the "a" register class
+       (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")      ; operand 1: the TLS symbol
+                   (match_operand:DI 2 "register_operand" "0") ; operand 2: tied to operand 0; base of the indirect call address
+                   (reg:DI SP_REG)]
+                  UNSPEC_TLSDESC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}"
+  [(set_attr "type" "call")
+   (set_attr "length" "2")
+   (set_attr "length_address" "0")])
+
+(define_insn_and_split "*tls_dynamic_gnu2_combine_64" ; TLSDESC unspec on a module-base operand plus a DTPOFF constant; always split
+  [(set (match_operand:DI 0 "register_operand" "=&a")  ; operand 0: earlyclobber result in the "a" register class
+       (plus:DI
+        (unspec:DI [(match_operand:DI 2 "tls_modbase_operand" "")      ; operand 2: matched, not used by the replacement
+                    (match_operand:DI 3 "" "")                         ; operand 3: matched, not used by the replacement
+                    (reg:DI SP_REG)]
+                   UNSPEC_TLSDESC)
+        (const:DI (unspec:DI
+                   [(match_operand:DI 1 "tls_symbolic_operand" "")]    ; operand 1: the symbol whose DTPOFF is added
+                   UNSPEC_DTPOFF))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_GNU2_TLS"
+  "#"                                  ; no assembly of its own
+  ""                                   ; split condition left empty
+  [(set (match_dup 0) (match_dup 4))]  ; copies the temporary (operand 4) into operand 0 after the preparation code has emitted tls_dynamic_gnu2_64 on (operand 4, operand 1)
+{
+  operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
+})
+
+;;
 \f
 ;; These patterns match the binary 387 instructions for addM3, subM3,
 ;; mulM3 and divM3.  There are three patterns for each of DFmode and
 ;; so use special patterns for add and mull.
 
 (define_insn "*fop_sf_comm_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f#x,x#f")
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
        (match_operator:SF 3 "binary_fp_operator"
                        [(match_operand:SF 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm#x,xm#f")]))]
+                        (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))]
   "TARGET_MIX_SSE_I387
    && COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   [(set (match_operand:SF 0 "register_operand" "=f,f,x")
        (match_operator:SF 3 "binary_fp_operator"
                        [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm#f")]))]
+                        (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))]
   "TARGET_MIX_SSE_I387
    && !COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
               (const_string "sseadd")))
    (set_attr "mode" "SF")])
 
+;; This pattern is not fully shadowed by the pattern above.
 (define_insn "*fop_sf_1_i387"
   [(set (match_operand:SF 0 "register_operand" "=f,f")
        (match_operator:SF 3 "binary_fp_operator"
                        [(match_operand:SF 1 "nonimmediate_operand" "0,fm")
                         (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))]
-  "TARGET_80387
+  "TARGET_80387 && !TARGET_SSE_MATH
    && !COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
               (const_string "fop")))
    (set_attr "mode" "SF")])
 
-
 ;; ??? Add SSE splitters for these!
-(define_insn "*fop_sf_2_i387"
+(define_insn "*fop_sf_2<mode>_i387"
   [(set (match_operand:SF 0 "register_operand" "=f,f")
        (match_operator:SF 3 "binary_fp_operator"
-         [(float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
+         [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
           (match_operand:SF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH"
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:SF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_sf_3_i387"
+(define_insn "*fop_sf_3<mode>_i387"
   [(set (match_operand:SF 0 "register_operand" "=f,f")
        (match_operator:SF 3 "binary_fp_operator"
          [(match_operand:SF 1 "register_operand" "0,0")
-          (float:SF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_FIOP && !TARGET_SSE_MATH"
+          (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:SF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*fop_df_comm_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,Y")
        (match_operator:DF 3 "binary_fp_operator"
                        [(match_operand:DF 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:DF 2 "nonimmediate_operand" "fm#Y,Ym#f")]))]
+                        (match_operand:DF 2 "nonimmediate_operand" "fm,Ym")]))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
    && COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type") 
        (if_then_else (eq_attr "alternative" "1")
-          (if_then_else (match_operand:SF 3 "mult_operator" "") 
+          (if_then_else (match_operand:DF 3 "mult_operator" "") 
              (const_string "ssemul")
              (const_string "sseadd"))
-          (if_then_else (match_operand:SF 3 "mult_operator" "") 
+          (if_then_else (match_operand:DF 3 "mult_operator" "") 
              (const_string "fmul")
              (const_string "fop"))))
    (set_attr "mode" "DF")])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type") 
-        (if_then_else (match_operand:SF 3 "mult_operator" "") 
+        (if_then_else (match_operand:DF 3 "mult_operator" "") 
           (const_string "ssemul")
           (const_string "sseadd")))
    (set_attr "mode" "DF")])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type") 
-       (if_then_else (match_operand:SF 3 "mult_operator" "") 
+       (if_then_else (match_operand:DF 3 "mult_operator" "") 
           (const_string "fmul")
           (const_string "fop")))
    (set_attr "mode" "DF")])
 
 (define_insn "*fop_df_1_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f#Y,f#Y,Y#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,f,Y")
        (match_operator:DF 3 "binary_fp_operator"
                        [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0")
-                        (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym#f")]))]
+                        (match_operand:DF 2 "nonimmediate_operand" "fm,0,Ym")]))]
   "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
    && !COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(and (eq_attr "alternative" "2")
-                   (match_operand:SF 3 "mult_operator" ""))
+                   (match_operand:DF 3 "mult_operator" ""))
                  (const_string "ssemul")
               (and (eq_attr "alternative" "2")
-                   (match_operand:SF 3 "div_operator" ""))
+                   (match_operand:DF 3 "div_operator" ""))
                  (const_string "ssediv")
               (eq_attr "alternative" "2")
                  (const_string "sseadd")
   "* return output_387_binary_op (insn, operands);"
   [(set_attr "mode" "DF")
    (set (attr "type") 
-        (cond [(match_operand:SF 3 "mult_operator" "")
+        (cond [(match_operand:DF 3 "mult_operator" "")
                  (const_string "ssemul")
-              (match_operand:SF 3 "div_operator" "")
+              (match_operand:DF 3 "div_operator" "")
                  (const_string "ssediv")
               ]
               (const_string "sseadd")))])
 
+;; This pattern is not fully shadowed by the pattern above.
 (define_insn "*fop_df_1_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
        (match_operator:DF 3 "binary_fp_operator"
                        [(match_operand:DF 1 "nonimmediate_operand" "0,fm")
                         (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))]
-  "TARGET_80387
+  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
    && !COMMUTATIVE_ARITH_P (operands[3])
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
   "* return output_387_binary_op (insn, operands);"
    (set_attr "mode" "DF")])
 
 ;; ??? Add SSE splitters for these!
-(define_insn "*fop_df_2_i387"
+(define_insn "*fop_df_2<mode>_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
        (match_operator:DF 3 "binary_fp_operator"
-          [(float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
+          [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
            (match_operand:DF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:DF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_df_3_i387"
+(define_insn "*fop_df_3<mode>_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
        (match_operator:DF 3 "binary_fp_operator"
           [(match_operand:DF 1 "register_operand" "0,0")
-           (float:DF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_FIOP && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+           (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
+   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:DF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*fop_df_4_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
               (const_string "fop")))
    (set_attr "mode" "XF")])
 
-(define_insn "*fop_xf_2_i387"
+(define_insn "*fop_xf_2<mode>_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
-          [(float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r"))
+          [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
            (match_operand:XF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_FIOP"
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:XF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_xf_3_i387"
+(define_insn "*fop_xf_3<mode>_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
          [(match_operand:XF 1 "register_operand" "0,0")
-          (float:XF (match_operand:SI 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_FIOP"
+          (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type") 
         (cond [(match_operand:XF 3 "mult_operator" "") 
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*fop_xf_4_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
 (define_split
   [(set (match_operand 0 "register_operand" "")
        (match_operator 3 "binary_fp_operator"
-          [(float (match_operand:SI 1 "register_operand" ""))
+          [(float (match_operand:X87MODEI12 1 "register_operand" ""))
            (match_operand 2 "register_operand" "")]))]
   "TARGET_80387 && reload_completed
    && FLOAT_MODE_P (GET_MODE (operands[0]))"
   [(set (match_operand 0 "register_operand" "")
        (match_operator 3 "binary_fp_operator"
           [(match_operand 1 "register_operand" "")
-           (float (match_operand:SI 2 "register_operand" ""))]))]
+           (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
   "TARGET_80387 && reload_completed
    && FLOAT_MODE_P (GET_MODE (operands[0]))"
   [(const_int 0)]
 })
 
 (define_insn "*sqrtsf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f#x,x#f")
-       (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0#x,xm#f")))]
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
+       (sqrt:SF (match_operand:SF 1 "nonimmediate_operand" "0,xm")))]
   "TARGET_USE_FANCY_MATH_387 && TARGET_MIX_SSE_I387"
   "@
    fsqrt
 })
 
 (define_insn "*sqrtdf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f#Y,Y#f")
-       (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0#Y,Ym#f")))]
+  [(set (match_operand:DF 0 "register_operand" "=f,Y")
+       (sqrt:DF (match_operand:DF 1 "nonimmediate_operand" "0,Ym")))]
   "TARGET_USE_FANCY_MATH_387 && TARGET_SSE2 && TARGET_MIX_SSE_I387"
   "@
    fsqrt
    (use (match_operand:SF 1 "register_operand" ""))
    (use (match_operand:SF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx label = gen_label_rtx ();
    (use (match_operand:DF 1 "register_operand" ""))
    (use (match_operand:DF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx label = gen_label_rtx ();
    (use (match_operand:SF 1 "register_operand" ""))
    (use (match_operand:SF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx label = gen_label_rtx ();
    (use (match_operand:DF 1 "register_operand" ""))
    (use (match_operand:DF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx label = gen_label_rtx ();
   [(set (match_operand:DF 0 "register_operand" "=f")
        (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsin"
   [(set_attr "type" "fpspc")
   [(set (match_operand:SF 0 "register_operand" "=f")
        (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsin"
   [(set_attr "type" "fpspc")
                     (match_operand:SF 1 "register_operand" "0"))]
                   UNSPEC_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsin"
   [(set_attr "type" "fpspc")
   [(set (match_operand:DF 0 "register_operand" "=f")
        (unspec:DF [(match_operand:DF 1 "register_operand" "0")] UNSPEC_COS))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fcos"
   [(set_attr "type" "fpspc")
   [(set (match_operand:SF 0 "register_operand" "=f")
        (unspec:SF [(match_operand:SF 1 "register_operand" "0")] UNSPEC_COS))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fcos"
   [(set_attr "type" "fpspc")
                     (match_operand:SF 1 "register_operand" "0"))]
                   UNSPEC_COS))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fcos"
   [(set_attr "type" "fpspc")
    (set (match_operand:DF 1 "register_operand" "=u")
         (unspec:DF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsincos"
   [(set_attr "type" "fpspc")
    (set (match_operand:SF 1 "register_operand" "=u")
         (unspec:SF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsincos"
   [(set_attr "type" "fpspc")
         (unspec:DF [(float_extend:DF
                     (match_dup 2))] UNSPEC_SINCOS_SIN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fsincos"
   [(set_attr "type" "fpspc")
    (set (match_operand:DF 1 "register_operand" "=u")
         (unspec:DF [(match_dup 2)] UNSPEC_TAN_TAN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fptan"
   [(set_attr "type" "fpspc")
              (set (match_operand:DF 0 "register_operand" "")
                   (unspec:DF [(match_dup 1)] UNSPEC_TAN_TAN))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (DFmode);
    (set (match_operand:SF 1 "register_operand" "=u")
         (unspec:SF [(match_dup 2)] UNSPEC_TAN_TAN))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fptan"
   [(set_attr "type" "fpspc")
              (set (match_operand:SF 0 "register_operand" "")
                   (unspec:SF [(match_dup 1)] UNSPEC_TAN_TAN))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (SFmode);
                   UNSPEC_FPATAN))
    (clobber (match_scratch:DF 3 "=1"))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fpatan"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "DF")])
 
 (define_expand "atan2df3"
-  [(use (match_operand:DF 0 "register_operand" "=f"))
-   (use (match_operand:DF 2 "register_operand" "0"))
-   (use (match_operand:DF 1 "register_operand" "u"))]
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 2 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx copy = gen_reg_rtx (DFmode);
                    UNSPEC_FPATAN))
              (clobber (match_scratch:DF 3 ""))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (DFmode);
                   UNSPEC_FPATAN))
    (clobber (match_scratch:SF 3 "=1"))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
   "fpatan"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "SF")])
 
 (define_expand "atan2sf3"
-  [(use (match_operand:SF 0 "register_operand" "=f"))
-   (use (match_operand:SF 2 "register_operand" "0"))
-   (use (match_operand:SF 1 "register_operand" "u"))]
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 2 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx copy = gen_reg_rtx (SFmode);
                    UNSPEC_FPATAN))
              (clobber (match_scratch:SF 3 ""))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (SFmode);
    (set_attr "mode" "XF")])
 
 (define_expand "atan2xf3"
-  [(use (match_operand:XF 0 "register_operand" "=f"))
-   (use (match_operand:XF 2 "register_operand" "0"))
-   (use (match_operand:XF 1 "register_operand" "u"))]
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 2 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (XFmode);
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (XFmode);
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (XFmode);
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 4)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (XFmode);
                   (fix:SI (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   operands[2] = gen_reg_rtx (XFmode);
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 10)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 10)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 10)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 10)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 8)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 8)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   int i;
    (set (match_operand:DF 0 "register_operand" "")
        (float_truncate:DF (match_dup 14)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
    (set (match_operand:SF 0 "register_operand" "")
        (float_truncate:SF (match_dup 14)))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx temp;
   emit_move_insn (operands[2], temp);
   emit_move_insn (operands[9], CONST1_RTX (XFmode));  /* fld1 */
 })
+
+;; Expand ldexp for DFmode through XFmode intermediates: operand 1 is
+;; float-extended into operand 3, the SImode operand 2 is converted into
+;; operand 4, the UNSPEC_FSCALE_FRACT/UNSPEC_FSCALE_EXP pair writes
+;; operands 5 and 6, and operand 5 is truncated to DF as operand 0.
+(define_expand "ldexpdf3"
+  [(set (match_dup 3)
+       (float_extend:XF (match_operand:DF 1 "register_operand" "")))
+   (set (match_dup 4)
+       (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_dup 5)
+                  (unspec:XF [(match_dup 3) (match_dup 4)]
+                             UNSPEC_FSCALE_FRACT))
+             (set (match_dup 6)
+                  (unspec:XF [(match_dup 3) (match_dup 4)]
+                             UNSPEC_FSCALE_EXP))])
+   (set (match_operand:DF 0 "register_operand" "")
+       (float_truncate:DF (match_dup 5)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  /* Operands 3..6 are fresh XFmode pseudos for the intermediate values.  */
+  for (i=3; i<7; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+;; Expand ldexp for SFmode through XFmode intermediates: operand 1 is
+;; float-extended into operand 3, the SImode operand 2 is converted into
+;; operand 4, the UNSPEC_FSCALE_FRACT/UNSPEC_FSCALE_EXP pair writes
+;; operands 5 and 6, and operand 5 is truncated to SF as operand 0.
+(define_expand "ldexpsf3"
+  [(set (match_dup 3)
+       (float_extend:XF (match_operand:SF 1 "register_operand" "")))
+   (set (match_dup 4)
+       (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_dup 5)
+                  (unspec:XF [(match_dup 3) (match_dup 4)]
+                             UNSPEC_FSCALE_FRACT))
+             (set (match_dup 6)
+                  (unspec:XF [(match_dup 3) (match_dup 4)]
+                             UNSPEC_FSCALE_EXP))])
+   (set (match_operand:SF 0 "register_operand" "")
+       (float_truncate:SF (match_dup 5)))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  /* Operands 3..6 are fresh XFmode pseudos for the intermediate values.  */
+  for (i=3; i<7; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
+
+;; Expand ldexp for XFmode: the SImode operand 2 is converted to XFmode in
+;; operand 3, then the UNSPEC_FSCALE_FRACT/UNSPEC_FSCALE_EXP pair is applied
+;; to operand 1 and operand 3.  The FRACT half is the result (operand 0);
+;; the EXP half lands in operand 4, which this expander does not use further.
+(define_expand "ldexpxf3"
+  [(set (match_dup 3)
+       (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+                  (unspec:XF [(match_operand:XF 1 "register_operand" "")
+                              (match_dup 3)]
+                             UNSPEC_FSCALE_FRACT))
+             (set (match_dup 4)
+                  (unspec:XF [(match_dup 1) (match_dup 3)]
+                             UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  int i;
+
+  /* Operands 3 and 4 are fresh XFmode pseudos for the intermediate values.  */
+  for (i=3; i<5; i++)
+    operands[i] = gen_reg_rtx (XFmode);
+})
 \f
 
 (define_insn "frndintxf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   DONE;
 })
 
-(define_insn "frndintxf2_floor"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-        UNSPEC_FRNDINT_FLOOR))
-   (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
+;; Pre-reload form of the DImode UNSPEC_FIST conversion of XF operand 1
+;; (the insn condition rejects reload_in_progress/reload_completed).
+;; Its template is "#" and the split condition is "&& 1"; the split code
+;; emits fistdi2 for a memory destination, otherwise it obtains a DImode
+;; SLOT_TEMP stack slot and emits fistdi2_with_temp.
+(define_insn_and_split "*fistdi2_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fistdi2 (operands[0], operands[1]));
+  else
+    {
+      operands[2] = assign_386_stack_local (DImode, SLOT_TEMP);
+      emit_insn (gen_fistdi2_with_temp (operands[0], operands[1],
+                                        operands[2]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
+
+;; DImode UNSPEC_FIST store of XF operand 1 into memory operand 0.  The
+;; assembly text comes from output_fix_trunc (insn, operands, 0); an
+;; XFmode scratch (operand 2) is clobbered.
+(define_insn "fistdi2"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST))
+   (clobber (match_scratch:XF 2 "=&1f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "floor")
-   (set_attr "mode" "XF")])
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
 
-(define_expand "floordf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
+;; DImode UNSPEC_FIST of XF operand 1 where operand 0 may be memory or a
+;; general register ("?r" alternative).  A DImode memory temporary
+;; (operand 2) and an XFmode scratch (operand 3) are clobbered.  The
+;; template is "#"; the define_splits that follow rewrite it once
+;; reload_completed holds.
+(define_insn "fistdi2_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 3 "=&1f,&1f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "DI")])
 
-  emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_floor (op0, op1, op2, op3));
+;; After reload, register destination: do the DImode UNSPEC_FIST store
+;; into the memory temporary (operand 2), keeping the scratch clobber,
+;; then copy the temporary into operand 0.
+(define_split 
+  [(set (match_operand:DI 0 "register_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 2) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+             (clobber (match_dup 3))])
+   (set (match_dup 0) (match_dup 2))]
+  "")
 
-  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+;; After reload, memory destination: store straight into operand 0; the
+;; memory temporary (operand 2) does not appear in the replacement.
+(define_split 
+  [(set (match_operand:DI 0 "memory_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch 3 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST))
+             (clobber (match_dup 3))])]
+  "")
+
+;; Pre-reload form of the UNSPEC_FIST conversion for the X87MODEI12
+;; modes with a register destination.  The template is "#" and the split
+;; condition is "&& 1"; the split code takes a <MODE>mode SLOT_TEMP stack
+;; slot as operand 2 and emits fist<mode>2_with_temp.
+(define_insn_and_split "*fist<mode>2_1"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  operands[2] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+  emit_insn (gen_fist<mode>2_with_temp (operands[0], operands[1],
+                                       operands[2]));
   DONE;
-})
+}
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
 
-(define_expand "floorsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
+;; UNSPEC_FIST store of XF operand 1 into memory operand 0 for the
+;; X87MODEI12 modes.  The assembly text comes from
+;; output_fix_trunc (insn, operands, 0).
+(define_insn "fist<mode>2"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
 
-  emit_insn (gen_extendsfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_floor (op0, op1, op2, op3));
+;; UNSPEC_FIST of XF operand 1 into a general register (operand 0) for
+;; the X87MODEI12 modes, with a memory temporary (operand 2) that is
+;; clobbered.  The template is "#"; the define_splits that follow rewrite
+;; it once reload_completed holds.
+(define_insn "fist<mode>2_with_temp"
+  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fpspc")
+   (set_attr "mode" "<MODE>")])
 
-  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
-  DONE;
-})
+;; After reload, register destination: do the UNSPEC_FIST store into the
+;; memory temporary (operand 2), then copy the temporary into operand 0.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)]
+                      UNSPEC_FIST))
+   (set (match_dup 0) (match_dup 2))]
+  "")
 
-(define_expand "floorxf2"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))]
+;; After reload, memory destination: store straight into operand 0; the
+;; memory temporary (operand 2) does not appear in the replacement.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST))
+   (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
+  "reload_completed"
+  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+                      UNSPEC_FIST))]
+  "")
+
+;; Named expander for lrint from XF operand 1 to an X87MODEI destination,
+;; expressed as UNSPEC_FIST.  There are no preparation statements, so the
+;; pattern is emitted exactly as written.
+(define_expand "lrint<mode>2"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
+  "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+;; Pre-reload placeholder for UNSPEC_FRNDINT_FLOOR on XF operand 1 (tied
+;; to operand 0 by the "0" constraint).  The split sets the I387_FLOOR
+;; entry of ix86_optimize_mode_switching, takes the SLOT_CW_STORED and
+;; SLOT_CW_FLOOR HImode stack slots as operands 2 and 3, and emits
+;; frndintxf2_floor_i387.
+(define_insn_and_split "frndintxf2_floor"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
 {
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
 
-  emit_insn (gen_frndintxf2_floor (operands[0], operands[1], op2, op3));
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+
+  emit_insn (gen_frndintxf2_floor_i387 (operands[0], operands[1],
+                                       operands[2], operands[3]));
   DONE;
-})
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "XF")])
 
-(define_insn "frndintxf2_ceil"
+;; Emits fldcw %3, frndint, fldcw %2: load the control word in operand 3,
+;; round, then load the control word in operand 2.  frndintxf2_floor
+;; passes the SLOT_CW_STORED slot as operand 2 and the SLOT_CW_FLOOR slot
+;; as operand 3.
+(define_insn "frndintxf2_floor_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-        UNSPEC_FRNDINT_CEIL))
+        UNSPEC_FRNDINT_FLOOR))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
   [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "ceil")
+   (set_attr "i387_cw" "floor")
    (set_attr "mode" "XF")])
 
-(define_expand "ceildf2"
+;; XFmode floor: emit frndintxf2_floor on the two operands and finish
+;; with DONE, so the RTL template above is not emitted itself.
+(define_expand "floorxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
+  DONE;
+})
+
+;; DFmode floor via XFmode temporaries: extend operand 1 with
+;; gen_extenddfxf2, round with gen_frndintxf2_floor, then convert back
+;; into operand 0 with gen_truncxfdf2_i387_noop.
+(define_expand "floordf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_ceil (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_floor (op0, op1));
 
   emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "ceilsf2"
+;; SFmode floor via XFmode temporaries: extend operand 1 with
+;; gen_extendsfxf2, round with gen_frndintxf2_floor, then convert back
+;; into operand 0 with gen_truncxfsf2_i387_noop.
+(define_expand "floorsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extendsfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_ceil (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_floor (op0, op1));
 
   emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "ceilxf2"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))]
+;; Pre-reload form of the UNSPEC_FIST_FLOOR conversion of XF operand 1 to
+;; an X87MODEI destination.  The split sets the I387_FLOOR entry of
+;; ix86_optimize_mode_switching and takes the SLOT_CW_STORED and
+;; SLOT_CW_FLOOR HImode slots as operands 2 and 3.  A memory destination
+;; gets fist<mode>2_floor; otherwise a <MODE>mode SLOT_TEMP slot becomes
+;; operand 4 and fist<mode>2_floor_with_temp is emitted.
+(define_insn_and_split "*fist<mode>2_floor_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_FLOOR))
+   (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations"
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
 {
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  ix86_optimize_mode_switching[I387_FLOOR] = 1;
 
-  emit_insn (gen_frndintxf2_ceil (operands[0], operands[1], op2, op3));
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_FLOOR);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_floor (operands[0], operands[1],
+                                     operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_floor_with_temp (operands[0], operands[1],
+                                                 operands[2], operands[3],
+                                                 operands[4]));
+    }
   DONE;
-})
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "frndintxf2_trunc"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-        UNSPEC_FRNDINT_TRUNC))
+;; DImode UNSPEC_FIST_FLOOR store of XF operand 1 into memory operand 0.
+;; Operands 2 and 3 are HImode memory operands that are only used; an
+;; XFmode scratch (operand 4) is clobbered.  The assembly text comes from
+;; output_fix_trunc (insn, operands, 0).
+(define_insn "fistdi2_floor"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST_FLOOR))
    (use (match_operand:HI 2 "memory_operand" "m"))
-   (use (match_operand:HI 3 "memory_operand" "m"))]
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
-  [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "trunc")
-   (set_attr "mode" "XF")])
-
-(define_expand "btruncdf2"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+;; DImode UNSPEC_FIST_FLOOR of XF operand 1 where operand 0 may be memory
+;; or a general register ("?r" alternative).  Operands 2 and 3 are HImode
+;; memory operands that are only used; a DImode memory temporary
+;; (operand 4) and an XFmode scratch (operand 5) are clobbered.  The
+;; template is "#"; the define_splits that follow rewrite it once
+;; reload_completed holds.
+(define_insn "fistdi2_floor_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "DI")])
+
+;; After reload, register destination: do the DImode UNSPEC_FIST_FLOOR
+;; store into the memory temporary (operand 4), keeping both uses and the
+;; scratch clobber, then copy the temporary into operand 0.
+(define_split 
+  [(set (match_operand:DI 0 "register_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+             (use (match_dup 2))
+             (use (match_dup 3))
+             (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+;; After reload, memory destination: store straight into operand 0; the
+;; memory temporary (operand 4) does not appear in the replacement.
+(define_split 
+  [(set (match_operand:DI 0 "memory_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_FLOOR))
+             (use (match_dup 2))
+             (use (match_dup 3))
+             (clobber (match_dup 5))])]
+  "")
+
+;; UNSPEC_FIST_FLOOR store of XF operand 1 into memory operand 0 for the
+;; X87MODEI12 modes.  Operands 2 and 3 are HImode memory operands that
+;; are only used.  The assembly text comes from
+;; output_fix_trunc (insn, operands, 0).
+(define_insn "fist<mode>2_floor"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+;; UNSPEC_FIST_FLOOR of XF operand 1 for the X87MODEI12 modes where
+;; operand 0 may be memory or a general register ("?r" alternative).
+;; Operands 2 and 3 are HImode memory operands that are only used; a
+;; memory temporary (operand 4) is clobbered.  The template is "#"; the
+;; define_splits that follow rewrite it once reload_completed holds.
+(define_insn "fist<mode>2_floor_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "floor")
+   (set_attr "mode" "<MODE>")])
+
+;; After reload, register destination: do the UNSPEC_FIST_FLOOR store
+;; into the memory temporary (operand 4), keeping both uses, then copy
+;; the temporary into operand 0.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+                                 UNSPEC_FIST_FLOOR))
+             (use (match_dup 2))
+             (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+;; After reload, memory destination: store straight into operand 0; the
+;; memory temporary (operand 4) does not appear in the replacement.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_FLOOR))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+                                 UNSPEC_FIST_FLOOR))
+             (use (match_dup 2))
+             (use (match_dup 3))])]
+  "")
+
+;; Named expander for lfloor from XF operand 1 to an X87MODEI destination,
+;; expressed as UNSPEC_FIST_FLOOR together with a FLAGS_REG clobber.
+;; There are no preparation statements, so the pattern is emitted exactly
+;; as written.
+(define_expand "lfloor<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+                  (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST_FLOOR))
+             (clobber (reg:CC FLAGS_REG))])]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+  "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+;; Pre-reload placeholder for UNSPEC_FRNDINT_CEIL on XF operand 1 (tied
+;; to operand 0 by the "0" constraint).  The split sets the I387_CEIL
+;; entry of ix86_optimize_mode_switching, takes the SLOT_CW_STORED and
+;; SLOT_CW_CEIL HImode stack slots as operands 2 and 3, and emits
+;; frndintxf2_ceil_i387.
+(define_insn_and_split "frndintxf2_ceil"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+
+  emit_insn (gen_frndintxf2_ceil_i387 (operands[0], operands[1],
+                                      operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+;; Emits fldcw %3, frndint, fldcw %2: load the control word in operand 3,
+;; round, then load the control word in operand 2.  frndintxf2_ceil
+;; passes the SLOT_CW_STORED slot as operand 2 and the SLOT_CW_CEIL slot
+;; as operand 3.
+(define_insn "frndintxf2_ceil_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "XF")])
+
+;; XFmode ceil: emit frndintxf2_ceil on the two operands and finish with
+;; DONE, so the RTL template above is not emitted itself.
+(define_expand "ceilxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
+  DONE;
+})
+
+;; DFmode ceil via XFmode temporaries: extend operand 1 with
+;; gen_extenddfxf2, round with gen_frndintxf2_ceil, then convert back
+;; into operand 0 with gen_truncxfdf2_i387_noop.
+(define_expand "ceildf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_trunc (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_ceil (op0, op1));
 
   emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "btruncsf2"
+;; SFmode ceil via XFmode temporaries: extend operand 1 with
+;; gen_extendsfxf2, round with gen_frndintxf2_ceil, then convert back
+;; into operand 0 with gen_truncxfsf2_i387_noop.
+(define_expand "ceilsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extendsfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_trunc (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_ceil (op0, op1));
 
   emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "btruncxf2"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))]
+;; Pre-reload form of the UNSPEC_FIST_CEIL conversion of XF operand 1 to
+;; an X87MODEI destination.  The split sets the I387_CEIL entry of
+;; ix86_optimize_mode_switching and takes the SLOT_CW_STORED and
+;; SLOT_CW_CEIL HImode slots as operands 2 and 3.  A memory destination
+;; gets fist<mode>2_ceil; otherwise a <MODE>mode SLOT_TEMP slot becomes
+;; operand 4 and fist<mode>2_ceil_with_temp is emitted.
+(define_insn_and_split "*fist<mode>2_ceil_1"
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_CEIL))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_CEIL] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_CEIL);
+  if (memory_operand (operands[0], VOIDmode))
+    emit_insn (gen_fist<mode>2_ceil (operands[0], operands[1],
+                                    operands[2], operands[3]));
+  else
+    {
+      operands[4] = assign_386_stack_local (<MODE>mode, SLOT_TEMP);
+      emit_insn (gen_fist<mode>2_ceil_with_temp (operands[0], operands[1],
+                                                operands[2], operands[3],
+                                                operands[4]));
+    }
+  DONE;
+}
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+;; DImode UNSPEC_FIST_CEIL store from an x87 register into memory.
+;; Operands 2 and 3 are HImode memory operands that are only "use"d here
+;; (presumably the SLOT_CW_STORED / SLOT_CW_CEIL slots allocated by the
+;; *fist<mode>2_ceil_1 split -- confirm against output_fix_trunc).
+;; Operand 4 is an XFmode scratch.  The assembly text is produced by
+;; output_fix_trunc.
+(define_insn "fistdi2_ceil"
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))
+   (clobber (match_scratch:XF 4 "=&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+;; Variant of the DImode UNSPEC_FIST_CEIL store whose destination may be
+;; memory or a general register (second alternative, "?r").  It additionally
+;; clobbers a DImode memory temporary (operand 4) and an XFmode scratch
+;; (operand 5).  Its output template is "#", so it has no assembly of its
+;; own and relies on being split.
+(define_insn "fistdi2_ceil_with_temp"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_scratch:XF 5 "=&1f,&1f"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "DI")])
+
+;; After reload, DImode UNSPEC_FIST_CEIL with a register destination:
+;; perform the store into the memory temporary (operand 4), keeping the
+;; control-word uses and the scratch clobber, then copy the temporary into
+;; the destination register.
+(define_split 
+  [(set (match_operand:DI 0 "register_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+             (use (match_dup 2))
+             (use (match_dup 3))
+             (clobber (match_dup 5))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+;; After reload, DImode UNSPEC_FIST_CEIL with a memory destination: the
+;; memory temporary is unnecessary, so the replacement stores directly into
+;; operand 0 and omits the clobber of operand 4.
+(define_split 
+  [(set (match_operand:DI 0 "memory_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:DI 4 "memory_operand" ""))
+   (clobber (match_scratch 5 ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:DI [(match_dup 1)] UNSPEC_FIST_CEIL))
+             (use (match_dup 2))
+             (use (match_dup 3))
+             (clobber (match_dup 5))])]
+  "")
+
+;; UNSPEC_FIST_CEIL store to memory for the X87MODEI12 integer modes.
+;; Unlike fistdi2_ceil there is no XFmode scratch clobber.  Operands 2 and 3
+;; are HImode memory operands that are only "use"d; the assembly text is
+;; produced by output_fix_trunc.
+(define_insn "fist<mode>2_ceil"
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "* return output_fix_trunc (insn, operands, 0);"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+;; X87MODEI12 UNSPEC_FIST_CEIL store whose destination may be memory or a
+;; general register ("?r").  A memory temporary in the same mode (operand 4)
+;; is clobbered.  The output template is "#", so this insn relies on being
+;; split.
+(define_insn "fist<mode>2_ceil_with_temp"
+  [(set (match_operand:X87MODEI12 0 "nonimmediate_operand" "=m,?r")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f,f")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" "m,m"))
+   (use (match_operand:HI 3 "memory_operand" "m,m"))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "#"
+  [(set_attr "type" "fistp")
+   (set_attr "i387_cw" "ceil")
+   (set_attr "mode" "<MODE>")])
+
+;; After reload, X87MODEI12 UNSPEC_FIST_CEIL with a register destination:
+;; store into the memory temporary (operand 4) first, then move the
+;; temporary into the destination register.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 4) (unspec:X87MODEI12 [(match_dup 1)]
+                                 UNSPEC_FIST_CEIL))
+             (use (match_dup 2))
+             (use (match_dup 3))])
+   (set (match_dup 0) (match_dup 4))]
+  "")
+
+;; After reload, X87MODEI12 UNSPEC_FIST_CEIL with a memory destination:
+;; store directly into operand 0; the temporary (operand 4) is dropped from
+;; the replacement.
+(define_split 
+  [(set (match_operand:X87MODEI12 0 "memory_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+        UNSPEC_FIST_CEIL))
+   (use (match_operand:HI 2 "memory_operand" ""))
+   (use (match_operand:HI 3 "memory_operand" ""))
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" ""))]
+  "reload_completed"
+  [(parallel [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
+                                 UNSPEC_FIST_CEIL))
+             (use (match_dup 2))
+             (use (match_dup 3))])]
+  "")
+
+;; lceil<mode>2: expander with an empty preparation statement.  It emits
+;; the X87MODEI UNSPEC_FIST_CEIL set of an XFmode register together with a
+;; FLAGS_REG clobber, i.e. the same RTL shape as *fist<mode>2_ceil_1.
+;; Enabled only with TARGET_USE_FANCY_MATH_387, when SSE math is off or
+;; mixed SSE/i387 math is allowed, and flag_unsafe_math_optimizations is set.
+(define_expand "lceil<mode>2"
+  [(parallel [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+                  (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
+                   UNSPEC_FIST_CEIL))
+             (clobber (reg:CC FLAGS_REG))])]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
+  "")
+
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+;;
+;; Pre-reload form of the XFmode UNSPEC_FRNDINT_TRUNC operation.  It is
+;; output as "#" and always split ("&& 1").  The split code sets
+;; ix86_optimize_mode_switching[I387_TRUNC], allocates the HImode stack
+;; slots SLOT_CW_STORED and SLOT_CW_TRUNC as operands 2 and 3, and emits
+;; frndintxf2_trunc_i387 with them.
+(define_insn_and_split "frndintxf2_trunc"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_TRUNC))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
 {
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  ix86_optimize_mode_switching[I387_TRUNC] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_TRUNC);
 
-  emit_insn (gen_frndintxf2_trunc (operands[0], operands[1], op2, op3));
+  emit_insn (gen_frndintxf2_trunc_i387 (operands[0], operands[1],
+                                       operands[2], operands[3]));
   DONE;
-})
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "trunc")
+   (set_attr "mode" "XF")])
 
-(define_insn "frndintxf2_mask_pm"
+;; XFmode UNSPEC_FRNDINT_TRUNC with explicit control-word operands.  The
+;; emitted sequence is: fldcw from operand 3, frndint, fldcw from operand 2.
+;; Input and output are tied to the same x87 register (constraint "0").
+(define_insn "frndintxf2_trunc_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-        UNSPEC_FRNDINT_MASK_PM))
+        UNSPEC_FRNDINT_TRUNC))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
-  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+  "fldcw\t%3\n\tfrndint\n\tfldcw\t%2"
   [(set_attr "type" "frndint")
-   (set_attr "i387_cw" "mask_pm")
+   (set_attr "i387_cw" "trunc")
    (set_attr "mode" "XF")])
 
-(define_expand "nearbyintdf2"
+;; btruncxf2: XFmode expander that simply emits frndintxf2_trunc on its
+;; two register operands; no mode conversion is needed.
+(define_expand "btruncxf2"
+  [(use (match_operand:XF 0 "register_operand" ""))
+   (use (match_operand:XF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+{
+  emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
+  DONE;
+})
+
+;; btruncdf2: DFmode expander for the x87.  The input is extended to
+;; XFmode (extenddfxf2), passed through frndintxf2_trunc, and the result is
+;; converted back to DFmode with truncxfdf2_i387_noop.  Disabled when DFmode
+;; math is done in SSE (TARGET_SSE2 && TARGET_SSE_MATH) unless mixed
+;; SSE/i387 math is allowed.
+(define_expand "btruncdf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_trunc (op0, op1));
 
   emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "nearbyintsf2"
+;; btruncsf2: SFmode expander for the x87.  The input is extended to
+;; XFmode (extendsfxf2), passed through frndintxf2_trunc, and the result is
+;; converted back to SFmode with truncxfsf2_i387_noop.  Disabled when SSE
+;; math is on unless mixed SSE/i387 math is allowed.
+(define_expand "btruncsf2"
   [(use (match_operand:SF 0 "register_operand" ""))
    (use (match_operand:SF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
 
   emit_insn (gen_extendsfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1, op2, op3));
+  emit_insn (gen_frndintxf2_trunc (op0, op1));
 
   emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
   DONE;
 })
 
+;; Rounding mode control word calculation could clobber FLAGS_REG.
+;;
+;; Pre-reload form of the XFmode UNSPEC_FRNDINT_MASK_PM operation.  It is
+;; output as "#" and always split ("&& 1").  The split code sets
+;; ix86_optimize_mode_switching[I387_MASK_PM], allocates the HImode stack
+;; slots SLOT_CW_STORED and SLOT_CW_MASK_PM as operands 2 and 3, and emits
+;; frndintxf2_mask_pm_i387 with them.
+(define_insn_and_split "frndintxf2_mask_pm"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_MASK_PM))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  ix86_optimize_mode_switching[I387_MASK_PM] = 1;
+
+  operands[2] = assign_386_stack_local (HImode, SLOT_CW_STORED);
+  operands[3] = assign_386_stack_local (HImode, SLOT_CW_MASK_PM);
+
+  emit_insn (gen_frndintxf2_mask_pm_i387 (operands[0], operands[1],
+                                         operands[2], operands[3]));
+  DONE;
+}
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
+
+;; XFmode UNSPEC_FRNDINT_MASK_PM with explicit control-word operands.  The
+;; emitted sequence is: fldcw from operand 3, frndint, fclex, fldcw from
+;; operand 2.  The fclex between frndint and the second fldcw is what
+;; distinguishes this sequence from frndintxf2_trunc_i387.
+(define_insn "frndintxf2_mask_pm_i387"
+  [(set (match_operand:XF 0 "register_operand" "=f")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+        UNSPEC_FRNDINT_MASK_PM))
+   (use (match_operand:HI 2 "memory_operand" "m"))
+   (use (match_operand:HI 3 "memory_operand" "m"))]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations"
+  "fldcw\t%3\n\tfrndint\n\tfclex\n\tfldcw\t%2"
+  [(set_attr "type" "frndint")
+   (set_attr "i387_cw" "mask_pm")
+   (set_attr "mode" "XF")])
+
+;; nearbyintxf2: XFmode expander that simply emits frndintxf2_mask_pm on
+;; its two register operands; control-word slot allocation now happens in
+;; that pattern's split rather than here.
 (define_expand "nearbyintxf2"
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
 {
-  rtx op2 = assign_386_stack_local (HImode, 1);
-  rtx op3 = assign_386_stack_local (HImode, 2);
-       
-  ix86_optimize_mode_switching = 1;
+  emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1]));
+
+  DONE;
+})
+
+;; nearbyintdf2: DFmode expander for the x87.  The input is extended to
+;; XFmode (extenddfxf2), passed through frndintxf2_mask_pm, and the result
+;; is converted back to DFmode with truncxfdf2_i387_noop.  Disabled when
+;; DFmode math is done in SSE (TARGET_SSE2 && TARGET_SSE_MATH) unless mixed
+;; SSE/i387 math is allowed.
+(define_expand "nearbyintdf2"
+  [(use (match_operand:DF 0 "register_operand" ""))
+   (use (match_operand:DF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extenddfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+  DONE;
+})
 
-  emit_insn (gen_frndintxf2_mask_pm (operands[0], operands[1],
-                                    op2, op3));
+;; nearbyintsf2: SFmode expander for the x87.  The input is extended to
+;; XFmode (extendsfxf2), passed through frndintxf2_mask_pm, and the result
+;; is converted back to SFmode with truncxfsf2_i387_noop.  Disabled when SSE
+;; math is on unless mixed SSE/i387 math is allowed.
+(define_expand "nearbyintsf2"
+  [(use (match_operand:SF 0 "register_operand" ""))
+   (use (match_operand:SF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
+
+  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
   DONE;
 })
 
    (use (match_operand:BLK 1 "memory_operand" ""))
    (use (match_operand:SI 2 "nonmemory_operand" ""))
    (use (match_operand:SI 3 "const_int_operand" ""))]
-  "! optimize_size"
+  "! optimize_size || TARGET_INLINE_ALL_STRINGOPS"
 {
  if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
    DONE;
    (set_attr "memory" "both")
    (set_attr "mode" "SI")])
 
-(define_expand "clrmemsi"
+;; setmemsi: block-set expander that only handles a zero fill value.
+;; Operand 2 is checked against const0_rtx; any other value makes the
+;; expander FAIL.  Otherwise the work is handed to ix86_expand_clrmem with
+;; operands 0, 1 and 3, and the expander FAILs if that call returns zero.
+(define_expand "setmemsi"
    [(use (match_operand:BLK 0 "memory_operand" ""))
     (use (match_operand:SI 1 "nonmemory_operand" ""))
-    (use (match_operand 2 "const_int_operand" ""))]
+    (use (match_operand 2 "const_int_operand" ""))
+    (use (match_operand 3 "const_int_operand" ""))]
   ""
 {
- if (ix86_expand_clrmem (operands[0], operands[1], operands[2]))
+ /* If value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+   FAIL;
+
+ if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
    DONE;
  else
    FAIL;
 })
 
-(define_expand "clrmemdi"
+(define_expand "setmemdi"
    [(use (match_operand:BLK 0 "memory_operand" ""))
     (use (match_operand:DI 1 "nonmemory_operand" ""))
-    (use (match_operand 2 "const_int_operand" ""))]
+    (use (match_operand 2 "const_int_operand" ""))
+    (use (match_operand 3 "const_int_operand" ""))]
   "TARGET_64BIT"
 {
- if (ix86_expand_clrmem (operands[0], operands[1], operands[2]))
+ /* If value to set is not zero, use the library routine.  */
+ if (operands[2] != const0_rtx)
+   FAIL;
+
+ if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
    DONE;
  else
    FAIL;
    (set_attr "memory" "store")
    (set_attr "mode" "QI")])
 
-(define_expand "cmpstrsi"
+(define_expand "cmpstrnsi"
   [(set (match_operand:SI 0 "register_operand" "")
        (compare:SI (match_operand:BLK 1 "general_operand" "")
                    (match_operand:BLK 2 "general_operand" "")))
          emit_move_insn (operands[0], const0_rtx);
          DONE;
        }
-      emit_insn (gen_cmpstrqi_nz_1 (addr1, addr2, countreg, align,
-                                   operands[1], operands[2]));
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
+                                    operands[1], operands[2]));
     }
   else
     {
        emit_insn (gen_cmpdi_1_rex64 (countreg, countreg));
       else
        emit_insn (gen_cmpsi_1 (countreg, countreg));
-      emit_insn (gen_cmpstrqi_1 (addr1, addr2, countreg, align,
-                                operands[1], operands[2]));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
+                                 operands[1], operands[2]));
     }
 
   outlow = gen_lowpart (QImode, out);
 ;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
 ;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
 
-(define_expand "cmpstrqi_nz_1"
+(define_expand "cmpstrnqi_nz_1"
   [(parallel [(set (reg:CC FLAGS_REG)
                   (compare:CC (match_operand 4 "memory_operand" "")
                               (match_operand 5 "memory_operand" "")))
   ""
   "")
 
-(define_insn "*cmpstrqi_nz_1"
+(define_insn "*cmpstrnqi_nz_1"
   [(set (reg:CC FLAGS_REG)
        (compare:CC (mem:BLK (match_operand:SI 4 "register_operand" "0"))
                    (mem:BLK (match_operand:SI 5 "register_operand" "1"))))
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-(define_insn "*cmpstrqi_nz_rex_1"
+(define_insn "*cmpstrnqi_nz_rex_1"
   [(set (reg:CC FLAGS_REG)
        (compare:CC (mem:BLK (match_operand:DI 4 "register_operand" "0"))
                    (mem:BLK (match_operand:DI 5 "register_operand" "1"))))
 
 ;; The same, but the count is not known to not be zero.
 
-(define_expand "cmpstrqi_1"
+(define_expand "cmpstrnqi_1"
   [(parallel [(set (reg:CC FLAGS_REG)
                (if_then_else:CC (ne (match_operand 2 "register_operand" "")
                                     (const_int 0))
   ""
   "")
 
-(define_insn "*cmpstrqi_1"
+(define_insn "*cmpstrnqi_1"
   [(set (reg:CC FLAGS_REG)
        (if_then_else:CC (ne (match_operand:SI 6 "register_operand" "2")
                             (const_int 0))
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-(define_insn "*cmpstrqi_rex_1"
+(define_insn "*cmpstrnqi_rex_1"
   [(set (reg:CC FLAGS_REG)
        (if_then_else:CC (ne (match_operand:DI 6 "register_operand" "2")
                             (const_int 0))
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
 
-;; Peephole optimizations to clean up after cmpstr*.  This should be
+;; Peephole optimizations to clean up after cmpstrn*.  This should be
 ;; handled in combine, but it is not currently up to the task.
-;; When used for their truth value, the cmpstr* expanders generate
+;; When used for their truth value, the cmpstrn* expanders generate
 ;; code like this:
 ;;
 ;;   repz cmpsb
 ;;
 ;; The intermediate three instructions are unnecessary.
 
-;; This one handles cmpstr*_nz_1...
+;; This one handles cmpstrn*_nz_1...
 (define_peephole2
   [(parallel[
      (set (reg:CC FLAGS_REG)
      (clobber (match_dup 2))])]
   "")
 
-;; ...and this one handles cmpstr*_1.
+;; ...and this one handles cmpstrn*_1.
 (define_peephole2
   [(parallel[
      (set (reg:CC FLAGS_REG)
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "0")])
 
-(define_insn "movdicc_c_rex64"
+(define_insn "*movdicc_c_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
        (if_then_else:DI (match_operator 1 "ix86_comparison_operator" 
                                [(reg FLAGS_REG) (const_int 0)])
 (define_insn_and_split "*movqicc_noc"
   [(set (match_operand:QI 0 "register_operand" "=r,r")
        (if_then_else:QI (match_operator 1 "ix86_comparison_operator" 
-                               [(match_operand 4 "flags_reg_operand" "") (const_int 0)])
+                               [(match_operand 4 "flags_reg_operand" "")
+                                (const_int 0)])
                      (match_operand:QI 2 "register_operand" "r,0")
                      (match_operand:QI 3 "register_operand" "0,r")))]
   "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
        (if_then_else:SF (match_operand 1 "comparison_operator" "")
                         (match_operand:SF 2 "register_operand" "")
                         (match_operand:SF 3 "register_operand" "")))]
-  "TARGET_CMOVE"
+  "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
   "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
 
-(define_insn "*movsfcc_1"
-  [(set (match_operand:SF 0 "register_operand" "=f#r,f#r,r#f,r#f")
+(define_insn "*movsfcc_1_387"
+  [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
        (if_then_else:SF (match_operator 1 "fcmov_comparison_operator" 
                                [(reg FLAGS_REG) (const_int 0)])
-                     (match_operand:SF 2 "nonimmediate_operand" "f#r,0,rm#f,0")
-                     (match_operand:SF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))]
-  "TARGET_CMOVE
+                     (match_operand:SF 2 "nonimmediate_operand" "f,0,rm,0")
+                     (match_operand:SF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_80387 && TARGET_CMOVE
    && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
        (if_then_else:DF (match_operand 1 "comparison_operator" "")
                         (match_operand:DF 2 "register_operand" "")
                         (match_operand:DF 3 "register_operand" "")))]
-  "TARGET_CMOVE"
+  "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
   "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
 
 (define_insn "*movdfcc_1"
-  [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,&r#f,&r#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" 
                                [(reg FLAGS_REG) (const_int 0)])
-                     (match_operand:DF 2 "nonimmediate_operand" "f#r,0,rm#f,0")
-                     (match_operand:DF 3 "nonimmediate_operand" "0,f#r,0,rm#f")))]
-  "!TARGET_64BIT && TARGET_CMOVE
+                     (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+                     (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
    && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
    (set_attr "mode" "DF")])
 
 (define_insn "*movdfcc_1_rex64"
-  [(set (match_operand:DF 0 "register_operand" "=f#r,f#r,r#f,r#f")
+  [(set (match_operand:DF 0 "register_operand" "=f,f,r,r")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" 
                                [(reg FLAGS_REG) (const_int 0)])
-                     (match_operand:DF 2 "nonimmediate_operand" "f#r,0#r,rm#f,0#f")
-                     (match_operand:DF 3 "nonimmediate_operand" "0#r,f#r,0#f,rm#f")))]
-  "TARGET_64BIT && TARGET_CMOVE
+                     (match_operand:DF 2 "nonimmediate_operand" "f,0,rm,0")
+                     (match_operand:DF 3 "nonimmediate_operand" "0,f,0,rm")))]
+  "TARGET_64BIT && TARGET_80387 && TARGET_CMOVE
    && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
 (define_split
   [(set (match_operand:DF 0 "register_and_not_any_fp_reg_operand" "")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator" 
-                               [(match_operand 4 "flags_reg_operand" "") (const_int 0)])
+                               [(match_operand 4 "flags_reg_operand" "")
+                                (const_int 0)])
                      (match_operand:DF 2 "nonimmediate_operand" "")
                      (match_operand:DF 3 "nonimmediate_operand" "")))]
   "!TARGET_64BIT && reload_completed"
        (if_then_else:XF (match_operand 1 "comparison_operator" "")
                         (match_operand:XF 2 "register_operand" "")
                         (match_operand:XF 3 "register_operand" "")))]
-  "TARGET_CMOVE"
+  "TARGET_80387 && TARGET_CMOVE"
   "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
 
 (define_insn "*movxfcc_1"
                                [(reg FLAGS_REG) (const_int 0)])
                      (match_operand:XF 2 "register_operand" "f,0")
                      (match_operand:XF 3 "register_operand" "0,f")))]
-  "TARGET_CMOVE"
+  "TARGET_80387 && TARGET_CMOVE"
   "@
    fcmov%F1\t{%2, %0|%0, %2}
    fcmov%f1\t{%3, %0|%0, %3}"
   [(set_attr "type" "fcmov")
    (set_attr "mode" "XF")])
 
-(define_expand "minsf3"
-  [(parallel [
-     (set (match_operand:SF 0 "register_operand" "")
-         (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
-                              (match_operand:SF 2 "nonimmediate_operand" ""))
-                          (match_dup 1)
-                          (match_dup 2)))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SSE"
-  "")
+;; These versions of the min/max patterns are intentionally ignorant of
+;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
+;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
+;; are undefined in this condition, we're certain this is correct.
 
-(define_insn "*minsf"
-  [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x")
-       (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0,0,f#x")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE && TARGET_IEEE_FP"
-  "#")
+;; SFmode smin emitted as minss.  Operand 1 carries the "%" commutative
+;; mark and is tied to the destination; operand 2 may be an SSE register or
+;; memory.  Enabled under TARGET_SSE_MATH.
+(define_insn "sminsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
+                (match_operand:SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE_MATH"
+  "minss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
 
-(define_insn "*minsf_nonieee"
-  [(set (match_operand:SF 0 "register_operand" "=x#f,f#x")
-       (if_then_else:SF (lt (match_operand:SF 1 "nonimmediate_operand" "%0,0")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE && !TARGET_IEEE_FP
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "#")
+;; SFmode smax emitted as maxss.  Operand 1 carries the "%" commutative
+;; mark and is tied to the destination; operand 2 may be an SSE register or
+;; memory.  Enabled under TARGET_SSE_MATH.
+(define_insn "smaxsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
+                (match_operand:SF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE_MATH"
+  "maxss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
 
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
-                            (match_operand:SF 2 "nonimmediate_operand" ""))
-                        (match_operand:SF 3 "register_operand" "")
-                        (match_operand:SF 4 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (match_dup 0)
-       (if_then_else:SF (lt (match_dup 1)
-                            (match_dup 2))
-                        (match_dup 1)
-                        (match_dup 2)))])
+;; DFmode smin emitted as minsd.  Operand 1 carries the "%" commutative
+;; mark and is tied to the destination.  Requires TARGET_SSE2 in addition
+;; to TARGET_SSE_MATH.
+(define_insn "smindf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
+                (match_operand:DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "minsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; DFmode smax emitted as maxsd.  Operand 1 carries the "%" commutative
+;; mark and is tied to the destination.  Requires TARGET_SSE2 in addition
+;; to TARGET_SSE_MATH.
+(define_insn "smaxdf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
+                (match_operand:DF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; These versions of the min/max patterns implement exactly the operations
+;;   min = (op1 < op2 ? op1 : op2)
+;;   max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+;; SFmode UNSPEC_IEEE_MIN emitted as minss.  Unlike sminsf3 there is no "%"
+;; mark, so the operand order is fixed: operand 1 must be the register tied
+;; to the destination.
+(define_insn "*ieee_sminsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+                   (match_operand:SF 2 "nonimmediate_operand" "xm")]
+                  UNSPEC_IEEE_MIN))]
+  "TARGET_SSE_MATH"
+  "minss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; SFmode UNSPEC_IEEE_MAX emitted as maxss.  Unlike smaxsf3 there is no "%"
+;; mark, so the operand order is fixed: operand 1 must be the register tied
+;; to the destination.
+(define_insn "*ieee_smaxsf3"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+                   (match_operand:SF 2 "nonimmediate_operand" "xm")]
+                  UNSPEC_IEEE_MAX))]
+  "TARGET_SSE_MATH"
+  "maxss\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "SF")])
+
+;; DFmode UNSPEC_IEEE_MIN emitted as minsd.  Unlike smindf3 there is no "%"
+;; mark, so the operand order is fixed.  Requires TARGET_SSE2 in addition
+;; to TARGET_SSE_MATH.
+(define_insn "*ieee_smindf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+                   (match_operand:DF 2 "nonimmediate_operand" "xm")]
+                  UNSPEC_IEEE_MIN))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "minsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; DFmode UNSPEC_IEEE_MAX emitted as maxsd.  Unlike smaxdf3 there is no "%"
+;; mark, so the operand order is fixed.  Requires TARGET_SSE2 in addition
+;; to TARGET_SSE_MATH.
+(define_insn "*ieee_smaxdf3"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (unspec:DF [(match_operand:DF 1 "register_operand" "0")
+                   (match_operand:DF 2 "nonimmediate_operand" "xm")]
+                  UNSPEC_IEEE_MAX))]
+  "TARGET_SSE2 && TARGET_SSE_MATH"
+  "maxsd\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "mode" "DF")])
+
+;; Make two stack loads independent:
+;;   fld aa              fld aa
+;;   fld %st(0)     ->   fld bb
+;;   fmul bb             fmul %st(1), %st
+;;
+;; Actually we only match the last two instructions for simplicity.
+;;
+;; Matched: a copy of fp register operand 1 into operand 0, followed by
+;; operand 0 = (operand 0 <op> memory operand 3), with operands 0 and 1 in
+;; different registers.  Replacement: load operand 3 into operand 0, then
+;; set operand 0 to the new expression operand 4, which the preparation
+;; code builds as (op0 <op> op1) for commutative operators and
+;; (op1 <op> op0) otherwise.
+(define_peephole2
+  [(set (match_operand 0 "fp_register_operand" "")
+       (match_operand 1 "fp_register_operand" ""))
+   (set (match_dup 0)
+       (match_operator 2 "binary_fp_operator"
+          [(match_dup 0)
+           (match_operand 3 "memory_operand" "")]))]
+  "REGNO (operands[0]) != REGNO (operands[1])"
+  [(set (match_dup 0) (match_dup 3))
+   (set (match_dup 0) (match_dup 4))]
+
+  ;; The % modifier is not operational anymore in peephole2's, so we have to
+  ;; swap the operands manually in the case of addition and multiplication.
+  "if (COMMUTATIVE_ARITH_P (operands[2]))
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                operands[0], operands[1]);
+   else
+     operands[4] = gen_rtx_fmt_ee (GET_CODE (operands[2]), GET_MODE (operands[2]),
+                                operands[1], operands[0]);")
 
 ;; Conditional addition patterns
 (define_expand "addqicc"
   "TARGET_64BIT"
   "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
 
-;; We can't represent the LT test directly.  Do this by swapping the operands.
-
-(define_split
-  [(set (match_operand:SF 0 "fp_register_operand" "")
-       (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "")
-                            (match_operand:SF 2 "register_operand" ""))
-                        (match_operand:SF 3 "register_operand" "")
-                        (match_operand:SF 4 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_dup 2)
-                     (match_dup 1)))
-   (set (match_dup 0)
-       (if_then_else:SF (ge (reg:CCFP FLAGS_REG) (const_int 0))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-(define_insn "*minsf_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (if_then_else:SF (lt (match_operand:SF 1 "register_operand" "0")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                        (match_dup 2)))]
-  "TARGET_SSE && reload_completed"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_expand "mindf3"
-  [(parallel [
-     (set (match_operand:DF 0 "register_operand" "")
-         (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
-                              (match_operand:DF 2 "nonimmediate_operand" ""))
-                          (match_dup 1)
-                          (match_dup 2)))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "#")
-
-(define_insn "*mindf"
-  [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y")
-       (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0,0,f#Y")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_IEEE_FP && TARGET_SSE_MATH"
-  "#")
-
-;; Non-IEEE placeholder for the double-precision minimum.  Operand 1 is
-;; marked commutative ("%") and may be memory, but the condition rejects
-;; the case where operands 1 and 2 are both MEM.  Output template "#":
-;; the insn has to be split.
-(define_insn "*mindf_nonieee"
-  [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y")
-       (if_then_else:DF (lt (match_operand:DF 1 "nonimmediate_operand" "%0,0")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "#")
-
-;; After reload, when the destination is an SSE register, drop the
-;; FLAGS_REG clobber so that the result has the shape of the *mindf_sse
-;; pattern.  Operands 3/4 must match operands 1/2.
-;; NOTE(review): the condition also accepts operands 3/4 matching 1/2 in
-;; swapped order, yet the replacement always selects (match_dup 1) when
-;; the LT test holds -- confirm the swapped case cannot occur here.
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
-                            (match_operand:DF 2 "nonimmediate_operand" ""))
-                        (match_operand:DF 3 "register_operand" "")
-                        (match_operand:DF 4 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (match_dup 0)
-       (if_then_else:DF (lt (match_dup 1)
-                            (match_dup 2))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; We can't represent the LT test directly.  Do this by swapping the operands.
-;; The split emits a CCFP compare of (operand 2, operand 1) followed by a
-;; conditional move that picks operand 1 when the GE test on the flags
-;; holds and operand 2 otherwise.  It applies after reload to destinations
-;; accepted by fp_register_operand.
-;; NOTE(review): as in the SSE split, the condition also accepts operands
-;; 3/4 matching 1/2 in swapped order -- confirm that case is intended.
-(define_split
-  [(set (match_operand:DF 0 "fp_register_operand" "")
-       (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "")
-                            (match_operand:DF 2 "register_operand" ""))
-                        (match_operand:DF 3 "register_operand" "")
-                        (match_operand:DF 4 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_dup 2)
-                     (match_dup 1)))
-   (set (match_dup 0)
-       (if_then_else:DF (ge (reg:CCFP FLAGS_REG) (const_int 0))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; Post-reload SSE form of the double-precision minimum.  Operand 1 is tied
-;; to the destination; operand 2 may be a register or memory.  Requires
-;; TARGET_SSE2 && TARGET_SSE_MATH and is emitted as a single minsd.
-(define_insn "*mindf_sse"
-  [(set (match_operand:DF 0 "register_operand" "=Y")
-       (if_then_else:DF (lt (match_operand:DF 1 "register_operand" "0")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym"))
-                        (match_dup 1)
-                        (match_dup 2)))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
-  "minsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "DF")])
-
-;; Expander for the single-precision maximum.  It generates a parallel of
-;; the GT-selected if_then_else (operand 1 if op1 > op2, else operand 2)
-;; and a clobber of FLAGS_REG.  Enabled for TARGET_SSE.
-(define_expand "maxsf3"
-  [(parallel [
-     (set (match_operand:SF 0 "register_operand" "")
-         (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
-                              (match_operand:SF 2 "nonimmediate_operand" ""))
-                          (match_dup 1)
-                          (match_dup 2)))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SSE"
-  "#")
-
-;; IEEE-mode placeholder for the single-precision maximum.  The output
-;; template is "#", so the insn has to be split before final output.
-;; Three alternatives: destination in an "x" register with operand 1 tied,
-;; destination in an "f" register with operand 1 tied, and destination in
-;; an "f" register with operand 2 tied.
-(define_insn "*maxsf"
-  [(set (match_operand:SF 0 "register_operand" "=x#f,f#x,f#x")
-       (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0,0,f#x")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x,0"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE && TARGET_IEEE_FP"
-  "#")
-
-;; Non-IEEE placeholder for the single-precision maximum.  Operand 1 is
-;; marked commutative ("%") and may be memory, but the condition rejects
-;; the case where operands 1 and 2 are both MEM.  Output template "#":
-;; the insn has to be split.
-(define_insn "*maxsf_nonieee"
-  [(set (match_operand:SF 0 "register_operand" "=x#f,f#x")
-       (if_then_else:SF (gt (match_operand:SF 1 "nonimmediate_operand" "%0,0")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm#f,f#x"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE && !TARGET_IEEE_FP
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "#")
-
-;; After reload, when the destination is an SSE register, drop the
-;; FLAGS_REG clobber so that the result has the shape of the *maxsf_sse
-;; pattern.  Operands 3/4 must match operands 1/2.
-;; NOTE(review): the condition also accepts operands 3/4 matching 1/2 in
-;; swapped order, yet the replacement always selects (match_dup 1) when
-;; the GT test holds -- confirm the swapped case cannot occur here.
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
-                            (match_operand:SF 2 "nonimmediate_operand" ""))
-                        (match_operand:SF 3 "register_operand" "")
-                        (match_operand:SF 4 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (match_dup 0)
-       (if_then_else:SF (gt (match_dup 1)
-                            (match_dup 2))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; After reload, for destinations accepted by fp_register_operand, expand
-;; the single-precision maximum into a CCFP compare of (operand 1,
-;; operand 2) followed by a conditional move that picks operand 1 when the
-;; GT test on the flags holds and operand 2 otherwise.
-;; NOTE(review): the condition also accepts operands 3/4 matching 1/2 in
-;; swapped order -- confirm that case is intended.
-(define_split
-  [(set (match_operand:SF 0 "fp_register_operand" "")
-       (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "")
-                            (match_operand:SF 2 "register_operand" ""))
-                        (match_operand:SF 3 "register_operand" "")
-                        (match_operand:SF 4 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_dup 1)
-                     (match_dup 2)))
-   (set (match_dup 0)
-       (if_then_else:SF (gt (reg:CCFP FLAGS_REG) (const_int 0))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; Post-reload SSE form of the single-precision maximum.  Operand 1 is tied
-;; to the destination; operand 2 may be a register or memory.  Only
-;; recognized once reload has completed, and emitted as a single maxss.
-(define_insn "*maxsf_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (if_then_else:SF (gt (match_operand:SF 1 "register_operand" "0")
-                            (match_operand:SF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                        (match_dup 2)))]
-  "TARGET_SSE && reload_completed"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-;; Expander for the double-precision maximum.  It generates a parallel of
-;; the GT-selected if_then_else (operand 1 if op1 > op2, else operand 2)
-;; and a clobber of FLAGS_REG.  Enabled for TARGET_SSE2 && TARGET_SSE_MATH.
-(define_expand "maxdf3"
-  [(parallel [
-     (set (match_operand:DF 0 "register_operand" "")
-         (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
-                              (match_operand:DF 2 "nonimmediate_operand" ""))
-                          (match_dup 1)
-                          (match_dup 2)))
-     (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "#")
-
-;; IEEE-mode placeholder for the double-precision maximum.  The output
-;; template is "#", so the insn has to be split before final output.
-;; Three alternatives: destination in a "Y" register with operand 1 tied,
-;; destination in an "f" register with operand 1 tied, and destination in
-;; an "f" register with operand 2 tied.
-(define_insn "*maxdf"
-  [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y,f#Y")
-       (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0,0,f#Y")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y,0"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_IEEE_FP"
-  "#")
-
-;; Non-IEEE placeholder for the double-precision maximum.  Operand 1 is
-;; marked commutative ("%") and may be memory, but the condition rejects
-;; the case where operands 1 and 2 are both MEM.  Output template "#":
-;; the insn has to be split.
-(define_insn "*maxdf_nonieee"
-  [(set (match_operand:DF 0 "register_operand" "=Y#f,f#Y")
-       (if_then_else:DF (gt (match_operand:DF 1 "nonimmediate_operand" "%0,0")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym#f,f#Y"))
-                        (match_dup 1)
-                        (match_dup 2)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && !TARGET_IEEE_FP
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "#")
-
-;; After reload, when the destination is an SSE register, drop the
-;; FLAGS_REG clobber so that the result has the shape of the *maxdf_sse
-;; pattern.  Operands 3/4 must match operands 1/2.
-;; NOTE(review): the condition also accepts operands 3/4 matching 1/2 in
-;; swapped order, yet the replacement always selects (match_dup 1) when
-;; the GT test holds -- confirm the swapped case cannot occur here.
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
-                            (match_operand:DF 2 "nonimmediate_operand" ""))
-                        (match_operand:DF 3 "register_operand" "")
-                        (match_operand:DF 4 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (match_dup 0)
-       (if_then_else:DF (gt (match_dup 1)
-                            (match_dup 2))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; After reload, for destinations accepted by fp_register_operand, expand
-;; the double-precision maximum into a CCFP compare of (operand 1,
-;; operand 2) followed by a conditional move that picks operand 1 when the
-;; GT test on the flags holds and operand 2 otherwise.
-;; NOTE(review): the condition also accepts operands 3/4 matching 1/2 in
-;; swapped order -- confirm that case is intended.
-(define_split
-  [(set (match_operand:DF 0 "fp_register_operand" "")
-       (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "")
-                            (match_operand:DF 2 "register_operand" ""))
-                        (match_operand:DF 3 "register_operand" "")
-                        (match_operand:DF 4 "register_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ((operands_match_p (operands[1], operands[3])
-       && operands_match_p (operands[2], operands[4]))
-       || (operands_match_p (operands[1], operands[4])
-          && operands_match_p (operands[2], operands[3])))"
-  [(set (reg:CCFP FLAGS_REG)
-       (compare:CCFP (match_dup 1)
-                     (match_dup 2)))
-   (set (match_dup 0)
-       (if_then_else:DF (gt (reg:CCFP FLAGS_REG) (const_int 0))
-                        (match_dup 1)
-                        (match_dup 2)))])
-
-;; Post-reload SSE form of the double-precision maximum.  Operand 1 is tied
-;; to the destination; operand 2 may be a register or memory.  Requires
-;; TARGET_SSE2 && TARGET_SSE_MATH and is emitted as a single maxsd.
-(define_insn "*maxdf_sse"
-  [(set (match_operand:DF 0 "register_operand" "=Y")
-       (if_then_else:DF (gt (match_operand:DF 1 "register_operand" "0")
-                            (match_operand:DF 2 "nonimmediate_operand" "Ym"))
-                        (match_dup 1)
-                        (match_dup 2)))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
-  "maxsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "DF")])
 \f
 ;; Misc patterns (?)
 
       return "lea{l}\t{%a2, %0|%0, %a2}";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set (attr "type")
       return "lea{q}\t{%a2, %0|%0, %a2}";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set (attr "type")
       return "lea{q}\t{%a2, %0|%0, %a2}";
 
     default:
-      abort ();
+      gcc_unreachable ();
     }
 }
   [(set_attr "type" "alu,lea")
    (set_attr "mode" "DI")])
 
-;; Placeholder for the conditional moves.  This one is split either into an
-;; SSE-based emulation of the move or into the usual cmove sequence.  A
-;; slightly unfortunate fact is that the compares supported by the cmp??ss
-;; instructions are exactly the swapped forms of those supported by the
-;; cmove sequence.
-;; The EQ/NE comparisons also need a bit of care, since they are not directly
-;; supported by i387 comparisons and we need to emit two conditional moves
-;; in tandem.
-;;
-;; Operands 4 and 5 are the compared values, operands 2 and 3 the values
-;; selected between, and operand 6 a scratch.  The condition rejects the
-;; case where both selected values are MEM, and rejects EQ/NE when
-;; TARGET_IEEE_FP is set.
-
-(define_insn "sse_movsfcc"
-  [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?f#xr,?f#xr,?r#xf,?r#xf,?r#xf,?r#xf")
-       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:SF 4 "nonimmediate_operand" "0#fx,x#fx,f#x,f#x,xm#f,xm#f,f#x,f#x,xm#f,xm#f")
-                        (match_operand:SF 5 "nonimmediate_operand" "xm#f,xm#f,f#x,f#x,x#f,x#f,f#x,f#x,x#f,x#f")])
-                     (match_operand:SF 2 "nonimmediate_operand" "x#fr,0#fr,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx,0#rx")
-                     (match_operand:SF 3 "nonimmediate_operand" "x#fr,x#fr,0#fx,f#fx,0#fx,f#fx,0#fx,rm#rx,0#rx,rm#rx")))
-   (clobber (match_scratch:SF 6 "=2,&4,X,X,X,X,X,X,X,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE
-   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
-   /* Avoid combine from being smart and converting min/max
-      instruction patterns into conditional moves.  */
-   && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
-       && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
-       || !rtx_equal_p (operands[4], operands[2])
-       || !rtx_equal_p (operands[5], operands[3]))
-   && (!TARGET_IEEE_FP
-       || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
-  "#")
-
-;; EQ-only placeholder for the single-precision conditional move.  Here
-;; operands 3 and 4 are the compared values (operand 3 is commutative),
-;; operands 1 and 2 the values selected between, and operand 5 a scratch.
-;; Output template "#": the insn has to be split.
-;; NOTE(review): the MEM check below tests operands[2] and operands[3],
-;; whereas the selected values in this pattern are operands 1 and 2 --
-;; confirm whether the indices were meant to be 1 and 2.
-(define_insn "sse_movsfcc_eq"
-  [(set (match_operand:SF 0 "register_operand" "=&x#rf,x#rf,?f#xr,?f#xr,?r#xf,?r#xf")
-       (if_then_else:SF (eq (match_operand:SF 3 "nonimmediate_operand" "%0#fx,x#fx,f#x,xm#f,f#x,xm#f")
-                            (match_operand:SF 4 "nonimmediate_operand" "xm#f,xm#f,f#x,x#f,f#x,x#f"))
-                     (match_operand:SF 1 "nonimmediate_operand" "x#fr,0#fr,0#fx,0#fx,0#rx,0#rx")
-                     (match_operand:SF 2 "nonimmediate_operand" "x#fr,x#fr,f#fx,f#fx,rm#rx,rm#rx")))
-   (clobber (match_scratch:SF 5 "=1,&3,X,X,X,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE
-   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
-  "#")
-
-;; Double-precision counterpart of sse_movsfcc: a "#" placeholder that has
-;; to be split.  Operands 4 and 5 are the compared values, operands 2 and
-;; 3 the values selected between, and operand 6 a scratch.  The condition
-;; requires TARGET_SSE2, rejects the case where both selected values are
-;; MEM, and rejects EQ/NE when TARGET_IEEE_FP is set.
-(define_insn "sse_movdfcc"
-  [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?f#Yr,?f#Yr,?r#Yf,?r#Yf,?r#Yf,?r#Yf")
-       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:DF 4 "nonimmediate_operand" "0#fY,Y#fY,f#Y,f#Y,Ym#f,Ym#f,f#Y,f#Y,Ym#f,Ym#f")
-                        (match_operand:DF 5 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,f#Y,Y#f,Y#f,f#Y,f#Y,Y#f,Y#f")])
-                     (match_operand:DF 2 "nonimmediate_operand" "Y#fr,0#fr,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY,0#rY")
-                     (match_operand:DF 3 "nonimmediate_operand" "Y#fr,Y#fr,0#fY,f#fY,0#fY,f#fY,0#fY,rm#rY,0#rY,rm#rY")))
-   (clobber (match_scratch:DF 6 "=2,&4,X,X,X,X,X,X,X,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)
-   /* Avoid combine from being smart and converting min/max
-      instruction patterns into conditional moves.  */
-   && ((GET_CODE (operands[1]) != LT && GET_CODE (operands[1]) != GT
-       && GET_CODE (operands[1]) != UNLE && GET_CODE (operands[1]) != UNGE)
-       || !rtx_equal_p (operands[4], operands[2])
-       || !rtx_equal_p (operands[5], operands[3]))
-   && (!TARGET_IEEE_FP
-       || (GET_CODE (operands[1]) != EQ && GET_CODE (operands[1]) != NE))"
-  "#")
-
-;; EQ-only placeholder for the double-precision conditional move.  Here
-;; operands 3 and 4 are the compared values (operand 3 is commutative),
-;; operands 1 and 2 the values selected between, and operand 5 a scratch.
-;; Output template "#": the insn has to be split.
-;; NOTE(review): the condition tests TARGET_SSE although this is a DF
-;; pattern using "Y" constraints and sse_movdfcc tests TARGET_SSE2 --
-;; confirm whether TARGET_SSE2 was intended.
-;; NOTE(review): the MEM check below tests operands[2] and operands[3],
-;; whereas the selected values in this pattern are operands 1 and 2 --
-;; confirm whether the indices were meant to be 1 and 2.
-(define_insn "sse_movdfcc_eq"
-  [(set (match_operand:DF 0 "register_operand" "=&Y#rf,Y#rf,?f#Yr,?f#Yr,?r#Yf,?r#Yf")
-       (if_then_else:DF (eq (match_operand:DF 3 "nonimmediate_operand" "%0#fY,Y#fY,f#Y,Ym#f,f#Y,Ym#f")
-                            (match_operand:DF 4 "nonimmediate_operand" "Ym#f,Ym#f,f#Y,Y#f,f#Y,Y#f"))
-                     (match_operand:DF 1 "nonimmediate_operand" "Y#fr,0#fr,0#fY,0#fY,0#rY,0#rY")
-                     (match_operand:DF 2 "nonimmediate_operand" "Y#fr,Y#fr,f#fY,f#fY,rm#rY,rm#rY")))
-   (clobber (match_scratch:DF 5 "=1,&3,X,X,X,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE
-   && (GET_CODE (operands[2]) != MEM || GET_CODE (operands[3]) != MEM)"
-  "#")
-
-;; For non-sse moves just expand the usual cmove sequence.
-;; Applies after reload when the destination is not an SSE register and the
-;; mode is SFmode, or DFmode with TARGET_SSE2.  The compare operands are
-;; handed to ix86_expand_fp_movcc in swapped order (operand 5 first) with
-;; the comparison code passed through swap_condition to compensate.
-(define_split
-  [(set (match_operand 0 "register_operand" "")
-       (if_then_else (match_operator 1 "comparison_operator"
-                       [(match_operand 4 "nonimmediate_operand" "")
-                        (match_operand 5 "register_operand" "")])
-                     (match_operand 2 "nonimmediate_operand" "")
-                     (match_operand 3 "nonimmediate_operand" "")))
-   (clobber (match_operand 6 "" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "!SSE_REG_P (operands[0]) && reload_completed
-   && (GET_MODE (operands[0]) == SFmode
-       || (TARGET_SSE2 && GET_MODE (operands[0]) == DFmode))"
-  [(const_int 0)]
+(define_expand "allocate_stack_worker"
+  [(match_operand:SI 0 "register_operand" "")]
+  "TARGET_STACK_PROBE"
 {
-   ix86_compare_op0 = operands[5];
-   ix86_compare_op1 = operands[4];
-   operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
-                                VOIDmode, operands[5], operands[4]);
-   ix86_expand_fp_movcc (operands);
-   DONE;
-})
-
-;; Split SSE based conditional move into sequence:
-;; cmpCC op0, op4   -  set op0 to 0 or ffffffff depending on the comparison
-;; and   op2, op0   -  zero op2 if comparison was false
-;; nand  op0, op3   -  load op3 to op0 if comparison was false
-;; or   op2, op0   -  get the nonzero one into the result.
-;;
-;; The logical operations are done in V4SFmode on subregs of the SF
-;; operands.  operands[8] is the V4SF view of operand 4 (the comparison
-;; result); operands[6] and operands[7] are the two inputs of the final
-;; IOR, ordered according to whether operand 0 matches operand 4.
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (if_then_else:SF (match_operator:SF 1 "sse_comparison_operator"
-                          [(match_operand:SF 4 "register_operand" "")
-                           (match_operand:SF 5 "nonimmediate_operand" "")])
-                        (match_operand:SF 2 "register_operand" "")
-                        (match_operand:SF 3 "register_operand" "")))
-   (clobber (match_operand 6 "" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed"
-  [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)]))
-   (set (match_dup 2) (and:V4SF (match_dup 2)
-                               (match_dup 8)))
-   (set (match_dup 8) (and:V4SF (not:V4SF (match_dup 8))
-                                         (match_dup 3)))
-   (set (match_dup 0) (ior:V4SF (match_dup 6)
-                               (match_dup 7)))]
-{
-  /* If op2 == op3, op3 would be clobbered before it is used.  */
-  if (operands_match_p (operands[2], operands[3]))
-    {
-      emit_move_insn (operands[0], operands[2]);
-      DONE;
-    }
-
-  PUT_MODE (operands[1], GET_MODE (operands[0]));
-  if (operands_match_p (operands[0], operands[4]))
-    operands[6] = operands[4], operands[7] = operands[2];
-  else
-    operands[6] = operands[2], operands[7] = operands[4];
-  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-  operands[2] = simplify_gen_subreg (V4SFmode, operands[2], SFmode, 0);
-  operands[3] = simplify_gen_subreg (V4SFmode, operands[3], SFmode, 0);
-  operands[8] = simplify_gen_subreg (V4SFmode, operands[4], SFmode, 0);
-  operands[6] = simplify_gen_subreg (V4SFmode, operands[6], SFmode, 0);
-  operands[7] = simplify_gen_subreg (V4SFmode, operands[7], SFmode, 0);
-})
-
-;; Double-precision SSE conditional move, split after reload into a
-;; compare that writes its result to operand 4 followed by AND, AND-NOT
-;; and IOR in V2DFmode on subregs of the DF operands.  When
-;; TARGET_SSE_SPLIT_REGS is set and we are not optimizing for size, an
-;; unpcklpd of each of operands 2 and 3 with itself is emitted first.
-;; If operands 2 and 3 are the same, a plain move is emitted instead.
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (if_then_else:DF (match_operator:DF 1 "sse_comparison_operator"
-                          [(match_operand:DF 4 "register_operand" "")
-                           (match_operand:DF 5 "nonimmediate_operand" "")])
-                        (match_operand:DF 2 "register_operand" "")
-                        (match_operand:DF 3 "register_operand" "")))
-   (clobber (match_operand 6 "" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "SSE_REG_P (operands[0]) && reload_completed"
-  [(set (match_dup 4) (match_op_dup 1 [(match_dup 4) (match_dup 5)]))
-   (set (match_dup 2) (and:V2DF (match_dup 2)
-                               (match_dup 8)))
-   (set (match_dup 8) (and:V2DF (not:V2DF (match_dup 8))
-                                         (match_dup 3)))
-   (set (match_dup 0) (ior:V2DF (match_dup 6)
-                               (match_dup 7)))]
-{
-  if (TARGET_SSE_SPLIT_REGS && !optimize_size)
-    {
-      rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0);
-      emit_insn (gen_sse2_unpcklpd (op, op, op));
-      op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0);
-      emit_insn (gen_sse2_unpcklpd (op, op, op));
-    }
-
-  /* If op2 == op3, op3 would be clobbered before it is used.  */
-  if (operands_match_p (operands[2], operands[3]))
-    {
-      emit_move_insn (operands[0], operands[2]);
-      DONE;
-    }
-
-  PUT_MODE (operands[1], GET_MODE (operands[0]));
-  if (operands_match_p (operands[0], operands[4]))
-    operands[6] = operands[4], operands[7] = operands[2];
-  else
-    operands[6] = operands[2], operands[7] = operands[4];
-  operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-  operands[2] = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0);
-  operands[3] = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0);
-  operands[8] = simplify_gen_subreg (V2DFmode, operands[4], DFmode, 0);
-  operands[6] = simplify_gen_subreg (V2DFmode, operands[6], DFmode, 0);
-  operands[7] = simplify_gen_subreg (V2DFmode, operands[7], DFmode, 0);
-})
-
-;; Special case of conditional move we can handle effectively.
-;; Do not bother with the integer/floating point case, since these are
-;; both considerably slower, unlike in the generic case.
-;;
-;; Variant 1: sse_comparison_operator with the register operand 4 tied to
-;; the destination; the true arm is a register and the false arm is zero.
-(define_insn "*sse_movsfcc_const0_1"
-  [(set (match_operand:SF 0 "register_operand" "=&x")
-       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:SF 4 "register_operand" "0")
-                        (match_operand:SF 5 "nonimmediate_operand" "xm")])
-                     (match_operand:SF 2 "register_operand" "x")
-                     (match_operand:SF 3 "const0_operand" "X")))]
-  "TARGET_SSE"
-  "#")
-
-;; Variant 2: sse_comparison_operator with the register operand 4 tied to
-;; the destination; the true arm is zero and the false arm is a register.
-(define_insn "*sse_movsfcc_const0_2"
-  [(set (match_operand:SF 0 "register_operand" "=&x")
-       (if_then_else:SF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:SF 4 "register_operand" "0")
-                        (match_operand:SF 5 "nonimmediate_operand" "xm")])
-                     (match_operand:SF 2 "const0_operand" "X")
-                     (match_operand:SF 3 "register_operand" "x")))]
-  "TARGET_SSE"
-  "#")
-
-;; Variant 3: fcmov_comparison_operator with the second compare operand
-;; (operand 5) tied to the destination; the true arm is a register and the
-;; false arm is zero.
-(define_insn "*sse_movsfcc_const0_3"
-  [(set (match_operand:SF 0 "register_operand" "=&x")
-       (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
-                       [(match_operand:SF 4 "nonimmediate_operand" "xm")
-                        (match_operand:SF 5 "register_operand" "0")])
-                     (match_operand:SF 2 "register_operand" "x")
-                     (match_operand:SF 3 "const0_operand" "X")))]
-  "TARGET_SSE"
-  "#")
-
-;; Variant 4: fcmov_comparison_operator with the second compare operand
-;; (operand 5) tied to the destination; the true arm is zero and the false
-;; arm is a register.
-(define_insn "*sse_movsfcc_const0_4"
-  [(set (match_operand:SF 0 "register_operand" "=&x")
-       (if_then_else:SF (match_operator 1 "fcmov_comparison_operator"
-                       [(match_operand:SF 4 "nonimmediate_operand" "xm")
-                        (match_operand:SF 5 "register_operand" "0")])
-                     (match_operand:SF 2 "const0_operand" "X")
-                     (match_operand:SF 3 "register_operand" "x")))]
-  "TARGET_SSE"
-  "#")
-
-;; DF conditional move against zero, variant 1: sse_comparison_operator
-;; with the register operand 4 tied to the destination; the true arm is a
-;; register and the false arm is zero.  Placeholder ("#"), TARGET_SSE2 only.
-(define_insn "*sse_movdfcc_const0_1"
-  [(set (match_operand:DF 0 "register_operand" "=&Y")
-       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:DF 4 "register_operand" "0")
-                        (match_operand:DF 5 "nonimmediate_operand" "Ym")])
-                     (match_operand:DF 2 "register_operand" "Y")
-                     (match_operand:DF 3 "const0_operand" "X")))]
-  "TARGET_SSE2"
-  "#")
-
-;; DF conditional move against zero, variant 2: sse_comparison_operator
-;; with the register operand 4 tied to the destination; the true arm is
-;; zero and the false arm is a register.  Placeholder ("#"), TARGET_SSE2 only.
-(define_insn "*sse_movdfcc_const0_2"
-  [(set (match_operand:DF 0 "register_operand" "=&Y")
-       (if_then_else:DF (match_operator 1 "sse_comparison_operator"
-                       [(match_operand:DF 4 "register_operand" "0")
-                        (match_operand:DF 5 "nonimmediate_operand" "Ym")])
-                     (match_operand:DF 2 "const0_operand" "X")
-                     (match_operand:DF 3 "register_operand" "Y")))]
-  "TARGET_SSE2"
-  "#")
-
-;; DF conditional move against zero, variant 3: fcmov_comparison_operator
-;; with the second compare operand (operand 5) tied to the destination;
-;; the true arm is a register and the false arm is zero.  Placeholder
-;; ("#"), TARGET_SSE2 only.
-(define_insn "*sse_movdfcc_const0_3"
-  [(set (match_operand:DF 0 "register_operand" "=&Y")
-       (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
-                       [(match_operand:DF 4 "nonimmediate_operand" "Ym")
-                        (match_operand:DF 5 "register_operand" "0")])
-                     (match_operand:DF 2 "register_operand" "Y")
-                     (match_operand:DF 3 "const0_operand" "X")))]
-  "TARGET_SSE2"
-  "#")
-
-;; DF conditional move against zero, variant 4: fcmov_comparison_operator
-;; with the second compare operand (operand 5) tied to the destination;
-;; the true arm is zero and the false arm is a register.  Placeholder
-;; ("#"), TARGET_SSE2 only.
-(define_insn "*sse_movdfcc_const0_4"
-  [(set (match_operand:DF 0 "register_operand" "=&Y")
-       (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
-                       [(match_operand:DF 4 "nonimmediate_operand" "Ym")
-                        (match_operand:DF 5 "register_operand" "0")])
-                     (match_operand:DF 2 "const0_operand" "X")
-                     (match_operand:DF 3 "register_operand" "Y")))]
-  "TARGET_SSE2"
-  "#")
-
-;; Split an SF conditional move in which one arm is zero, for an SSE
-;; destination after reload, into a compare that writes its mask to
-;; operand 0 followed by a single V4SF AND.  If the operator is not an
-;; sse_comparison_operator, or operand 0 is not operand 4, the compare
-;; operands are exchanged and the condition swapped; operand 0 must equal
-;; operand 4 afterwards or the split aborts.  When the true arm (operand 2)
-;; is the zero, the mask is inverted and ANDed with operand 3; otherwise
-;; the mask is ANDed with operand 2.
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (if_then_else:SF (match_operator 1 "comparison_operator"
-                          [(match_operand:SF 4 "nonimmediate_operand" "")
-                           (match_operand:SF 5 "nonimmediate_operand" "")])
-                        (match_operand:SF 2 "nonmemory_operand" "")
-                        (match_operand:SF 3 "nonmemory_operand" "")))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && (const0_operand (operands[2], GET_MODE (operands[0]))
-       || const0_operand (operands[3], GET_MODE (operands[0])))"
-  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
-   (set (match_dup 8) (and:V4SF (match_dup 6) (match_dup 7)))]
-{
-  PUT_MODE (operands[1], GET_MODE (operands[0]));
-  if (!sse_comparison_operator (operands[1], VOIDmode)
-      || !rtx_equal_p (operands[0], operands[4]))
-    {
-      rtx tmp = operands[5];
-      operands[5] = operands[4];
-      operands[4] = tmp;
-      PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
-    }
-  if (!rtx_equal_p (operands[0], operands[4]))
-    abort ();
-  operands[8] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-  if (const0_operand (operands[2], GET_MODE (operands[2])))
-    {
-      operands[7] = operands[3];
-      operands[6] = gen_rtx_NOT (V4SFmode, operands[8]);
-    }
-  else
-    {
-      operands[7] = operands[2];
-      operands[6] = operands[8];
-    }
-  operands[7] = simplify_gen_subreg (V4SFmode, operands[7], SFmode, 0);
-})
-
-;; Split a DF conditional move in which one arm is zero, for an SSE
-;; destination after reload, into a compare that writes its mask to
-;; operand 0 followed by a single V2DF AND.  When TARGET_SSE_SPLIT_REGS is
-;; set and we are not optimizing for size, an unpcklpd is first emitted for
-;; each of operands 2 and 3 that is a register.  If the operator is not an
-;; sse_comparison_operator, or operand 0 is not operand 4, the compare
-;; operands are exchanged and the condition swapped; operand 0 must equal
-;; operand 4 afterwards or the split aborts.  When the true arm (operand 2)
-;; is the zero, the mask is inverted and ANDed with operand 3; otherwise
-;; the mask is ANDed with operand 2.
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (if_then_else:DF (match_operator 1 "comparison_operator"
-                          [(match_operand:DF 4 "nonimmediate_operand" "")
-                           (match_operand:DF 5 "nonimmediate_operand" "")])
-                        (match_operand:DF 2 "nonmemory_operand" "")
-                        (match_operand:DF 3 "nonmemory_operand" "")))]
-  "SSE_REG_P (operands[0]) && reload_completed
-   && (const0_operand (operands[2], GET_MODE (operands[0]))
-       || const0_operand (operands[3], GET_MODE (operands[0])))"
-  [(set (match_dup 0) (match_op_dup 1 [(match_dup 0) (match_dup 5)]))
-   (set (match_dup 8) (and:V2DF (match_dup 6) (match_dup 7)))]
-{
-  if (TARGET_SSE_SPLIT_REGS && !optimize_size)
-    {
-      if (REG_P (operands[2]))
-       {
-         rtx op = simplify_gen_subreg (V2DFmode, operands[2], DFmode, 0);
-         emit_insn (gen_sse2_unpcklpd (op, op, op));
-       }
-      if (REG_P (operands[3]))
-       {
-         rtx op = simplify_gen_subreg (V2DFmode, operands[3], DFmode, 0);
-         emit_insn (gen_sse2_unpcklpd (op, op, op));
-       }
-    }
-  PUT_MODE (operands[1], GET_MODE (operands[0]));
-  if (!sse_comparison_operator (operands[1], VOIDmode)
-      || !rtx_equal_p (operands[0], operands[4]))
-    {
-      rtx tmp = operands[5];
-      operands[5] = operands[4];
-      operands[4] = tmp;
-      PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
-    }
-  if (!rtx_equal_p (operands[0], operands[4]))
-    abort ();
-  operands[8] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-  if (const0_operand (operands[2], GET_MODE (operands[2])))
-    {
-      operands[7] = operands[3];
-      operands[6] = gen_rtx_NOT (V2DFmode, operands[8]);
-    }
-  else
-    {
-      operands[7] = operands[2];
-      operands[6] = operands[8];
-    }
-  operands[7] = simplify_gen_subreg (V2DFmode, operands[7], DFmode, 0);
-})
-
-(define_expand "allocate_stack_worker"
-  [(match_operand:SI 0 "register_operand" "")]
-  "TARGET_STACK_PROBE"
-{
-  if (reload_completed)
+  if (reload_completed)
     {
       if (TARGET_64BIT)
        emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0]));
   [(label_ref (match_operand 0 "" ""))]
   "!TARGET_64BIT && flag_pic"
 {
-  emit_insn (gen_set_got (pic_offset_table_rtx));
+  if (TARGET_MACHO)
+    {
+      rtx xops[3];
+      rtx picreg = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+      rtx label_rtx = gen_label_rtx ();
+      emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
+      xops[0] = xops[1] = picreg;
+      xops[2] = gen_rtx_CONST (SImode,
+                 gen_rtx_MINUS (SImode,
+                   gen_rtx_LABEL_REF (SImode, label_rtx),
+                   gen_rtx_SYMBOL_REF (SImode, GOT_SYMBOL_NAME)));
+      ix86_expand_binary_operator (MINUS, SImode, xops);
+    }
+  else
+    emit_insn (gen_set_got (pic_offset_table_rtx));
   DONE;
 })
 \f
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && ((GET_MODE (operands[0]) == HImode 
        && ((!optimize_size && !TARGET_FAST_PREFIX)
+            /* ??? next two lines just !satisfies_constraint_K (...) */
            || GET_CODE (operands[2]) != CONST_INT
-           || CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')))
+           || satisfies_constraint_K (operands[2])))
        || (GET_MODE (operands[0]) == QImode 
           && (TARGET_PROMOTE_QImode || optimize_size)))"
   [(parallel [(set (match_dup 0)
   [(set (match_operand:SI 0 "push_operand" "")
        (match_operand:SI 1 "memory_operand" ""))
    (match_scratch:SI 2 "r")]
-  "! optimize_size && ! TARGET_PUSH_MEMORY"
+  "!optimize_size && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(set (match_operand:DI 0 "push_operand" "")
        (match_operand:DI 1 "memory_operand" ""))
    (match_scratch:DI 2 "r")]
-  "! optimize_size && ! TARGET_PUSH_MEMORY"
+  "!optimize_size && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(set (match_operand:SF 0 "push_operand" "")
        (match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "! optimize_size && ! TARGET_PUSH_MEMORY"
+  "!optimize_size && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(set (match_operand:HI 0 "push_operand" "")
        (match_operand:HI 1 "memory_operand" ""))
    (match_scratch:HI 2 "r")]
-  "! optimize_size && ! TARGET_PUSH_MEMORY"
+  "!optimize_size && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(set (match_operand:QI 0 "push_operand" "")
        (match_operand:QI 1 "memory_operand" ""))
    (match_scratch:QI 2 "q")]
-  "! optimize_size && ! TARGET_PUSH_MEMORY"
+  "!optimize_size && !TARGET_PUSH_MEMORY
+   && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
           (const_int 0)]))]
   "ix86_match_ccmode (insn, CCNOmode)
    && (true_regnum (operands[2]) != 0
-       || (GET_CODE (operands[3]) == CONST_INT
-          && CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'K')))
+       || satisfies_constraint_K (operands[3]))
    && peep2_reg_dead_p (1, operands[2])"
   [(parallel
      [(set (match_dup 0)
 ;; Attempt to always use XOR for zeroing registers.
 ;; The replacement is a parallel of the zero store and a FLAGS_REG clobber,
 ;; so it is only applied when peep2_regno_dead_p (0, FLAGS_REG) holds, and
 ;; only when TARGET_USE_MOV0 is off or we are optimizing for size.
 ;; This hunk shows both versions: the removed lines list the accepted
 ;; integer modes explicitly, while the added lines accept any general
 ;; register no wider than a word and rewrite it in word_mode.
 (define_peephole2
   [(set (match_operand 0 "register_operand" "")
-       (const_int 0))]
-  "(GET_MODE (operands[0]) == QImode
-    || GET_MODE (operands[0]) == HImode
-    || GET_MODE (operands[0]) == SImode
-    || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
+       (match_operand 1 "const0_operand" ""))]
+  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
    && (! TARGET_USE_MOV0 || optimize_size)
+   && GENERAL_REG_P (operands[0])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int 0))
               (clobber (reg:CC FLAGS_REG))])]
-  "operands[0] = gen_lowpart (GET_MODE (operands[0]) == DImode ? DImode : SImode,
-                             operands[0]);")
+{
+  operands[0] = gen_lowpart (word_mode, operands[0]);
+})
 
 (define_peephole2
   [(set (strict_low_part (match_operand 0 "register_operand" ""))
                   (mult:DI (match_operand:DI 1 "memory_operand" "")
                            (match_operand:DI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_K8 && !optimize_size
-   && (GET_CODE (operands[2]) != CONST_INT
-       || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
                   (mult:SI (match_operand:SI 1 "memory_operand" "")
                            (match_operand:SI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_K8 && !optimize_size
-   && (GET_CODE (operands[2]) != CONST_INT
-       || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
              (clobber (reg:CC FLAGS_REG))])]
                     (mult:SI (match_operand:SI 1 "memory_operand" "")
                              (match_operand:SI 2 "immediate_operand" ""))))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_K8 && !optimize_size
-   && (GET_CODE (operands[2]) != CONST_INT
-       || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
              (clobber (reg:CC FLAGS_REG))])]
                            (match_operand:DI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:DI 3 "r")]
-  "TARGET_K8 && !optimize_size
-   && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
                            (match_operand:SI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:SI 3 "r")]
-  "TARGET_K8 && !optimize_size
-   && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+   && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
                            (match_operand:HI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:HI 3 "r")]
-  "TARGET_K8 && !optimize_size"
+  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
   if (!rtx_equal_p (operands[0], operands[1]))
     emit_move_insn (operands[0], operands[1]);
 })
+
+;; After splitting up read-modify operations, array accesses with memory
+;; operands may end up in the form:
+;;  sall    $2, %eax
+;;  movl    4(%esp), %edx
+;;  addl    %edx, %eax
+;; instead of the pre-splitting form:
+;;  sall    $2, %eax
+;;  addl    4(%esp), %eax
+;; Turn it into:
+;;  movl    4(%esp), %edx
+;;  leal    (%edx,%eax,4), %eax
+;;
+;; Operands: 0 = shift result, 1 = shifted register, 2 = shift count,
+;; 3 = register loaded from operand 4, 5 = destination of the final add,
+;; 6 and 7 = the two addends (must be operands 0 and 3, in either order).
+
+(define_peephole2
+  [(parallel [(set (match_operand 0 "register_operand" "")
+                  (ashift (match_operand 1 "register_operand" "")
+                          (match_operand 2 "const_int_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])
+   (set (match_operand 3 "register_operand" "")
+        (match_operand 4 "x86_64_general_operand" ""))
+   (parallel [(set (match_operand 5 "register_operand" "")
+                  (plus (match_operand 6 "register_operand" "")
+                        (match_operand 7 "register_operand" "")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) <= 3
+   /* Validate MODE for lea.  */
+   && ((!TARGET_PARTIAL_REG_STALL
+       && (GET_MODE (operands[0]) == QImode
+           || GET_MODE (operands[0]) == HImode))
+       || GET_MODE (operands[0]) == SImode
+       || (TARGET_64BIT && GET_MODE (operands[0]) == DImode))
+   /* We reorder load and the shift.  */
+   && !rtx_equal_p (operands[1], operands[3])
+   && !reg_overlap_mentioned_p (operands[0], operands[4])
+   /* Last PLUS must consist of operand 0 and 3.  */
+   && !rtx_equal_p (operands[0], operands[3])
+   && (rtx_equal_p (operands[3], operands[6])
+       || rtx_equal_p (operands[3], operands[7]))
+   && (rtx_equal_p (operands[0], operands[6])
+       || rtx_equal_p (operands[0], operands[7]))
+   /* The intermediate operand 0 must die or be same as output.  */
+   && (rtx_equal_p (operands[0], operands[5])
+       || peep2_reg_dead_p (3, operands[0]))"
+  [(set (match_dup 3) (match_dup 4))
+   (set (match_dup 0) (match_dup 1))]
+{
+  /* The result is formed in DImode only for a DImode destination; every
+     other accepted mode is computed in SImode.  */
+  enum machine_mode mode = GET_MODE (operands[5]) == DImode ? DImode : SImode;
+  /* The shift count was checked to be 0..3, giving a scale of 1, 2, 4 or 8.  */
+  int scale = 1 << INTVAL (operands[2]);
+  rtx index = gen_lowpart (Pmode, operands[1]);
+  rtx base = gen_lowpart (Pmode, operands[3]);
+  rtx dest = gen_lowpart (mode, operands[5]);
+
+  /* Build base + index * scale as a Pmode address expression.  */
+  operands[1] = gen_rtx_PLUS (Pmode, base,
+                             gen_rtx_MULT (Pmode, index, GEN_INT (scale)));
+  /* Take the address in the destination's mode when that differs from Pmode.  */
+  if (mode != Pmode)
+    operands[1] = gen_rtx_SUBREG (mode, operands[1], 0);
+  /* The second replacement insn now sets the final destination directly.  */
+  operands[0] = dest;
+})
 \f
 ;; Call-value patterns last so that the wildcard operand does not
 ;; disrupt insn-recog's switch tables.
   "jmp\t*%%r11"
   [(set_attr "type" "callv")])
 \f
+;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
+;; That, however, is usually mapped by the OS to SIGSEGV, which is often
+;; caught for use by garbage collectors and the like.  Using an insn that
+;; maps to SIGILL makes it more likely the program will rightfully die.
+;; Keeping with tradition, "6" is in honor of #UD.
+;; The insn is emitted as the raw 16-bit value 0x0b0f through ASM_SHORT
+;; (bytes 0x0f 0x0b in little-endian order, i.e. the ud2 opcode), which is
+;; why the "length" attribute is set explicitly to 2.
 (define_insn "trap"
-  [(trap_if (const_int 1) (const_int 5))]
+  [(trap_if (const_int 1) (const_int 6))]
   ""
-  "int\t$5")
-
-;;; ix86 doesn't have conditional trap instructions, but we fake them
-;;; for the sake of bounds checking.  By emitting bounds checks as
-;;; conditional traps rather than as conditional jumps around
-;;; unconditional traps we avoid introducing spurious basic-block
-;;; boundaries and facilitate elimination of redundant checks.  In
-;;; honor of the too-inflexible-for-BPs `bound' instruction, we use
-;;; interrupt 5.
-;;; 
-;;; FIXME: Static branch prediction rules for ix86 are such that
-;;; forward conditional branches predict as untaken.  As implemented
-;;; below, pseudo conditional traps violate that rule.  We should use
-;;; .pushsection/.popsection to place all of the `int 5's in a special
-;;; section loaded at the end of the text segment and branch forward
-;;; there on bounds-failure, and then jump back immediately (in case
-;;; the system chooses to ignore bounds violations, or to report
-;;; violations and continue execution).
-
-(define_expand "conditional_trap"
-  [(trap_if (match_operator 0 "comparison_operator"
-            [(match_dup 2) (const_int 0)])
-           (match_operand 1 "const_int_operand" ""))]
-  ""
-{
-  emit_insn (gen_rtx_TRAP_IF (VOIDmode,
-                             ix86_expand_compare (GET_CODE (operands[0]),
-                                                  NULL, NULL),
-                             operands[1]));
-  DONE;
-})
+  { return ASM_SHORT "0x0b0f"; }
+  [(set_attr "length" "2")])
 
-(define_insn "*conditional_trap_1"
-  [(trap_if (match_operator 0 "comparison_operator"
-            [(reg FLAGS_REG) (const_int 0)])
-           (match_operand 1 "const_int_operand" ""))]
-  ""
+;; Expander for the 64-bit-only SSE prologue save.  Operand 0 is the BLKmode
+;; destination, set from an UNSPEC_SSE_PROLOGUE_SAVE over hard registers
+;; 21..28 (presumably the first eight SSE registers -- confirm against the
+;; target's register numbering).  Operands 1 and 2 and the label in operand 3
+;; are only "use"d here; the preparation string is empty, so the template is
+;; emitted unchanged.
+(define_expand "sse_prologue_save"
+  [(parallel [(set (match_operand:BLK 0 "" "")
+                  (unspec:BLK [(reg:DI 21)
+                               (reg:DI 22)
+                               (reg:DI 23)
+                               (reg:DI 24)
+                               (reg:DI 25)
+                               (reg:DI 26)
+                               (reg:DI 27)
+                               (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+             (use (match_operand:DI 1 "register_operand" ""))
+             (use (match_operand:DI 2 "immediate_operand" ""))
+             (use (label_ref:DI (match_operand 3 "" "")))])]
+  "TARGET_64BIT"
+  "")
+
+;; Insn form of the SSE prologue save.  The save area starts at
+;; operand 0 + operand 4.  The output code first emits an indirect jump
+;; through operand 1, then one movaps per SSE register index i, counting down
+;; from SSE_REGPARM_MAX - 1 to operand 2, each storing to offset i * 16 of the
+;; save area, and finally emits the label in operand 3.
+;; The two INTVAL tests in the condition keep the displacement of every such
+;; store (operand 4 + i * 16) within [-128, 127].
+;; NOTE(review): presumably the jump lands inside the movaps sequence so that
+;; only some of the stores execute -- confirm against the caller.
+;; NOTE(review): operands[0] is rebuilt as a MEM of a PLUS before the loop, so
+;; the "!= PLUS" test guarding the "rex" prefix looks like it can never be
+;; true -- confirm.
+(define_insn "*sse_prologue_save_insn"
+  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+                         (match_operand:DI 4 "const_int_operand" "n")))
+       (unspec:BLK [(reg:DI 21)
+                    (reg:DI 22)
+                    (reg:DI 23)
+                    (reg:DI 24)
+                    (reg:DI 25)
+                    (reg:DI 26)
+                    (reg:DI 27)
+                    (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
+   (use (match_operand:DI 1 "register_operand" "r"))
+   (use (match_operand:DI 2 "const_int_operand" "i"))
+   (use (label_ref:DI (match_operand 3 "" "X")))]
+  "TARGET_64BIT
+   && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
+   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
+  "*
 {
-  operands[2] = gen_label_rtx ();
-  output_asm_insn ("j%c0\t%l2\; int\t%1", operands);
-  (*targetm.asm_out.internal_label) (asm_out_file, "L",
-                            CODE_LABEL_NUMBER (operands[2]));
+  int i;
+  operands[0] = gen_rtx_MEM (Pmode,
+                            gen_rtx_PLUS (Pmode, operands[0], operands[4]));
+  output_asm_insn (\"jmp\\t%A1\", operands);
+  for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+    {
+      operands[4] = adjust_address (operands[0], DImode, i*16);
+      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
+      PUT_MODE (operands[4], TImode);
+      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
+        output_asm_insn (\"rex\", operands);
+      output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
+    }
+  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
+                            CODE_LABEL_NUMBER (operands[3]));
   RET;
-})
-
-       ;; Pentium III SIMD instructions.
-
-;; Moves for SSE/MMX regs.
+}
+  "
+  [(set_attr "type" "other")
+   (set_attr "length_immediate" "0")
+   (set_attr "length_address" "0")
+   (set_attr "length" "135")
+   (set_attr "memory" "store")
+   (set_attr "modrm" "0")
+   (set_attr "mode" "DI")])
 
-(define_expand "movv4sf"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (match_operand:V4SF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
+;; Expander for "prefetch": operand 0 is the address, operand 1 the
+;; read/write flag (asserted to be 0 or 1) and operand 2 the locality
+;; (asserted to be 0..3).  The preparation code only rewrites operand 1 or
+;; operand 2 and does not end with DONE, so the template is then emitted with
+;; the adjusted operands.
+(define_expand "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "")
+            (match_operand:SI 1 "const_int_operand" "")
+            (match_operand:SI 2 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
 {
-  ix86_expand_vector_move (V4SFmode, operands);
-  DONE;
-})
+  int rw = INTVAL (operands[1]);
+  int locality = INTVAL (operands[2]);
 
-(define_insn "*movv4sf_internal"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE"
-  "@
-    xorps\t%0, %0
-    movaps\t{%1, %0|%0, %1}
-    movaps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF")])
+  gcc_assert (rw == 0 || rw == 1);
+  gcc_assert (locality >= 0 && locality <= 3);
+  gcc_assert (GET_MODE (operands[0]) == Pmode
+             || GET_MODE (operands[0]) == VOIDmode);
 
-(define_split
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
-  "TARGET_SSE && reload_completed"
-  [(set (match_dup 0)
-       (vec_merge:V4SF
-        (vec_duplicate:V4SF (match_dup 1))
-        (match_dup 2)
-        (const_int 1)))]
-{
-  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
-  operands[2] = CONST0_RTX (V4SFmode);
+  /* Use the 3dNOW prefetch when a write prefetch is requested (the SSE
+     counterpart does not support one) or when the SSE prefetch is not
+     available (K6 machines).  Otherwise use the SSE prefetch, as it allows
+     the locality to be specified.  */
+  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
+    operands[2] = GEN_INT (3);  /* 3dNOW path: locality is forced to 3.  */
+  else
+    operands[1] = const0_rtx;   /* SSE path: the write flag is cleared.  */
 })
 
-(define_expand "movv2df"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
-       (match_operand:V2DF 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
+;; SSE prefetch for non-64-bit targets (SImode address).  Only prefetches
+;; whose read/write operand is the literal 0 match.  The locality in
+;; operand 1 selects the mnemonic: 0 -> prefetchnta, 1 -> prefetcht2,
+;; 2 -> prefetcht1, 3 -> prefetcht0.
+(define_insn "*prefetch_sse"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (const_int 0)
+            (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
 {
-  ix86_expand_vector_move (V2DFmode, operands);
-  DONE;
-})
+  /* Output templates, indexed by locality.  */
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
 
-(define_insn "*movv2df_internal"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "xorpd\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movapd\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
+  int locality = INTVAL (operands[1]);
+  gcc_assert (locality >= 0 && locality <= 3);
+
+  return patterns[locality];  
 }
-  [(set_attr "type" "ssemov")
-   (set (attr "mode")
-        (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                (const_string "V4SF")
-              (eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "V2DF"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "V2DF"))]
-              (const_string "V2DF")))])
+  [(set_attr "type" "sse")
+   (set_attr "memory" "none")])
 
-(define_split
-  [(set (match_operand:V2DF 0 "register_operand" "")
-       (match_operand:V2DF 1 "zero_extended_scalar_load_operand" ""))]
-  "TARGET_SSE2 && reload_completed"
-  [(set (match_dup 0)
-       (vec_merge:V2DF
-        (vec_duplicate:V2DF (match_dup 1))
-        (match_dup 2)
-        (const_int 1)))]
+;; SSE prefetch for 64-bit targets (DImode address).  Only prefetches whose
+;; read/write operand is the literal 0 match.  The locality in operand 1
+;; selects the mnemonic: 0 -> prefetchnta, 1 -> prefetcht2,
+;; 2 -> prefetcht1, 3 -> prefetcht0.
+(define_insn "*prefetch_sse_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+            (const_int 0)
+            (match_operand:SI 1 "const_int_operand" ""))]
+  "TARGET_PREFETCH_SSE && TARGET_64BIT"
 {
-  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
-  operands[2] = CONST0_RTX (V2DFmode);
-})
+  /* Output templates, indexed by locality.  */
+  static const char * const patterns[4] = {
+   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
+  };
 
-;; 16 byte integral modes handled by SSE, minus TImode, which gets
-;; special-cased for TARGET_64BIT.
-(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
+  int locality = INTVAL (operands[1]);
+  gcc_assert (locality >= 0 && locality <= 3);
 
-(define_expand "mov<mode>"
-  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
-       (match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
-  DONE;
-})
+  return patterns[locality];  
+}
+  [(set_attr "type" "sse")
+   (set_attr "memory" "none")])
 
-(define_insn "*mov<mode>_internal"
-  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
-       (match_operand:SSEMODEI 1 "vector_move_operand"  "C ,xm,x"))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "TI")))])
-
-;; 8 byte integral modes handled by MMX (and by extension, SSE)
-(define_mode_macro MMXMODEI [V8QI V4HI V2SI])
-
-(define_expand "mov<mode>"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
-       (match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
-  "TARGET_MMX"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
-  DONE;
-})
-
-(define_insn "*mov<mode>_internal_rex64"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
-                               "=rm,r,*y,*y ,m ,*y,Y ,x,x ,m,r,x")
-       (match_operand:MMXMODEI 1 "vector_move_operand"
-                               "Cr ,m,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_MMX
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "@
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "mode" "DI")])
-
-(define_insn "*mov<mode>_internal"
-  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
-                                       "=*y,*y ,m ,*y,*Y,*x,*x ,m")
-       (match_operand:MMXMODEI 1 "vector_move_operand"
-                                       "C  ,*ym,*y,*Y,*y,C ,*xm,*x"))]
-  "TARGET_MMX
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "@
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov")
-   (set_attr "mode" "DI")])
-
-(define_expand "movv2sf"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
-       (match_operand:V2SF 1 "nonimmediate_operand" ""))]
-  "TARGET_MMX"
-{
-  ix86_expand_vector_move (V2SFmode, operands);
-  DONE;
-})
-
-(define_insn "*movv2sf_internal_rex64"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand"
-                               "=rm,r,*y ,*y ,m ,*y,Y ,x,x ,m,r,x")
-        (match_operand:V2SF 1 "vector_move_operand"
-                               "Cr ,m ,C ,*ym,*y,Y ,*y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_MMX
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "@
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    xorps\t%0, %0
-    movlps\t{%1, %0|%0, %1}
-    movlps\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V2SF,V2SF,DI,DI")])
-
-(define_insn "*movv2sf_internal"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand"
-                                       "=*y,*y ,m,*y,*Y,*x,*x ,m")
-        (match_operand:V2SF 1 "vector_move_operand"
-                                       "C ,*ym,*y,*Y,*y,C ,*xm,*x"))]
-  "TARGET_MMX
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "@
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    xorps\t%0, %0
-    movlps\t{%1, %0|%0, %1}
-    movlps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,ssemov,ssemov")
-   (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V2SF,V2SF")])
-
-(define_expand "movti"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-       (match_operand:TI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE || TARGET_64BIT"
-{
-  if (TARGET_64BIT)
-    ix86_expand_move (TImode, operands);
-  else
-    ix86_expand_vector_move (TImode, operands);
-  DONE;
-})
-
-(define_insn "*movti_internal"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
-       (match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
-  "TARGET_SSE && !TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 1:
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "0,1")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "2")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "TI")))])
-
-(define_insn "*movti_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
-       (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 3:
-    case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "4")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "DI")))])
-
-(define_expand "movtf"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-       (match_operand:TF 1 "nonimmediate_operand" ""))]
-  "TARGET_64BIT"
-{
-  ix86_expand_move (TFmode, operands);
-  DONE;
-})
-
-(define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
-       (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-{
-  switch (which_alternative)
-    {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 3:
-    case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      abort ();
-    }
-}
-  [(set_attr "type" "*,*,ssemov,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "4")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "DI")))])
-
-(define_mode_macro SSEPUSH [V16QI V8HI V4SI V2DI TI V4SF V2DF])
-
-(define_insn "*push<mode>"
-  [(set (match_operand:SSEPUSH 0 "push_operand" "=<")
-       (match_operand:SSEPUSH 1 "register_operand" "x"))]
-  "TARGET_SSE"
-  "#")
-
-(define_mode_macro MMXPUSH [V8QI V4HI V2SI V2SF])
-
-(define_insn "*push<mode>"
-  [(set (match_operand:MMXPUSH 0 "push_operand" "=<")
-       (match_operand:MMXPUSH 1 "register_operand" "xy"))]
-  "TARGET_MMX"
-  "#")
-
-(define_split
-  [(set (match_operand 0 "push_operand" "")
-       (match_operand 1 "register_operand" ""))]
-  "!TARGET_64BIT && reload_completed
-   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 3)))
-   (set (match_dup 2) (match_dup 1))]
-  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
-                                stack_pointer_rtx);
-   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
-
-(define_split
-  [(set (match_operand 0 "push_operand" "")
-       (match_operand 1 "register_operand" ""))]
-  "TARGET_64BIT && reload_completed
-   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 3)))
-   (set (match_dup 2) (match_dup 1))]
-  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
-                                stack_pointer_rtx);
-   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
-
-
-(define_split
-  [(set (match_operand:TI 0 "nonimmediate_operand" "")
-        (match_operand:TI 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
-;; All 16-byte vector modes handled by SSE
-(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
-
-(define_expand "movmisalign<mode>"
-  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
-       (match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE"
-{
-  ix86_expand_vector_move_misalign (<MODE>mode, operands);
-  DONE;
-})
-
-;; All 8-byte vector modes handled by MMX
-(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF])
-
-(define_expand "movmisalign<mode>"
-  [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
-       (match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
-  "TARGET_MMX"
-{
-  ix86_expand_vector_move (<MODE>mode, operands);
-  DONE;
-})
-
-;; These two patterns are useful for specifying exactly whether to use
-;; movaps or movups
-(define_expand "sse_movaps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
-    {
-      rtx tmp = gen_reg_rtx (V4SFmode);
-      emit_insn (gen_sse_movaps (tmp, operands[1]));
-      emit_move_insn (operands[0], tmp);
-      DONE;
-    }
-})
-
-(define_insn "*sse_movaps_1"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movaps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov,ssemov")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_movups"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
-    {
-      rtx tmp = gen_reg_rtx (V4SFmode);
-      emit_insn (gen_sse_movups (tmp, operands[1]));
-      emit_move_insn (operands[0], tmp);
-      DONE;
-    }
-})
-
-(define_insn "*sse_movups_1"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movups\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssecvt")
-   (set_attr "mode" "V4SF")])
-
-;; SSE Strange Moves.
-
-(define_insn "sse_movmskps"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V4SF 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE"
-  "movmskps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-
-(define_insn "mmx_maskmovq"
-  [(set (mem:V8QI (match_operand:SI 0 "register_operand" "D"))
-       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
-                     (match_operand:V8QI 2 "register_operand" "y")]
-                    UNSPEC_MASKMOV))]
-  "(TARGET_SSE || TARGET_3DNOW_A) && !TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovq\t{%2, %1|%1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "mmx_maskmovq_rex"
-  [(set (mem:V8QI (match_operand:DI 0 "register_operand" "D"))
-       (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "y")
-                     (match_operand:V8QI 2 "register_operand" "y")]
-                    UNSPEC_MASKMOV))]
-  "(TARGET_SSE || TARGET_3DNOW_A) && TARGET_64BIT"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovq\t{%2, %1|%1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse_movntv4sf"
-  [(set (match_operand:V4SF 0 "memory_operand" "=m")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE"
-  "movntps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_movntdi"
-  [(set (match_operand:DI 0 "memory_operand" "=m")
-       (unspec:DI [(match_operand:DI 1 "register_operand" "y")]
-                  UNSPEC_MOVNT))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "movntq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxmov")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse_movhlps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 3)
-                                    (const_int 0)
-                                    (const_int 1)]))
-        (const_int 3)))]
-  "TARGET_SSE"
-  "movhlps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_movlhps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 3)
-                                    (const_int 0)
-                                    (const_int 1)]))
-        (const_int 12)))]
-  "TARGET_SSE"
-  "movlhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_movhps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
-        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
-        (const_int 12)))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
-  "movhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_movlps"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "nonimmediate_operand" "0,0")
-        (match_operand:V4SF 2 "nonimmediate_operand" "m,x")
-        (const_int 3)))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) == MEM || GET_CODE (operands[2]) == MEM)"
-  "movlps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_loadss"
-  [(match_operand:V4SF 0 "register_operand" "")
-   (match_operand:SF 1 "memory_operand" "")]
-  "TARGET_SSE"
-{
-  emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
-                              CONST0_RTX (V4SFmode)));
-  DONE;
-})
-
-(define_insn "sse_loadss_1"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
-        (match_operand:V4SF 2 "const0_operand" "X")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "movss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_movss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (match_operand:V4SF 2 "register_operand" "x")
-        (const_int 14)))]
-  "TARGET_SSE"
-  "movss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_storess"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-       (vec_select:SF
-        (match_operand:V4SF 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "movss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_shufps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-                     (match_operand:SI 3 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE"
-  ;; @@@ check operand order for intel/nonintel syntax
-  "shufps\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-
-;; SSE arithmetic
-
-(define_insn "addv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "addps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmaddv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "addss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "subv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "subps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (minus:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                    (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "subss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-;; ??? Should probably be done by generic code instead.
-(define_expand "negv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "")
-                 (match_dup 2)))]
-  "TARGET_SSE"
-{
-  rtx m0 = gen_lowpart (SFmode, gen_int_mode (0x80000000, SImode));
-  rtx vm0 = gen_rtx_CONST_VECTOR (V4SFmode, gen_rtvec (4, m0, m0, m0, m0));
-  operands[2] = force_reg (V4SFmode, vm0);
-})
-
-(define_insn "mulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "mulps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmmulv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "mulss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "SF")])
-
-(define_insn "divv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "divps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmdivv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (div:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "divss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "SF")])
-
-
-;; SSE square root/reciprocal
-
-(define_insn "rcpv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
-  "TARGET_SSE"
-  "rcpps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmrcpv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_RCP)
-        (match_operand:V4SF 2 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "rcpss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "rsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
-  "TARGET_SSE"
-  "rsqrtps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmrsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_RSQRT)
-        (match_operand:V4SF 2 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "rsqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "sqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "sqrtps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsqrtv4sf2"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-        (match_operand:V4SF 2 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE"
-  "sqrtss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-;; SSE logical operations.
-
-;; SSE defines logical operations on floating point values.  This brings
-;; interesting challenge to RTL representation where logicals are only valid
-;; on integral types.  We deal with this by representing the floating point
-;; logical as logical on arguments casted to TImode as this is what hardware
-;; really does.  Unfortunately hardware requires the type information to be
-;; present and thus we must avoid subregs from being simplified and eliminated
-;; in later compilation phases.
-;;
-;; We have following variants from each instruction:
-;; sse_andsf3 - the operation taking V4SF vector operands
-;;              and doing TImode cast on them
-;; *sse_andsf3_memory - the operation taking one memory operand casted to
-;;                      TImode, since backend insist on eliminating casts
-;;                      on memory operands
-;; sse_andti3_sf_1 - the operation taking SF scalar operands.
-;;                   We cannot accept memory operand here as instruction reads
-;;                  whole scalar.  This is generated only post reload by GCC
-;;                  scalar float operations that expands to logicals (fabs)
-;; sse_andti3_sf_2 - the operation taking SF scalar input and TImode
-;;                  memory operand.  Eventually combine can be able
-;;                  to synthesize these using splitter.
-;; sse2_anddf3, *sse2_anddf3_memory
-;;              
-;; 
-;; These are not called andti3 etc. because we really really don't want
-;; the compiler to widen DImode ands to TImode ands and then try to move
-;; into DImode subregs of SSE registers, and them together, and move out
-;; of DImode subregs again!
-;; SSE1 single precision floating point logical operation
-(define_expand "sse_andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (and:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
-
-(define_insn "*sse_andv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (and:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "andps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_nandv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" ""))
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
-
-(define_insn "*sse_nandv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (and:V4SF (not:V4SF (match_operand:V4SF 1 "register_operand" "0"))
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "andnps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (ior:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
-
-(define_insn "*sse_iorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (ior:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "orps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-(define_expand "sse_xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-        (xor:V4SF (match_operand:V4SF 1 "register_operand" "")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE"
-  "")
-
-(define_insn "*sse_xorv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (xor:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "xorps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V4SF")])
-
-;; SSE2 double precision floating point logical operation
-
-(define_expand "sse2_andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (and:V2DF (match_operand:V2DF 1 "register_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
-
-(define_insn "*sse2_andv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (and:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "andpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_nandv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" ""))
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
-
-(define_insn "*sse2_nandv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (and:V2DF (not:V2DF (match_operand:V2DF 1 "register_operand" "0"))
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "andnpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (ior:V2DF (match_operand:V2DF 1 "register_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
-
-(define_insn "*sse2_iorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (ior:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "orpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-(define_expand "sse2_xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "")
-        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "")
-
-(define_insn "*sse2_xorv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (xor:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "xorpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "V2DF")])
-
-;; SSE2 integral logicals.  These patterns must always come after floating
-;; point ones since we don't want compiler to use integer opcodes on floating
-;; point SSE values to avoid matching of subregs in the match_operand.
-(define_insn "*sse2_andti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (and:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_andv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse2_nandti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (and:TI (not:TI (match_operand:TI 1 "register_operand" "0"))
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_nandv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (and:V2DI (not:V2DI (match_operand:V2DI 1 "register_operand" "0"))
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse2_iorti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (ior:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_iorv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "*sse2_xorti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (xor:TI (match_operand:TI 1 "nonimmediate_operand" "%0")
-               (match_operand:TI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_xorv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sselog")
-   (set_attr "mode" "TI")])
-
-;; Use xor, but don't show input operands so they aren't live before
-;; this insn.
-(define_insn "sse_clrv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (match_operand:V4SF 1 "const0_operand" "X"))]
-  "TARGET_SSE"
-{
-  if (get_attr_mode (insn) == MODE_TI)
-    return "pxor\t{%0, %0|%0, %0}";
-  else
-    return "xorps\t{%0, %0|%0, %0}";
-}
-  [(set_attr "type" "sselog")
-   (set_attr "memory" "none")
-   (set (attr "mode")
-       (if_then_else
-          (and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
-                        (const_int 0))
-                    (ne (symbol_ref "TARGET_SSE2")
-                        (const_int 0)))
-               (eq (symbol_ref "optimize_size")
-                   (const_int 0)))
-        (const_string "TI")
-        (const_string "V4SF")))])
-
-;; Use xor, but don't show input operands so they aren't live before
-;; this insn.
-(define_insn "sse_clrv2df"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(const_int 0)] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "xorpd\t{%0, %0|%0, %0}"
-  [(set_attr "type" "sselog")
-   (set_attr "memory" "none")
-   (set_attr "mode" "V4SF")])
-
-;; SSE mask-generating compares
-
-(define_insn "maskcmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")]))]
-  "TARGET_SSE"
-  "cmp%D3ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "maskncmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (not:V4SI
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")])))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordps\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3ps\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmmaskcmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")])
-        (subreg:V4SI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "cmp%D3ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
-
-(define_insn "vmmaskncmpv4sf3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (not:V4SI
-         (match_operator:V4SI 3 "sse_comparison_operator"
-               [(match_operand:V4SF 1 "register_operand" "0")
-                (match_operand:V4SF 2 "register_operand" "x")]))
-        (subreg:V4SI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordss\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3ss\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_comi"
-  [(set (reg:CCFP FLAGS_REG)
-        (compare:CCFP (vec_select:SF
-                      (match_operand:V4SF 0 "register_operand" "x")
-                      (parallel [(const_int 0)]))
-                     (vec_select:SF
-                      (match_operand:V4SF 1 "register_operand" "x")
-                      (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "comiss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
-
-(define_insn "sse_ucomi"
-  [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (vec_select:SF
-                       (match_operand:V4SF 0 "register_operand" "x")
-                       (parallel [(const_int 0)]))
-                      (vec_select:SF
-                       (match_operand:V4SF 1 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE"
-  "ucomiss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "SF")])
-
-
-;; SSE unpack
-
-(define_insn "sse_unpckhps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_SSE"
-  "unpckhps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "sse_unpcklps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (vec_select:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (vec_select:V4SF (match_operand:V4SF 2 "register_operand" "x")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_SSE"
-  "unpcklps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-
-;; SSE min/max
-
-(define_insn "smaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "maxps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsmaxv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (smax:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "sminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                  (match_operand:V4SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE"
-  "minps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "vmsminv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (smin:V4SF (match_operand:V4SF 1 "register_operand" "0")
-                   (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
-        (match_dup 1)
-        (const_int 1)))]
-  "TARGET_SSE"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-;; SSE <-> integer/MMX conversions
-
-(define_insn "cvtpi2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0")
-        (vec_duplicate:V4SF
-         (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
-        (const_int 12)))]
-  "TARGET_SSE"
-  "cvtpi2ps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI
-        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
-        (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI
-        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0) (const_int 1)])))]
-  "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtsi2ss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0,0")
-        (vec_duplicate:V4SF
-         (float:SF (match_operand:SI 2 "nonimmediate_operand" "r,rm")))
-        (const_int 14)))]
-  "TARGET_SSE"
-  "cvtsi2ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtsi2ssq"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF
-        (match_operand:V4SF 1 "register_operand" "0,0")
-        (vec_duplicate:V4SF
-         (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
-        (const_int 14)))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvtsi2ssq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (vec_select:SI
-        (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvtss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "SI")])
-
-(define_insn "cvtss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (vec_select:DI
-        (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvtss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "DI")])
-
-(define_insn "cvttss2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (vec_select:SI
-        (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE"
-  "cvttss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvttss2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (vec_select:DI
-        (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
-                     UNSPEC_FIX)
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE && TARGET_64BIT"
-  "cvttss2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
-
-
-;; MMX insns
-
-;; MMX arithmetic
-
-(define_insn "addv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "addv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-(define_insn "addv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
-                  (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; DImode addition; emits paddq.  The plus is wrapped in an
-;; UNSPEC_NOP unspec rather than being exposed as a plain DImode plus,
-;; so this pattern is not named like a standard adddi3 pattern.
-(define_insn "mmx_adddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(plus:DI (match_operand:DI 1 "register_operand" "%0")
-                  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "paddq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI signed-saturating addition (RTL ss_plus);
-;; emits paddsb.  Operand 1 is commutative and tied to the destination.
-(define_insn "ssaddv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI signed-saturating addition (RTL ss_plus);
-;; emits paddsw.  Operand 1 is commutative and tied to the destination.
-(define_insn "ssaddv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI unsigned-saturating addition (RTL us_plus);
-;; emits paddusb.  Operand 1 is commutative and tied to the destination.
-(define_insn "usaddv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
-                     (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI unsigned-saturating addition (RTL us_plus);
-;; emits paddusw.  Operand 1 is commutative and tied to the destination.
-(define_insn "usaddv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "paddusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI subtraction (RTL minus); emits psubb.
-;; Operand 1 (the minuend) is tied to the destination; not commutative.
-(define_insn "subv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI subtraction (RTL minus); emits psubw.
-;; Operand 1 (the minuend) is tied to the destination; not commutative.
-(define_insn "subv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V2SI subtraction (RTL minus); emits psubd.
-;; Operand 1 (the minuend) is tied to the destination; not commutative.
-(define_insn "subv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (minus:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                   (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; DImode subtraction; emits psubq.  The minus is wrapped in an
-;; UNSPEC_NOP unspec rather than being exposed as a plain DImode minus.
-(define_insn "mmx_subdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(minus:DI (match_operand:DI 1 "register_operand" "0")
-                   (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psubq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI signed-saturating subtraction (RTL ss_minus);
-;; emits psubsb.  Operand 1 is tied to the destination.
-(define_insn "sssubv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI signed-saturating subtraction (RTL ss_minus);
-;; emits psubsw.  Operand 1 is tied to the destination.
-(define_insn "sssubv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ss_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI unsigned-saturating subtraction (RTL us_minus);
-;; emits psubusb.  Operand 1 is tied to the destination.
-(define_insn "ussubv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (us_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI unsigned-saturating subtraction (RTL us_minus);
-;; emits psubusw.  Operand 1 is tied to the destination.
-(define_insn "ussubv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (us_minus:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "psubusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI multiplication (RTL mult in V4HI mode); emits pmullw.
-;; Operand 1 is tied to the destination and is not marked commutative.
-(define_insn "mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (mult:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-;; High half of a signed V4HI multiply: both inputs are sign-extended
-;; to V4SI, multiplied, shifted right by 16 and truncated back to V4HI.
-;; Emits pmulhw.
-(define_insn "smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-        (lshiftrt:V4SI
-         (mult:V4SI (sign_extend:V4SI
-                     (match_operand:V4HI 1 "register_operand" "0"))
-                    (sign_extend:V4SI
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-         (const_int 16))))]
-  "TARGET_MMX"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-;; High half of an unsigned V4HI multiply: both inputs are zero-extended
-;; to V4SI, multiplied, shifted right by 16 and truncated back to V4HI.
-;; Emits pmulhuw; enabled for TARGET_SSE or TARGET_3DNOW_A rather than
-;; plain TARGET_MMX.
-(define_insn "umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-        (lshiftrt:V4SI
-         (mult:V4SI (zero_extend:V4SI
-                     (match_operand:V4HI 1 "register_operand" "0"))
-                    (zero_extend:V4SI
-                     (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-         (const_int 16))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-;; Multiply-and-add; emits pmaddwd.  The RTL selects elements {0,2} of
-;; both V4HI inputs and elements {1,3} of both inputs, sign-extends each
-;; selection to V2SI, multiplies the matching selections, and adds the
-;; two V2SI products.
-(define_insn "mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (plus:V2SI
-        (mult:V2SI
-         (sign_extend:V2SI
-          (vec_select:V2HI (match_operand:V4HI 1 "register_operand" "0")
-                           (parallel [(const_int 0) (const_int 2)])))
-         (sign_extend:V2SI
-          (vec_select:V2HI (match_operand:V4HI 2 "nonimmediate_operand" "ym")
-                           (parallel [(const_int 0) (const_int 2)]))))
-        (mult:V2SI
-         (sign_extend:V2SI (vec_select:V2HI (match_dup 1)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)])))
-         (sign_extend:V2SI (vec_select:V2HI (match_dup 2)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)]))))))]
-  "TARGET_MMX"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-
-;; MMX logical operations
-;; Note we don't want to declare these as regular iordi3 insns to prevent
-;; normal code that also wants to use the FPU from getting broken.
-;; The UNSPECs are there to prevent the combiner from getting overly clever.
-;; DImode inclusive OR inside an UNSPEC_NOP wrapper; emits por.
-;; Operand 1 is commutative ("%") and tied to the destination.
-(define_insn "mmx_iordi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(ior:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "por\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; DImode exclusive OR inside an UNSPEC_NOP wrapper; emits pxor.
-;; Operand 1 is commutative ("%") and tied to the destination.
-;; NOTE(review): the "memory" attribute is forced to "none" even though
-;; operand 2 accepts a memory alternative ("ym"); the sibling ior/and
-;; patterns do not override it.  Confirm whether this is intentional.
-(define_insn "mmx_xordi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(xor:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pxor\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")
-   (set_attr "memory" "none")])
-
-;; Same as pxor, but don't show input operands so that we don't think
-;; they are live.
-;; Sets operand 0 from an UNSPEC_NOP of constant zero by emitting
-;; "pxor %0, %0"; the pattern has no input operands.
-(define_insn "mmx_clrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI [(const_int 0)] UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pxor\t{%0, %0|%0, %0}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")
-   (set_attr "memory" "none")])
-
-;; DImode AND inside an UNSPEC_NOP wrapper; emits pand.
-;; Operand 1 is commutative ("%") and tied to the destination.
-(define_insn "mmx_anddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(and:DI (match_operand:DI 1 "register_operand" "%0")
-                 (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pand\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; DImode AND-NOT inside an UNSPEC_NOP wrapper; emits pandn.
-;; The complemented input is operand 1 (tied to the destination), so the
-;; result is (~op1) & op2 and the operands are not interchangeable.
-(define_insn "mmx_nanddi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(and:DI (not:DI (match_operand:DI 1 "register_operand" "0"))
-                         (match_operand:DI 2 "nonimmediate_operand" "ym"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-
-;; MMX unsigned averages/sum of absolute differences
-
-;; V8QI average; emits pavgb.  The RTL describes it per element as
-;; (op1 + op2 + 1) >> 1, with the addition and the ashiftrt both done in
-;; V8QI mode.
-;; NOTE(review): an arithmetic shift in the narrow element mode does not
-;; obviously describe an unsigned rounding average (the carry out of the
-;; 8-bit sum is not represented) -- confirm the RTL matches the insn.
-(define_insn "mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ashiftrt:V8QI
-        (plus:V8QI (plus:V8QI
-                    (match_operand:V8QI 1 "register_operand" "0")
-                    (match_operand:V8QI 2 "nonimmediate_operand" "ym"))
-                   (const_vector:V8QI [(const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pavgb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; V4HI average; emits pavgw.  The RTL describes it per element as
-;; (op1 + op2 + 1) >> 1, with the addition and the ashiftrt both done in
-;; V4HI mode.
-;; NOTE(review): an arithmetic shift in the narrow element mode does not
-;; obviously describe an unsigned rounding average (the carry out of the
-;; 16-bit sum is not represented) -- confirm the RTL matches the insn.
-(define_insn "mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashiftrt:V4HI
-        (plus:V4HI (plus:V4HI
-                    (match_operand:V4HI 1 "register_operand" "0")
-                    (match_operand:V4HI 2 "nonimmediate_operand" "ym"))
-                   (const_vector:V4HI [(const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)
-                                       (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Emits psadbw.  The operation is opaque to the RTL optimizers: two
-;; V8QI inputs go into an UNSPEC_PSADBW that yields a DImode result.
-(define_insn "mmx_psadbw"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI [(match_operand:V8QI 1 "register_operand" "0")
-                   (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-                  UNSPEC_PSADBW))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-
-;; MMX insert/extract/shuffle
-
-;; Expander for inserting a scalar into one element of a V4HI vector.
-;; Callers pass an SImode value (operand 2) and an element index 0..3
-;; (operand 3).  The preparation code narrows operand 2 to HImode and
-;; rewrites the index as the one-bit vec_merge mask (1 << index) that
-;; the *mmx_pinsrw insn below expects.
-(define_expand "mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "")
-        (vec_merge:V4HI
-         (match_operand:V4HI 1 "register_operand" "")
-          (vec_duplicate:V4HI
-            (match_operand:SI 2 "nonimmediate_operand" ""))
-          (match_operand:SI 3 "const_0_to_3_operand" "")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-{
-  operands[2] = gen_lowpart (HImode, operands[2]);
-  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-})
-
-;; Matcher for the insert generated by the mmx_pinsrw expander.
-;; Operand 3 is a power-of-two vec_merge mask; the output code converts
-;; it back to an element index with exact_log2 before printing pinsrw.
-;; The scalar source is printed with the %k modifier.
-(define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (vec_merge:V4HI
-         (match_operand:V4HI 1 "register_operand" "0")
-          (vec_duplicate:V4HI
-            (match_operand:HI 2 "nonimmediate_operand" "rm"))
-          (match_operand:SI 3 "const_pow2_1_to_8_operand" "N")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-{
-  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
-}
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Extract the HImode element selected by constant operand 2 (0..3) from
-;; a V4HI register and zero-extend it into an SImode general register.
-;; Emits pextrw.
-(define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI (vec_select:HI (match_operand:V4HI 1 "register_operand" "y")
-                                      (parallel
-                                       [(match_operand:SI 2 "const_0_to_3_operand" "N")]))))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits pshufw.  The shuffle is represented opaquely as UNSPEC_SHUFFLE
-;; of the V4HI source (register or memory) and an immediate selector;
-;; the destination is not tied to the source.
-(define_insn "mmx_pshufw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (unspec:V4HI [(match_operand:V4HI 1 "nonimmediate_operand" "ym")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-
-;; MMX mask-generating comparisons
-
-;; Element-wise V8QI equality comparison (RTL eq) producing a V8QI
-;; result in the destination; emits pcmpeqb.
-(define_insn "eqv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (eq:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI equality comparison (RTL eq) producing a V4HI
-;; result in the destination; emits pcmpeqw.
-(define_insn "eqv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (eq:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V2SI equality comparison (RTL eq) producing a V2SI
-;; result in the destination; emits pcmpeqd.
-(define_insn "eqv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (eq:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpeqd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI signed greater-than comparison (RTL gt) producing a
-;; V8QI result in the destination; emits pcmpgtb.
-(define_insn "gtv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (gt:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI signed greater-than comparison (RTL gt) producing a
-;; V4HI result in the destination; emits pcmpgtw.
-(define_insn "gtv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (gt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V2SI signed greater-than comparison (RTL gt) producing a
-;; V2SI result in the destination; emits pcmpgtd.
-(define_insn "gtv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (gt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgtd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
-
-
-;; MMX max/min insns
-
-;; Element-wise V8QI unsigned maximum (RTL umax); emits pmaxub.
-;; Enabled for TARGET_SSE or TARGET_3DNOW_A, not plain TARGET_MMX.
-(define_insn "umaxv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (umax:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmaxub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI signed maximum (RTL smax); emits pmaxsw.
-;; Enabled for TARGET_SSE or TARGET_3DNOW_A, not plain TARGET_MMX.
-(define_insn "smaxv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (smax:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pmaxsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V8QI unsigned minimum (RTL umin); emits pminub.
-;; Enabled for TARGET_SSE or TARGET_3DNOW_A, not plain TARGET_MMX.
-(define_insn "uminv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (umin:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                  (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pminub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-;; Element-wise V4HI signed minimum (RTL smin); emits pminsw.
-;; Enabled for TARGET_SSE or TARGET_3DNOW_A, not plain TARGET_MMX.
-(define_insn "sminv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (smin:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "pminsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
-
-
-;; MMX shifts
-
-;; Arithmetic right shift of each V4HI element; emits psraw.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "ashrv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Arithmetic right shift of each V2SI element; emits psrad.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "ashrv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (ashiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Logical right shift of each V4HI element; emits psrlw.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "lshrv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (lshiftrt:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Logical right shift of each V2SI element; emits psrld.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "lshrv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (lshiftrt:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; See logical MMX insns.
-;; DImode logical right shift inside an UNSPEC_NOP wrapper; emits psrlq.
-;; The count is a DImode MMX register or an immediate.
-(define_insn "mmx_lshrdi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-         [(lshiftrt:DI (match_operand:DI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi"))]
-         UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Left shift of each V4HI element; emits psllw.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "ashlv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ashift:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Left shift of each V2SI element; emits pslld.
-;; The shift count (operand 2) is a single DImode value taken from an
-;; MMX register or an immediate ("yi"), not a per-element vector.
-(define_insn "ashlv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (ashift:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                      (match_operand:DI 2 "nonmemory_operand" "yi")))]
-  "TARGET_MMX"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; See logical MMX insns.
-;; DImode left shift inside an UNSPEC_NOP wrapper; emits psllq.
-;; The count is a DImode MMX register or an immediate.
-(define_insn "mmx_ashldi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (unspec:DI
-        [(ashift:DI (match_operand:DI 1 "register_operand" "0")
-                    (match_operand:DI 2 "nonmemory_operand" "yi"))]
-        UNSPEC_NOP))]
-  "TARGET_MMX"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-
-;; MMX pack/unpack insns.
-
-;; Pack two V4HI inputs into one V8QI result with signed saturation:
-;; each input is ss_truncate'd to V4QI and the halves are concatenated
-;; with operand 1 first.  Emits packsswb.  Operand 2 must be a register.
-(define_insn "mmx_packsswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_concat:V8QI
-        (ss_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
-        (ss_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Pack two V2SI inputs into one V4HI result with signed saturation:
-;; each input is ss_truncate'd to V2HI and the halves are concatenated
-;; with operand 1 first.  Emits packssdw.  Operand 2 must be a register.
-(define_insn "mmx_packssdw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_concat:V4HI
-        (ss_truncate:V2HI (match_operand:V2SI 1 "register_operand" "0"))
-        (ss_truncate:V2HI (match_operand:V2SI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Pack two V4HI inputs into one V8QI result with unsigned saturation:
-;; each input is us_truncate'd to V4QI and the halves are concatenated
-;; with operand 1 first.  Emits packuswb.  Operand 2 must be a register.
-(define_insn "mmx_packuswb"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_concat:V8QI
-        (us_truncate:V4QI (match_operand:V4HI 1 "register_operand" "0"))
-        (us_truncate:V4QI (match_operand:V4HI 2 "register_operand" "y"))))]
-  "TARGET_MMX"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
-
-;; Emits punpckhbw.  The RTL expresses the interleave as a vec_merge
-;; (mask 85) of two vec_select permutations: operand 1 permuted as
-;; {4,0,5,1,6,2,7,3} and operand 2 permuted as {0,4,1,5,2,6,3,7}.
-;; Operand 2 must be a register.
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_merge:V8QI
-        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                         (parallel [(const_int 4)
-                                    (const_int 0)
-                                    (const_int 5)
-                                    (const_int 1)
-                                    (const_int 6)
-                                    (const_int 2)
-                                    (const_int 7)
-                                    (const_int 3)]))
-        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
-                         (parallel [(const_int 0)
-                                    (const_int 4)
-                                    (const_int 1)
-                                    (const_int 5)
-                                    (const_int 2)
-                                    (const_int 6)
-                                    (const_int 3)
-                                    (const_int 7)]))
-        (const_int 85)))]
-  "TARGET_MMX"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits punpckhwd.  The RTL expresses the interleave as a vec_merge
-;; (mask 5) of two vec_select permutations: operand 1 permuted as
-;; {0,2,1,3} and operand 2 permuted as {2,0,3,1}.
-;; Operand 2 must be a register.
-(define_insn "mmx_punpckhwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_merge:V4HI
-        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_MMX"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits punpckhdq.  The RTL is a vec_merge (mask 1) of operand 1
-;; unchanged with operand 2 after its two elements are swapped ({1,0}).
-;; Operand 2 must be a register.
-(define_insn "mmx_punpckhdq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_merge:V2SI
-        (match_operand:V2SI 1 "register_operand" "0")
-        (vec_select:V2SI (match_operand:V2SI 2 "register_operand" "y")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (const_int 1)))]
-  "TARGET_MMX"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits punpcklbw.  The RTL expresses the interleave as a vec_merge
-;; (mask 85) of two vec_select permutations: operand 1 permuted as
-;; {0,4,1,5,2,6,3,7} and operand 2 permuted as {4,0,5,1,6,2,7,3}
-;; (the two permutations are swapped relative to mmx_punpckhbw).
-;; Operand 2 must be a register.
-(define_insn "mmx_punpcklbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (vec_merge:V8QI
-        (vec_select:V8QI (match_operand:V8QI 1 "register_operand" "0")
-                         (parallel [(const_int 0)
-                                    (const_int 4)
-                                    (const_int 1)
-                                    (const_int 5)
-                                    (const_int 2)
-                                    (const_int 6)
-                                    (const_int 3)
-                                    (const_int 7)]))
-        (vec_select:V8QI (match_operand:V8QI 2 "register_operand" "y")
-                         (parallel [(const_int 4)
-                                    (const_int 0)
-                                    (const_int 5)
-                                    (const_int 1)
-                                    (const_int 6)
-                                    (const_int 2)
-                                    (const_int 7)
-                                    (const_int 3)]))
-        (const_int 85)))]
-  "TARGET_MMX"
-  "punpcklbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits punpcklwd.  The RTL expresses the interleave as a vec_merge
-;; (mask 5) of two vec_select permutations: operand 1 permuted as
-;; {2,0,3,1} and operand 2 permuted as {0,2,1,3}
-;; (the two permutations are swapped relative to mmx_punpckhwd).
-;; Operand 2 must be a register.
-(define_insn "mmx_punpcklwd"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (vec_merge:V4HI
-        (vec_select:V4HI (match_operand:V4HI 1 "register_operand" "0")
-                         (parallel [(const_int 2)
-                                    (const_int 0)
-                                    (const_int 3)
-                                    (const_int 1)]))
-        (vec_select:V4HI (match_operand:V4HI 2 "register_operand" "y")
-                         (parallel [(const_int 0)
-                                    (const_int 2)
-                                    (const_int 1)
-                                    (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_MMX"
-  "punpcklwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-;; Emits punpckldq.  The RTL is a vec_merge (mask 1) of operand 1 after
-;; its two elements are swapped ({1,0}) with operand 2 unchanged.
-;; Operand 2 must be a register.
-(define_insn "mmx_punpckldq"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_merge:V2SI
-        (vec_select:V2SI (match_operand:V2SI 1 "register_operand" "0")
-                          (parallel [(const_int 1)
-                                     (const_int 0)]))
-        (match_operand:V2SI 2 "register_operand" "y")
-        (const_int 1)))]
-  "TARGET_MMX"
-  "punpckldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
-
-
-;; Miscellaneous stuff
-
-;; Emits emms.  Modelled as a volatile unspec (UNSPECV_EMMS) that
-;; clobbers hard registers 8-15 in XFmode and 29-36 in DImode.
-;; NOTE(review): these are presumably the x87 stack and MMX register
-;; banks -- confirm against the register numbering in i386.h.
-;; The "memory" attribute is set to "unknown".
-(define_insn "emms"
-  [(unspec_volatile [(const_int 0)] UNSPECV_EMMS)
-   (clobber (reg:XF 8))
-   (clobber (reg:XF 9))
-   (clobber (reg:XF 10))
-   (clobber (reg:XF 11))
-   (clobber (reg:XF 12))
-   (clobber (reg:XF 13))
-   (clobber (reg:XF 14))
-   (clobber (reg:XF 15))
-   (clobber (reg:DI 29))
-   (clobber (reg:DI 30))
-   (clobber (reg:DI 31))
-   (clobber (reg:DI 32))
-   (clobber (reg:DI 33))
-   (clobber (reg:DI 34))
-   (clobber (reg:DI 35))
-   (clobber (reg:DI 36))]
-  "TARGET_MMX"
-  "emms"
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "unknown")])
-
-;; Emits ldmxcsr with an SImode memory operand.  Modelled as a volatile
-;; unspec (UNSPECV_LDMXCSR) with no destination in the RTL.
-(define_insn "ldmxcsr"
-  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
-                   UNSPECV_LDMXCSR)]
-  "TARGET_SSE"
-  "ldmxcsr\t%0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "load")])
-
-;; Emits stmxcsr, storing into an SImode memory operand.  The stored
-;; value is modelled as a volatile unspec (UNSPECV_STMXCSR).
-(define_insn "stmxcsr"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-       (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
-  "TARGET_SSE"
-  "stmxcsr\t%0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "store")])
-
-;; Expander for sfence; takes no caller-supplied operands.  The
-;; preparation code builds operand 0 as a volatile BLKmode MEM whose
-;; address is a SCRATCH, and the pattern sets that MEM from an
-;; UNSPEC_SFENCE of itself.
-(define_expand "sfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
-
-;; Matcher for the RTL produced by the sfence expander; emits sfence.
-;; Operand 0 has an empty predicate and constraint, so any BLKmode
-;; operand is accepted.  The "memory" attribute is "unknown".
-(define_insn "*sfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
-  "TARGET_SSE || TARGET_3DNOW_A"
-  "sfence"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
-
-;; Expander (64-bit only) for a block store of hard registers 21-28
-;; into the BLKmode destination operand 0, via UNSPEC_SSE_PROLOGUE_SAVE.
-;; Operands 1 (a DImode register), 2 (an immediate) and 3 (a label) are
-;; attached as USEs; see *sse_prologue_save_insn for how they are used.
-;; NOTE(review): registers 21-28 are presumably the first eight SSE
-;; registers -- confirm against the register numbering in i386.h.
-(define_expand "sse_prologue_save"
-  [(parallel [(set (match_operand:BLK 0 "" "")
-                  (unspec:BLK [(reg:DI 21)
-                               (reg:DI 22)
-                               (reg:DI 23)
-                               (reg:DI 24)
-                               (reg:DI 25)
-                               (reg:DI 26)
-                               (reg:DI 27)
-                               (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
-             (use (match_operand:DI 1 "register_operand" ""))
-             (use (match_operand:DI 2 "immediate_operand" ""))
-             (use (label_ref:DI (match_operand 3 "" "")))])]
-  "TARGET_64BIT"
-  "")
-
-;; Matcher and output routine for the SSE prologue save sequence.
-;;   operand 0: base register of the save area
-;;   operand 4: constant offset added to the base
-;;   operand 1: register used as the target of the emitted indirect jmp
-;;   operand 2: constant index of the first register slot to store
-;;   operand 3: label emitted after the stores
-;; The output code emits "jmp %A1", then one movaps per register from
-;; index SSE_REGPARM_MAX - 1 down to operand 2, each stored at offset
-;; i*16 from base+offset, and finally the internal label for operand 3.
-;; A "rex" prefix line is emitted before a movaps when the rebuilt
-;; address is not a PLUS.
-;; The insn condition bounds the offsets to the range [-128, 128).
-;; NOTE(review): presumably this keeps every movaps at a fixed encoded
-;; size so the computed jump can land on an instruction boundary, and
-;; the "length" of 135 is the worst-case total -- confirm.
-(define_insn "*sse_prologue_save_insn"
-  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-                         (match_operand:DI 4 "const_int_operand" "n")))
-       (unspec:BLK [(reg:DI 21)
-                    (reg:DI 22)
-                    (reg:DI 23)
-                    (reg:DI 24)
-                    (reg:DI 25)
-                    (reg:DI 26)
-                    (reg:DI 27)
-                    (reg:DI 28)] UNSPEC_SSE_PROLOGUE_SAVE))
-   (use (match_operand:DI 1 "register_operand" "r"))
-   (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))]
-  "TARGET_64BIT
-   && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
-   && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-  "*
-{
-  int i;
-  operands[0] = gen_rtx_MEM (Pmode,
-                            gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  output_asm_insn (\"jmp\\t%A1\", operands);
-  for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
-    {
-      operands[4] = adjust_address (operands[0], DImode, i*16);
-      operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
-      PUT_MODE (operands[4], TImode);
-      if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn (\"rex\", operands);
-      output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
-    }
-  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
-                            CODE_LABEL_NUMBER (operands[3]));
-  RET;
-}
-  "
-  [(set_attr "type" "other")
-   (set_attr "length_immediate" "0")
-   (set_attr "length_address" "0")
-   (set_attr "length" "135")
-   (set_attr "memory" "store")
-   (set_attr "modrm" "0")
-   (set_attr "mode" "DI")])
-
-;; 3Dnow! instructions
-
-;; Element-wise V2SF addition (RTL plus); emits pfadd.
-;; Requires TARGET_3DNOW.  Operand 1 is tied to the destination.
-(define_insn "addv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (plus:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfadd\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF subtraction op1 - op2 (RTL minus); emits pfsub.
-;; Requires TARGET_3DNOW.  Operand 1 is tied to the destination.
-(define_insn "subv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (minus:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfsub\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Reversed V2SF subtraction: the RTL computes op2 - op1, where op1 is
-;; the operand tied to the destination.  Emits pfsubr.
-;; Requires TARGET_3DNOW.
-(define_insn "subrv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (minus:V2SF (match_operand:V2SF 2 "nonimmediate_operand" "ym")
-                    (match_operand:V2SF 1 "register_operand" "0")))]
-  "TARGET_3DNOW"
-  "pfsubr\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF greater-than comparison (RTL gt); the result is a
-;; V2SI value in the destination.  Emits pfcmpgt.  Requires TARGET_3DNOW.
-(define_insn "gtv2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (gt:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpgt\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF greater-or-equal comparison (RTL ge); the result is
-;; a V2SI value in the destination.  Emits pfcmpge.  Requires TARGET_3DNOW.
-(define_insn "gev2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (ge:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpge\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF equality comparison (RTL eq); the result is a V2SI
-;; value in the destination.  Emits pfcmpeq.  Requires TARGET_3DNOW.
-(define_insn "eqv2sf3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (eq:V2SI (match_operand:V2SF 1 "register_operand" "0")
-                (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfcmpeq\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF maximum (RTL smax); emits pfmax.
-;; Requires TARGET_3DNOW.  Operand 1 is tied to the destination.
-(define_insn "pfmaxv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (smax:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmax\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF minimum (RTL smin); emits pfmin.
-;; Requires TARGET_3DNOW.  Operand 1 is tied to the destination.
-(define_insn "pfminv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (smin:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmin\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-;; Element-wise V2SF multiplication (RTL mult); emits pfmul.
-;; Requires TARGET_3DNOW.  Operand 1 is tied to the destination.
-(define_insn "mulv2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (mult:V2SF (match_operand:V2SF 1 "register_operand" "0")
-                  (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pfmul\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "V2SF")])
-
-;; Emits femms (requires TARGET_3DNOW).  Modelled as a volatile unspec
-;; (UNSPECV_FEMMS) with the same clobber list as the emms pattern:
-;; hard registers 8-15 in XFmode and 29-36 in DImode.
-;; NOTE(review): "memory" is "none" here but "unknown" on emms --
-;; confirm the difference is intended.
-(define_insn "femms"
-  [(unspec_volatile [(const_int 0)] UNSPECV_FEMMS)
-   (clobber (reg:XF 8))
-   (clobber (reg:XF 9))
-   (clobber (reg:XF 10))
-   (clobber (reg:XF 11))
-   (clobber (reg:XF 12))
-   (clobber (reg:XF 13))
-   (clobber (reg:XF 14))
-   (clobber (reg:XF 15))
-   (clobber (reg:DI 29))
-   (clobber (reg:DI 30))
-   (clobber (reg:DI 31))
-   (clobber (reg:DI 32))
-   (clobber (reg:DI 33))
-   (clobber (reg:DI 34))
-   (clobber (reg:DI 35))
-   (clobber (reg:DI 36))]
-  "TARGET_3DNOW"
-  "femms"
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")]) 
-
-(define_insn "pf2id"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pf2id\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "pf2iw"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (sign_extend:V2SI
-          (ss_truncate:V2HI
-             (fix:V2SI (match_operand:V2SF 1 "nonimmediate_operand" "ym")))))]
-  "TARGET_3DNOW_A"
-  "pf2iw\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "pfacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_concat:V2SF
-          (plus:SF
-             (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int  0)]))
-             (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (plus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int  0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW"
-  "pfacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "pfnacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_concat:V2SF
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int  0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW_A"
-  "pfnacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "pfpnacc"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (vec_concat:V2SF
-           (minus:SF
-              (vec_select:SF (match_operand:V2SF 1 "register_operand" "0")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 1)
-                            (parallel [(const_int 1)])))
-           (plus:SF
-              (vec_select:SF (match_operand:V2SF 2 "nonimmediate_operand" "y")
-                            (parallel [(const_int 0)]))
-              (vec_select:SF (match_dup 2)
-                            (parallel [(const_int 1)])))))]
-  "TARGET_3DNOW_A"
-  "pfpnacc\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "pi2fw"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (float:V2SF
-          (vec_concat:V2SI
-             (sign_extend:SI
-                (truncate:HI
-                   (vec_select:SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
-                                  (parallel [(const_int 0)]))))
-              (sign_extend:SI
-                (truncate:HI
-                    (vec_select:SI (match_dup 1)
-                                  (parallel [(const_int  1)])))))))]
-  "TARGET_3DNOW_A"
-  "pi2fw\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-(define_insn "floatv2si2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (float:V2SF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW"
-  "pi2fd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "V2SF")])
-
-;; This insn is identical to pavgb in operation, but the opcode is
-;; different.  To avoid accidentally matching pavgb, use an unspec.
-
-(define_insn "pavgusb"
- [(set (match_operand:V8QI 0 "register_operand" "=y")
-       (unspec:V8QI
-          [(match_operand:V8QI 1 "register_operand" "0")
-           (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
-         UNSPEC_PAVGUSB))]
-  "TARGET_3DNOW"
-  "pavgusb\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "TI")])
-
-;; 3DNow reciprocal and sqrt
-(define_insn "pfrcpv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-        (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
-       UNSPEC_PFRCP))]
-  "TARGET_3DNOW"
-  "pfrcp\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-(define_insn "pfrcpit1v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRCPIT1))]
-  "TARGET_3DNOW"
-  "pfrcpit1\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-(define_insn "pfrcpit2v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRCPIT2))]
-  "TARGET_3DNOW"
-  "pfrcpit2\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-(define_insn "pfrsqrtv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRSQRT))]
-  "TARGET_3DNOW"
-  "pfrsqrt\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-               
-(define_insn "pfrsqit1v2sf3"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "0")
-                     (match_operand:V2SF 2 "nonimmediate_operand" "ym")]
-                    UNSPEC_PFRSQIT1))]
-  "TARGET_3DNOW"
-  "pfrsqit1\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmx")
-   (set_attr "mode" "TI")])
-
-(define_insn "pmulhrwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-       (truncate:V4HI
-          (lshiftrt:V4SI
-             (plus:V4SI
-                (mult:V4SI
-                   (sign_extend:V4SI
-                      (match_operand:V4HI 1 "register_operand" "0"))
-                   (sign_extend:V4SI
-                      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
-                (const_vector:V4SI [(const_int 32768)
-                                    (const_int 32768)
-                                    (const_int 32768)
-                                    (const_int 32768)]))
-             (const_int 16))))]
-  "TARGET_3DNOW"
-  "pmulhrw\\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "TI")])
-
-(define_insn "pswapdv2si2"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (vec_select:V2SI (match_operand:V2SI 1 "nonimmediate_operand" "ym")
-                        (parallel [(const_int 1) (const_int 0)])))]
-  "TARGET_3DNOW_A"
-  "pswapd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "pswapdv2sf2"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
-       (vec_select:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "ym")
-                        (parallel [(const_int 1) (const_int 0)])))]
-  "TARGET_3DNOW_A"
-  "pswapd\\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "TI")])
-
-(define_expand "prefetch"
-  [(prefetch (match_operand 0 "address_operand" "")
-            (match_operand:SI 1 "const_int_operand" "")
-            (match_operand:SI 2 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE || TARGET_3DNOW"
-{
-  int rw = INTVAL (operands[1]);
-  int locality = INTVAL (operands[2]);
-
-  if (rw != 0 && rw != 1)
-    abort ();
-  if (locality < 0 || locality > 3)
-    abort ();
-  if (GET_MODE (operands[0]) != Pmode && GET_MODE (operands[0]) != VOIDmode)
-    abort ();
-
-  /* Use 3dNOW prefetch in case we are asking for write prefetch not
-     suported by SSE counterpart or the SSE prefetch is not available
-     (K6 machines).  Otherwise use SSE prefetch as it allows specifying
-     of locality.  */
-  if (TARGET_3DNOW && (!TARGET_PREFETCH_SSE || rw))
-    operands[2] = GEN_INT (3);
-  else
-    operands[1] = const0_rtx;
-})
-
-(define_insn "*prefetch_sse"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-            (const_int 0)
-            (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && !TARGET_64BIT"
-{
-  static const char * const patterns[4] = {
-   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
-  };
-
-  int locality = INTVAL (operands[1]);
-  if (locality < 0 || locality > 3)
-    abort ();
-
-  return patterns[locality];  
-}
-  [(set_attr "type" "sse")
-   (set_attr "memory" "none")])
-
-(define_insn "*prefetch_sse_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-            (const_int 0)
-            (match_operand:SI 1 "const_int_operand" ""))]
-  "TARGET_PREFETCH_SSE && TARGET_64BIT"
-{
-  static const char * const patterns[4] = {
-   "prefetchnta\t%a0", "prefetcht2\t%a0", "prefetcht1\t%a0", "prefetcht0\t%a0"
-  };
-
-  int locality = INTVAL (operands[1]);
-  if (locality < 0 || locality > 3)
-    abort ();
-
-  return patterns[locality];  
-}
-  [(set_attr "type" "sse")
-   (set_attr "memory" "none")])
-
-(define_insn "*prefetch_3dnow"
-  [(prefetch (match_operand:SI 0 "address_operand" "p")
-            (match_operand:SI 1 "const_int_operand" "n")
-            (const_int 3))]
-  "TARGET_3DNOW && !TARGET_64BIT"
-{
-  if (INTVAL (operands[1]) == 0)
-    return "prefetch\t%a0";
-  else
-    return "prefetchw\t%a0";
-}
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")])
-
-(define_insn "*prefetch_3dnow_rex"
-  [(prefetch (match_operand:DI 0 "address_operand" "p")
-            (match_operand:SI 1 "const_int_operand" "n")
-            (const_int 3))]
-  "TARGET_3DNOW && TARGET_64BIT"
+(define_insn "*prefetch_3dnow"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (const_int 3))]
+  "TARGET_3DNOW && !TARGET_64BIT"
 {
   if (INTVAL (operands[1]) == 0)
     return "prefetch\t%a0";
   else
-    return "prefetchw\t%a0";
-}
-  [(set_attr "type" "mmx")
-   (set_attr "memory" "none")])
-
-;; SSE2 support
-
-(define_insn "addv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "addpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmaddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "addsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_insn "subv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "subpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (minus:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "subsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_insn "mulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "mulpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmmulv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "mulsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssemul")
-   (set_attr "mode" "DF")])
-
-(define_insn "divv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                 (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "divpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmdivv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (div:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                 (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "divsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssediv")
-   (set_attr "mode" "DF")])
-
-;; SSE min/max
-
-(define_insn "smaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "maxpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmsmaxv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (smax:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "maxsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-
-(define_insn "sminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                  (match_operand:V2DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "minpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmsminv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (smin:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                                  (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
-                        (match_dup 1)
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "minsd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
-;; SSE2 square root.  There doesn't appear to be an extension for the
-;; reciprocal/rsqrt instructions if the Intel manual is to be believed.
-
-(define_insn "sqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm")))]
-  "TARGET_SSE2"
-  "sqrtpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmsqrtv2df2"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (sqrt:V2DF (match_operand:V2DF 1 "register_operand" "xm"))
-                        (match_operand:V2DF 2 "register_operand" "0")
-                       (const_int 1)))]
-  "TARGET_SSE2"
-  "sqrtsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-;; SSE mask-generating compares
-
-(define_insn "maskcmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                            [(match_operand:V2DF 1 "register_operand" "0")
-                             (match_operand:V2DF 2 "nonimmediate_operand" "x")]))]
-  "TARGET_SSE2"
-  "cmp%D3pd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "maskncmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (not:V2DI
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                             [(match_operand:V2DF 1 "register_operand" "0")
-                              (match_operand:V2DF 2 "nonimmediate_operand" "x")])))]
-  "TARGET_SSE2"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordps\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3pd\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "vmmaskcmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (match_operator:V2DI 3 "sse_comparison_operator"
-                             [(match_operand:V2DF 1 "register_operand" "0")
-                              (match_operand:V2DF 2 "nonimmediate_operand" "x")])
-        (subreg:V2DI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "cmp%D3sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
-
-(define_insn "vmmaskncmpv2df3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (not:V2DI
-         (match_operator:V2DI 3 "sse_comparison_operator"
-                              [(match_operand:V2DF 1 "register_operand" "0")
-                               (match_operand:V2DF 2 "nonimmediate_operand" "x")]))
-        (subreg:V2DI (match_dup 1) 0)
-        (const_int 1)))]
-  "TARGET_SSE2"
-{
-  if (GET_CODE (operands[3]) == UNORDERED)
-    return "cmpordsd\t{%2, %0|%0, %2}";
-  else
-    return "cmpn%D3sd\t{%2, %0|%0, %2}";
-}
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_comi"
-  [(set (reg:CCFP FLAGS_REG)
-        (compare:CCFP (vec_select:DF
-                      (match_operand:V2DF 0 "register_operand" "x")
-                      (parallel [(const_int 0)]))
-                     (vec_select:DF
-                      (match_operand:V2DF 1 "register_operand" "x")
-                      (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "comisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_ucomi"
-  [(set (reg:CCFPU FLAGS_REG)
-       (compare:CCFPU (vec_select:DF
-                        (match_operand:V2DF 0 "register_operand" "x")
-                        (parallel [(const_int 0)]))
-                       (vec_select:DF
-                        (match_operand:V2DF 1 "register_operand" "x")
-                        (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "ucomisd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecomi")
-   (set_attr "mode" "DF")])
-
-;; SSE Strange Moves.
-
-(define_insn "sse2_movmskpd"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V2DF 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE2"
-  "movmskpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
-                  UNSPEC_MOVMSK))]
-  "TARGET_SSE2"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_maskmovdqu"
-  [(set (mem:V16QI (match_operand:SI 0 "register_operand" "D"))
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-                      (match_operand:V16QI 2 "register_operand" "x")]
-                     UNSPEC_MASKMOV))]
-  "TARGET_SSE2"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_maskmovdqu_rex64"
-  [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
-                      (match_operand:V16QI 2 "register_operand" "x")]
-                     UNSPEC_MASKMOV))]
-  "TARGET_SSE2"
-  ;; @@@ check ordering of operands in intel/nonintel syntax
-  "maskmovdqu\t{%2, %1|%1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movntv2df"
-  [(set (match_operand:V2DF 0 "memory_operand" "=m")
-       (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movntpd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movntv2di"
-  [(set (match_operand:V2DI 0 "memory_operand" "=m")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
-                    UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movntdq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movntsi"
-  [(set (match_operand:SI 0 "memory_operand" "=m")
-       (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
-                  UNSPEC_MOVNT))]
-  "TARGET_SSE2"
-  "movnti\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; SSE <-> integer/MMX conversions
-
-;; Conversions between SI and SF
-
-(define_insn "cvtdq2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtdq2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "cvtps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvttps2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
-                    UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttps2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Conversions between SI and DF
-
-(define_insn "cvtdq2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float:V2DF (vec_select:V2SI
-                    (match_operand:V4SI 1 "nonimmediate_operand" "xm")
-                    (parallel
-                     [(const_int 0)
-                      (const_int 1)]))))]
-  "TARGET_SSE2"
-  "cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "cvtpd2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_concat:V4SI
-        (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm"))
-        (const_vector:V2SI [(const_int 0) (const_int 0)])))]
-  "TARGET_SSE2"
-  "cvtpd2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvttpd2dq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_concat:V4SI
-        (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
-                     UNSPEC_FIX)
-        (const_vector:V2SI [(const_int 0) (const_int 0)])))]
-  "TARGET_SSE2"
-  "cvttpd2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvtpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "cvtpd2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvttpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
-                    UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttpd2pi\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "cvtpi2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "ym")))]
-  "TARGET_SSE2"
-  "cvtpi2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; Conversions between SI and DF
-
-(define_insn "cvtsd2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
-                              (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "cvtsd2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "SI")])
-
-(define_insn "cvtsd2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "x,m")
-                              (parallel [(const_int 0)]))))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvtsd2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "mode" "DI")])
-
-(define_insn "cvttsd2si"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
-                                  (parallel [(const_int 0)]))] UNSPEC_FIX))]
-  "TARGET_SSE2"
-  "cvttsd2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SI")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvttsd2siq"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
-                                  (parallel [(const_int 0)]))] UNSPEC_FIX))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvttsd2siq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DI")
-   (set_attr "athlon_decode" "double,vector")])
-
-(define_insn "cvtsi2sd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
-                       (vec_duplicate:V2DF
-                         (float:DF
-                           (match_operand:SI 2 "nonimmediate_operand" "r,rm")))
-                       (const_int 2)))]
-  "TARGET_SSE2"
-  "cvtsi2sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
-
-(define_insn "cvtsi2sdq"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
-                       (vec_duplicate:V2DF
-                         (float:DF
-                           (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
-                       (const_int 2)))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "cvtsi2sdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
-
-;; Conversions between SF and DF
-
-(define_insn "cvtsd2ss"
-  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
-       (vec_merge:V4SF (match_operand:V4SF 1 "register_operand" "0,0")
-                       (vec_duplicate:V4SF
-                         (float_truncate:V2SF
-                           (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")))
-                       (const_int 14)))]
-  "TARGET_SSE2"
-  "cvtsd2ss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "mode" "SF")])
-
-(define_insn "cvtss2sd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
-                       (float_extend:V2DF
-                         (vec_select:V2SF
-                           (match_operand:V4SF 2 "nonimmediate_operand" "xm")
-                           (parallel [(const_int 0)
-                                      (const_int 1)])))
-                       (const_int 2)))]
-  "TARGET_SSE2"
-  "cvtss2sd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "cvtpd2ps"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-       (subreg:V4SF
-         (vec_concat:V4SI
-           (subreg:V2SI (float_truncate:V2SF
-                          (match_operand:V2DF 1 "nonimmediate_operand" "xm")) 0)
-           (const_vector:V2SI [(const_int 0) (const_int 0)])) 0))]
-  "TARGET_SSE2"
-  "cvtpd2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "cvtps2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (float_extend:V2DF
-         (vec_select:V2SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")
-                          (parallel [(const_int 0)
-                                     (const_int 1)]))))]
-  "TARGET_SSE2"
-  "cvtps2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; SSE2 variants of MMX insns
-
-;; MMX arithmetic
-
-(define_insn "addv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                   (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
-                  (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "addv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
-                  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssaddv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ssaddv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                     (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "usaddv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
-                      (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "usaddv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
-                     (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "paddusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                   (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (minus:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                   (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "subv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (minus:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                   (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sssubv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ss_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubsb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sssubv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ss_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ussubv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (us_minus:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubusb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "ussubv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "psubusw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "mulv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (mult:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "smulv8hi3_highpart"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (truncate:V8HI
-        (lshiftrt:V8SI
-         (mult:V8SI (sign_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
-                    (sign_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
-         (const_int 16))))]
-  "TARGET_SSE2"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "umulv8hi3_highpart"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (truncate:V8HI
-        (lshiftrt:V8SI
-         (mult:V8SI (zero_extend:V8SI (match_operand:V8HI 1 "register_operand" "0"))
-                    (zero_extend:V8SI (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
-         (const_int 16))))]
-  "TARGET_SSE2"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_umulsidi3"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-        (mult:DI (zero_extend:DI (vec_select:SI
-                                 (match_operand:V2SI 1 "register_operand" "0")
-                                 (parallel [(const_int 0)])))
-                (zero_extend:DI (vec_select:SI
-                                 (match_operand:V2SI 2 "nonimmediate_operand" "ym")
-                                 (parallel [(const_int 0)])))))]
-  "TARGET_SSE2"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
-
-(define_insn "sse2_umulv2siv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (mult:V2DI (zero_extend:V2DI
-                    (vec_select:V2SI
-                      (match_operand:V4SI 1 "register_operand" "0")
-                      (parallel [(const_int 0) (const_int 2)])))
-                  (zero_extend:V2DI
-                    (vec_select:V2SI
-                      (match_operand:V4SI 2 "nonimmediate_operand" "xm")
-                      (parallel [(const_int 0) (const_int 2)])))))]
-  "TARGET_SSE2"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pmaddwd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (plus:V4SI
-        (mult:V4SI
-         (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "0")
-                                            (parallel [(const_int 0)
-                                                       (const_int 2)
-                                                       (const_int 4)
-                                                       (const_int 6)])))
-         (sign_extend:V4SI (vec_select:V4HI (match_operand:V8HI 2 "nonimmediate_operand" "xm")
-                                            (parallel [(const_int 0)
-                                                       (const_int 2)
-                                                       (const_int 4)
-                                                       (const_int 6)]))))
-        (mult:V4SI
-         (sign_extend:V4SI (vec_select:V4HI (match_dup 1)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)
-                                                       (const_int 5)
-                                                       (const_int 7)])))
-         (sign_extend:V4SI (vec_select:V4HI (match_dup 2)
-                                            (parallel [(const_int 1)
-                                                       (const_int 3)
-                                                       (const_int 5)
-                                                       (const_int 7)]))))))]
-  "TARGET_SSE2"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-;; Same as pxor, but don't show input operands so that we don't think
-;; they are live.
-(define_insn "sse2_clrti"
-  [(set (match_operand:TI 0 "register_operand" "=x") (const_int 0))]
-  "TARGET_SSE2"
-{
-  if (get_attr_mode (insn) == MODE_TI)
-    return "pxor\t%0, %0";
-  else
-    return "xorps\t%0, %0";
+    return "prefetchw\t%a0";
 }
-  [(set_attr "type" "ssemov")
-   (set_attr "memory" "none")
-   (set (attr "mode")
-             (if_then_else
-               (ne (symbol_ref "optimize_size")
-                   (const_int 0))
-               (const_string "V4SF")
-               (const_string "TI")))])
-
-;; MMX unsigned averages/sum of absolute differences
-
-(define_insn "sse2_uavgv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ashiftrt:V16QI
-        (plus:V16QI (plus:V16QI
-                    (match_operand:V16QI 1 "register_operand" "0")
-                    (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
-                    (const_vector:V16QI [(const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)
-                                         (const_int 1) (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "pavgb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_uavgv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI
-        (plus:V8HI (plus:V8HI
-                    (match_operand:V8HI 1 "register_operand" "0")
-                    (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
-                   (const_vector:V8HI [(const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)
-                                       (const_int 1) (const_int 1)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-;; @@@ this isn't the right representation.
-(define_insn "sse2_psadbw"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (unspec:V2DI [(match_operand:V16QI 1 "register_operand" "0")
-                     (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_PSADBW))]
-  "TARGET_SSE2"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-
-;; MMX insert/extract/shuffle
-
-(define_expand "sse2_pinsrw"
-  [(set (match_operand:V8HI 0 "register_operand" "")
-        (vec_merge:V8HI
-         (match_operand:V8HI 1 "register_operand" "")
-          (vec_duplicate:V8HI
-            (match_operand:SI 2 "nonimmediate_operand" ""))
-          (match_operand:SI 3 "const_0_to_7_operand" "")))]
-  "TARGET_SSE2"
-{
-  operands[2] = gen_lowpart (HImode, operands[2]);
-  operands[3] = GEN_INT (1 << INTVAL (operands[3]));
-})
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
 
-(define_insn "*sse2_pinsrw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (vec_merge:V8HI
-         (match_operand:V8HI 1 "register_operand" "0")
-          (vec_duplicate:V8HI
-            (match_operand:HI 2 "nonimmediate_operand" "rm"))
-          (match_operand:SI 3 "const_pow2_1_to_128_operand" "N")))]
-  "TARGET_SSE2"
+(define_insn "*prefetch_3dnow_rex"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+            (match_operand:SI 1 "const_int_operand" "n")
+            (const_int 3))]
+  "TARGET_3DNOW && TARGET_64BIT"
 {
-  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+  if (INTVAL (operands[1]) == 0)
+    return "prefetch\t%a0";
+  else
+    return "prefetchw\t%a0";
 }
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (zero_extend:SI
-         (vec_select:HI (match_operand:V8HI 1 "register_operand" "x")
-                        (parallel
-                         [(match_operand:SI 2 "const_0_to_7_operand" "N")]))))]
-  "TARGET_SSE2"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshufd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (unspec:V4SI [(match_operand:V4SI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE2"
-  "pshufd\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshuflw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_PSHUFLW))]
-  "TARGET_SSE2"
-  "pshuflw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_pshufhw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")
-                     (match_operand:SI 2 "immediate_operand" "i")]
-                    UNSPEC_PSHUFHW))]
-  "TARGET_SSE2"
-  "pshufhw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; MMX mask-generating comparisons
-
-(define_insn "eqv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (eq:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "eqv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (eq:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "eqv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (eq:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpeqd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (gt:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (gt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-(define_insn "gtv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (gt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pcmpgtd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecmp")
-   (set_attr "mode" "TI")])
-
-
-;; MMX max/min insns
-
-(define_insn "umaxv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (umax:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmaxub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "smaxv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (smax:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pmaxsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "uminv16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (umin:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pminub\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-(define_insn "sminv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (smin:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2"
-  "pminsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "mode" "TI")])
-
-
-;; MMX shifts
-
-(define_insn "ashrv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                      (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv2di3"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                    (match_operand:SI 2 "nonmemory_operand" "xi")))]
-  "TARGET_SSE2"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psraw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashrv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrad\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrlw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "lshrv2di3_ti"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                      (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psrlq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv8hi3_ti"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psllw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv4si3_ti"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "pslld\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "ashlv2di3_ti"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
-                    (subreg:SI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
-  "TARGET_SSE2"
-  "psllq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-;; See logical MMX insns for the reason for the unspec.  Strictly speaking
-;; we wouldn't need here it since we never generate TImode arithmetic.
-
-;; There has to be some kind of prize for the weirdest new instruction...
-(define_insn "sse2_ashlti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (unspec:TI
-        [(ashift:TI (match_operand:TI 1 "register_operand" "0")
-                    (mult:SI (match_operand:SI 2 "immediate_operand" "i")
-                              (const_int 8)))] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "pslldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_lshrti3"
-  [(set (match_operand:TI 0 "register_operand" "=x")
-        (unspec:TI
-        [(lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                      (mult:SI (match_operand:SI 2 "immediate_operand" "i")
-                               (const_int 8)))] UNSPEC_NOP))]
-  "TARGET_SSE2"
-  "psrldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseishft")
-   (set_attr "mode" "TI")])
-
-;; SSE unpack
-
-(define_insn "sse2_unpckhpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_concat:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
-                       (parallel [(const_int 1)]))
-        (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
-                       (parallel [(const_int 1)]))))]
-  "TARGET_SSE2"
-  "unpckhpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_unpcklpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_concat:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "0")
-                       (parallel [(const_int 0)]))
-        (vec_select:DF (match_operand:V2DF 2 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE2"
-  "unpcklpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; MMX pack/unpack insns.
-
-(define_insn "sse2_packsswb"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_concat:V16QI
-        (ss_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
-        (ss_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packsswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_packssdw"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_concat:V8HI
-        (ss_truncate:V4HI (match_operand:V4SI 1 "register_operand" "0"))
-        (ss_truncate:V4HI (match_operand:V4SI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packssdw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_packuswb"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_concat:V16QI
-        (us_truncate:V8QI (match_operand:V8HI 1 "register_operand" "0"))
-        (us_truncate:V8QI (match_operand:V8HI 2 "register_operand" "x"))))]
-  "TARGET_SSE2"
-  "packuswb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpckhbw"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_merge:V16QI
-        (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                          (parallel [(const_int 8) (const_int 0)
-                                     (const_int 9) (const_int 1)
-                                     (const_int 10) (const_int 2)
-                                     (const_int 11) (const_int 3)
-                                     (const_int 12) (const_int 4)
-                                     (const_int 13) (const_int 5)
-                                     (const_int 14) (const_int 6)
-                                     (const_int 15) (const_int 7)]))
-        (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
-                          (parallel [(const_int 0) (const_int 8)
-                                     (const_int 1) (const_int 9)
-                                     (const_int 2) (const_int 10)
-                                     (const_int 3) (const_int 11)
-                                     (const_int 4) (const_int 12)
-                                     (const_int 5) (const_int 13)
-                                     (const_int 6) (const_int 14)
-                                     (const_int 7) (const_int 15)]))
-        (const_int 21845)))]
-  "TARGET_SSE2"
-  "punpckhbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpckhwd"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_merge:V8HI
-        (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                         (parallel [(const_int 4) (const_int 0)
-                                    (const_int 5) (const_int 1)
-                                    (const_int 6) (const_int 2)
-                                    (const_int 7) (const_int 3)]))
-        (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
-                         (parallel [(const_int 0) (const_int 4)
-                                    (const_int 1) (const_int 5)
-                                    (const_int 2) (const_int 6)
-                                    (const_int 3) (const_int 7)]))
-        (const_int 85)))]
-  "TARGET_SSE2"
-  "punpckhwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpckhdq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                         (parallel [(const_int 2) (const_int 0)
-                                    (const_int 3) (const_int 1)]))
-        (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
-                         (parallel [(const_int 0) (const_int 2)
-                                    (const_int 1) (const_int 3)]))
-        (const_int 5)))]
-  "TARGET_SSE2"
-  "punpckhdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpcklbw"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (vec_merge:V16QI
-        (vec_select:V16QI (match_operand:V16QI 1 "register_operand" "0")
-                          (parallel [(const_int 0) (const_int 8)
-                                     (const_int 1) (const_int 9)
-                                     (const_int 2) (const_int 10)
-                                     (const_int 3) (const_int 11)
-                                     (const_int 4) (const_int 12)
-                                     (const_int 5) (const_int 13)
-                                     (const_int 6) (const_int 14)
-                                     (const_int 7) (const_int 15)]))
-        (vec_select:V16QI (match_operand:V16QI 2 "register_operand" "x")
-                          (parallel [(const_int 8) (const_int 0)
-                                     (const_int 9) (const_int 1)
-                                     (const_int 10) (const_int 2)
-                                     (const_int 11) (const_int 3)
-                                     (const_int 12) (const_int 4)
-                                     (const_int 13) (const_int 5)
-                                     (const_int 14) (const_int 6)
-                                     (const_int 15) (const_int 7)]))
-        (const_int 21845)))]
-  "TARGET_SSE2"
-  "punpcklbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpcklwd"
-  [(set (match_operand:V8HI 0 "register_operand" "=x")
-       (vec_merge:V8HI
-        (vec_select:V8HI (match_operand:V8HI 1 "register_operand" "0")
-                         (parallel [(const_int 0) (const_int 4)
-                                    (const_int 1) (const_int 5)
-                                    (const_int 2) (const_int 6)
-                                    (const_int 3) (const_int 7)]))
-        (vec_select:V8HI (match_operand:V8HI 2 "register_operand" "x")
-                         (parallel [(const_int 4) (const_int 0)
-                                    (const_int 5) (const_int 1)
-                                    (const_int 6) (const_int 2)
-                                    (const_int 7) (const_int 3)]))
-        (const_int 85)))]
-  "TARGET_SSE2"
-  "punpcklwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpckldq"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_select:V4SI (match_operand:V4SI 1 "register_operand" "0")
-                         (parallel [(const_int 0) (const_int 2)
-                                    (const_int 1) (const_int 3)]))
-        (vec_select:V4SI (match_operand:V4SI 2 "register_operand" "x")
-                         (parallel [(const_int 2) (const_int 0)
-                                    (const_int 3) (const_int 1)]))
-        (const_int 5)))]
-  "TARGET_SSE2"
-  "punpckldq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpcklqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (match_operand:V2DI 1 "register_operand" "0")
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "punpcklqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_punpckhqdq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_merge:V2DI
-        (match_operand:V2DI 1 "register_operand" "0")
-        (vec_select:V2DI (match_operand:V2DI 2 "register_operand" "x")
-                         (parallel [(const_int 1)
-                                    (const_int 0)]))
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "punpckhqdq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-;; SSE2 moves
-
-(define_insn "sse2_movapd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVA))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movapd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movupd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-       (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
-                    UNSPEC_MOVU))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movupd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_movdqa"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
-                      UNSPEC_MOVA))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movdqa\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movdqu"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-       (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
-                      UNSPEC_MOVU))]
-  "TARGET_SSE2
-   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
-  "movdqu\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movdq2q"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
-       (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
-                      (parallel [(const_int 0)])))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movdq2q\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movdq2q_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
-       (vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
-                      (parallel [(const_int 0)])))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movdq2q\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movq2dq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
-       (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
-                        (const_int 0)))]
-  "TARGET_SSE2 && !TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssemov")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movq2dq_rex64"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
-       (vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
-                        (const_int 0)))]
-  "TARGET_SSE2 && TARGET_64BIT"
-  "@
-   movq\t{%1, %0|%0, %1}
-   movq2dq\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt,ssemov,ssecvt")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_movq"
-  [(set (match_operand:V2DI 0 "register_operand" "=x")
-       (vec_concat:V2DI (vec_select:DI
-                         (match_operand:V2DI 1 "nonimmediate_operand" "xm")
-                         (parallel [(const_int 0)]))
-                        (const_int 0)))]
-  "TARGET_SSE2"
-  "movq\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_loadd"
-  [(set (match_operand:V4SI 0 "register_operand" "=x")
-       (vec_merge:V4SI
-        (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
-        (const_vector:V4SI [(const_int 0)
-                            (const_int 0)
-                            (const_int 0)
-                            (const_int 0)])
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "TI")])
-
-(define_insn "sse2_stored"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=mr")
-       (vec_select:SI
-        (match_operand:V4SI 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
-  "movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "mode" "TI")])
-
-;; Store the high double of the source vector into the double destination.
-(define_insn "sse2_storehpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,Y,Y")
-       (vec_select:DF
-         (match_operand:V2DF 1 "nonimmediate_operand" " Y,0,o")
-         (parallel [(const_int 1)])))]
-  "TARGET_SSE2"
-  "@
-   movhpd\t{%1, %0|%0, %1}
-   unpckhpd\t%0, %0
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+  [(set_attr "type" "mmx")
+   (set_attr "memory" "none")])
 
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (vec_select:DF
-         (match_operand:V2DF 1 "memory_operand" "")
-         (parallel [(const_int 1)])))]
-  "TARGET_SSE2 && reload_completed"
-  [(const_int 0)]
+(define_expand "stack_protect_set"        ; emits exactly one of the four *_protect_set_{si,di} insns, then DONE
+  [(match_operand 0 "memory_operand" "")  ; forwarded as operand 0 of whichever insn is emitted
+   (match_operand 1 "memory_operand" "")] ; forwarded only when TARGET_THREAD_SSP_OFFSET is not defined
+  ""
 {
-  emit_move_insn (operands[0], adjust_address (operands[1], DFmode, 8));
+#ifdef TARGET_THREAD_SSP_OFFSET
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_tls_protect_set_di (operands[0],
+                                       GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  else
+    emit_insn (gen_stack_tls_protect_set_si (operands[0],
+                                       GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_protect_set_di (operands[0], operands[1]));
+  else
+    emit_insn (gen_stack_protect_set_si (operands[0], operands[1]));
+#endif
   DONE;
 })
 
-;; Load the high double of the target vector from the source scalar.
-(define_insn "sse2_loadhpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,o")
-       (vec_concat:V2DF
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
-           (parallel [(const_int 0)]))
-         (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")))]
-  "TARGET_SSE2"
-  "@
-   movhpd\t{%2, %0|%0, %2}
-   unpcklpd\t{%2, %0|%0, %2}
-   #"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+(define_insn "stack_protect_set_si"   ; SImode: load %1 into scratch %2, store %2 to %0, then zero %2
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+       (unspec:SI [(match_operand:SI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+   (set (match_scratch:SI 2 "=&r") (const_int 0))   ; scratch ends up 0 (the final xor); '&' = earlyclobber, %0 is still addressed after %2 is written
+   (clobber (reg:CC FLAGS_REG))]                    ; flags are declared clobbered -- presumably because of the xor
+  ""
+  "mov{l}\t{%1, %2|%2, %1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+  [(set_attr "type" "multi")])
 
-(define_split
-  [(set (match_operand:V2DF 0 "memory_operand" "")
-       (vec_concat:V2DF
-         (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
-         (match_operand:DF 1 "register_operand" "")))]
-  "TARGET_SSE2 && reload_completed"
-  [(const_int 0)]
-{
-  emit_move_insn (adjust_address (operands[0], DFmode, 8), operands[1]);
-  DONE;
-})
+(define_insn "stack_protect_set_di"   ; DImode twin of stack_protect_set_si; only enabled for TARGET_64BIT
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:DI 1 "memory_operand" "m")] UNSPEC_SP_SET))
+   (set (match_scratch:DI 2 "=&r") (const_int 0))   ; scratch ends up 0; the template zeroes it with xor{l} on %k2, the 32-bit name of the scratch
+   (clobber (reg:CC FLAGS_REG))]                    ; flags are declared clobbered -- presumably because of the xor
+  "TARGET_64BIT"
+  "mov{q}\t{%1, %2|%2, %1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2"
+  [(set_attr "type" "multi")])
 
-;; Store the low double of the source vector into the double destination.
-(define_expand "sse2_storelpd"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-       (vec_select:DF
-         (match_operand:V2DF 1 "nonimmediate_operand" "")
-         (parallel [(const_int 1)])))]
-  "TARGET_SSE2"
-{
-  operands[1] = gen_lowpart (DFmode, operands[1]);
-  emit_move_insn (operands[0], operands[1]);
-  DONE;
-})
+(define_insn "stack_tls_protect_set_si"   ; SImode: load from %gs:<operand 1> into scratch %2, store %2 to %0, then zero %2
+  [(set (match_operand:SI 0 "memory_operand" "=m")
+       (unspec:SI [(match_operand:SI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))   ; operand 1 is a constant offset, printed with %P1 after the segment prefix
+   (set (match_scratch:SI 2 "=&r") (const_int 0))   ; scratch ends up 0 (the final xor)
+   (clobber (reg:CC FLAGS_REG))]                    ; flags are declared clobbered -- presumably because of the xor
+  ""
+  "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+  [(set_attr "type" "multi")])
 
-;; Load the load double of the target vector from the source scalar.
-(define_insn "sse2_loadlpd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=Y,Y,m")
-       (vec_concat:V2DF
-         (match_operand:DF 2 "nonimmediate_operand"     " m,Y,Y")
-         (vec_select:DF
-           (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
-           (parallel [(const_int 1)]))))]
-  "TARGET_SSE2"
-  "@
-   movlpd\t{%2, %0|%0, %2}
-   movsd\t{%2, %0|%0, %2}
-   movlpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-;; Merge the low part of the source vector into the low part of the target.
-(define_insn "sse2_movsd"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=Y,Y,m")
-        (vec_merge:V2DF
-         (match_operand:V2DF 1 "nonimmediate_operand" "0,0,0")
-         (match_operand:V2DF 2 "nonimmediate_operand" "x,m,Y")
-         (const_int 2)))]
-  "TARGET_SSE2"
-  "@movsd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}
-    movlpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF,V2DF,V2DF")])
+(define_insn "stack_tls_protect_set_di"   ; DImode, TARGET_64BIT only; the C block below picks a %fs- or %gs-relative template
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+       (unspec:DI [(match_operand:DI 1 "const_int_operand" "i")] UNSPEC_SP_TLS_SET))   ; operand 1 is a constant offset, printed with %P1 after the segment prefix
+   (set (match_scratch:DI 2 "=&r") (const_int 0))   ; scratch ends up 0; both templates zero it with xor{l} on %k2
+   (clobber (reg:CC FLAGS_REG))]                    ; flags are declared clobbered -- presumably because of the xor
+  "TARGET_64BIT"
+  {
+     /* The kernel uses a different segment register for performance reasons; a
+        system call would not have to trash the userspace segment register,
+        which would be expensive */
+     if (ix86_cmodel != CM_KERNEL)
+        return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+     else
+        return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+  }
+  [(set_attr "type" "multi")])
 
-(define_expand "sse2_loadsd"
-  [(match_operand:V2DF 0 "register_operand" "")
-   (match_operand:DF 1 "memory_operand" "")]
-  "TARGET_SSE2"
+(define_expand "stack_protect_test"       ; emits one *_protect_test_{si,di} insn that sets the flags, then gen_beq on operand 2
+  [(match_operand 0 "memory_operand" "")  ; forwarded to the test insn; also recorded in ix86_compare_op0
+   (match_operand 1 "memory_operand" "")  ; recorded in ix86_compare_op1; forwarded to the test insn only without TARGET_THREAD_SSP_OFFSET
+   (match_operand 2 "" "")]               ; handed unchanged to gen_beq
+  ""
 {
-  emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
-                               CONST0_RTX (V2DFmode)));
-  DONE;
-})
-
-(define_insn "sse2_loadsd_1"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_merge:V2DF
-        (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
-        (match_operand:V2DF 2 "const0_operand" "X")
-        (const_int 1)))]
-  "TARGET_SSE2"
-  "movsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_storesd"
-  [(set (match_operand:DF 0 "memory_operand" "=m")
-       (vec_select:DF
-        (match_operand:V2DF 1 "register_operand" "x")
-        (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
-  "movsd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
-
-(define_insn "sse2_shufpd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")
-                     (match_operand:SI 3 "immediate_operand" "i")]
-                    UNSPEC_SHUFFLE))]
-  "TARGET_SSE2"
-  ;; @@@ check operand order for intel/nonintel syntax
-  "shufpd\t{%3, %2, %0|%0, %2, %3}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "sse2_clflush"
-  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
-                   UNSPECV_CLFLUSH)]
-  "TARGET_SSE2"
-  "clflush\t%a0"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
+  rtx flags = gen_rtx_REG (CCZmode, FLAGS_REG);
+  ix86_compare_op0 = operands[0];
+  ix86_compare_op1 = operands[1];
+  ix86_compare_emitted = flags;
 
-(define_expand "sse2_mfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  "TARGET_SSE2"
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
+#ifdef TARGET_THREAD_SSP_OFFSET
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_tls_protect_test_di (flags, operands[0],
+                                       GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+  else
+    emit_insn (gen_stack_tls_protect_test_si (flags, operands[0],
+                                       GEN_INT (TARGET_THREAD_SSP_OFFSET)));
+#else
+  if (TARGET_64BIT)
+    emit_insn (gen_stack_protect_test_di (flags, operands[0], operands[1]));
+  else
+    emit_insn (gen_stack_protect_test_si (flags, operands[0], operands[1]));
+#endif
+  emit_jump_insn (gen_beq (operands[2]));
+  DONE;
 })
 
-(define_insn "*mfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
-  "TARGET_SSE2"
-  "mfence"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
+(define_insn "stack_protect_test_si"   ; SImode: load %1 into scratch %3, then xor %2 into it; the result is the CCZ flags value in operand 0
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+       (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+                    (match_operand:SI 2 "memory_operand" "m")]
+                   UNSPEC_SP_TEST))
+   (clobber (match_scratch:SI 3 "=&r"))]   ; '&' = earlyclobber: %2 is still read from memory after %3 has been written
+  ""
+  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%2, %3|%3, %2}"
+  [(set_attr "type" "multi")])
 
-(define_expand "sse2_lfence"
-  [(set (match_dup 0)
-       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
-  "TARGET_SSE2"
-{
-  operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
-  MEM_VOLATILE_P (operands[0]) = 1;
-})
+(define_insn "stack_protect_test_di"   ; DImode twin of stack_protect_test_si; only enabled for TARGET_64BIT
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+       (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+                    (match_operand:DI 2 "memory_operand" "m")]
+                   UNSPEC_SP_TEST))
+   (clobber (match_scratch:DI 3 "=&r"))]   ; '&' = earlyclobber: %2 is still read from memory after %3 has been written
+  "TARGET_64BIT"
+  "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%2, %3|%3, %2}"
+  [(set_attr "type" "multi")])
 
-(define_insn "*lfence_insn"
-  [(set (match_operand:BLK 0 "" "")
-       (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
-  "TARGET_SSE2"
-  "lfence"
-  [(set_attr "type" "sse")
-   (set_attr "memory" "unknown")])
-
-;; SSE3
-
-(define_insn "mwait"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
-                    (match_operand:SI 1 "register_operand" "c")]
-                   UNSPECV_MWAIT)]
-  "TARGET_SSE3"
-  "mwait\t%0, %1"
-  [(set_attr "length" "3")])
-
-(define_insn "monitor"
-  [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
-                    (match_operand:SI 1 "register_operand" "c")
-                    (match_operand:SI 2 "register_operand" "d")]
-                   UNSPECV_MONITOR)]
-  "TARGET_SSE3"
-  "monitor\t%0, %1, %2"
-  [(set_attr "length" "3")])
-
-;; SSE3 arithmetic
-
-(define_insn "addsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_ADDSUB))]
-  "TARGET_SSE3"
-  "addsubps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "addsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_ADDSUB))]
-  "TARGET_SSE3"
-  "addsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "haddv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HADD))]
-  "TARGET_SSE3"
-  "haddps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "haddv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HADD))]
-  "TARGET_SSE3"
-  "haddpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "hsubv4sf3"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
-                     (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HSUB))]
-  "TARGET_SSE3"
-  "hsubps\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "hsubv2df3"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-        (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
-                     (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
-                    UNSPEC_HSUB))]
-  "TARGET_SSE3"
-  "hsubpd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "V2DF")])
-
-(define_insn "movshdup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSHDUP))]
-  "TARGET_SSE3"
-  "movshdup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "movsldup"
-  [(set (match_operand:V4SF 0 "register_operand" "=x")
-        (unspec:V4SF
-        [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_MOVSLDUP))]
-  "TARGET_SSE3"
-  "movsldup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "V4SF")])
-
-(define_insn "lddqu"
-  [(set (match_operand:V16QI 0 "register_operand" "=x")
-       (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
-                      UNSPEC_LDQQU))]
-  "TARGET_SSE3"
-  "lddqu\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+(define_insn "stack_tls_protect_test_si"   ; SImode: load %1 into scratch %3, then xor the word at %gs:<operand 2> into it; result is the CCZ flags value
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+       (unspec:CCZ [(match_operand:SI 1 "memory_operand" "m")
+                    (match_operand:SI 2 "const_int_operand" "i")]   ; constant offset, printed with %P2 after the segment prefix
+                   UNSPEC_SP_TLS_TEST))
+   (clobber (match_scratch:SI 3 "=r"))]   ; no '&' here, unlike stack_protect_test_si -- presumably fine because operand 2 is a constant, not a memory operand
+  ""
+  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}"
+  [(set_attr "type" "multi")])
 
-(define_insn "loadddup"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m")))]
-  "TARGET_SSE3"
-  "movddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
+(define_insn "stack_tls_protect_test_di"   ; DImode, TARGET_64BIT only; the C block below picks a %fs- or %gs-relative template
+  [(set (match_operand:CCZ 0 "flags_reg_operand" "")
+       (unspec:CCZ [(match_operand:DI 1 "memory_operand" "m")
+                    (match_operand:DI 2 "const_int_operand" "i")]   ; constant offset, printed with %P2 after the segment prefix
+                   UNSPEC_SP_TLS_TEST))
+   (clobber (match_scratch:DI 3 "=r"))]   ; no '&' here, unlike stack_protect_test_di -- presumably fine because operand 2 is a constant, not a memory operand
+  "TARGET_64BIT"
+  {
+     /* The kernel uses a different segment register for performance reasons; a
+        system call would not have to trash the userspace segment register,
+        which would be expensive */
+     if (ix86_cmodel != CM_KERNEL)
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}";
+     else
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}";
+  }
+  [(set_attr "type" "multi")])
 
-(define_insn "movddup"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
-       (vec_duplicate:V2DF
-        (vec_select:DF (match_operand:V2DF 1 "register_operand" "x")
-                       (parallel [(const_int 0)]))))]
-  "TARGET_SSE3"
-  "movddup\t{%1, %0|%0, %1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "mode" "DF")])
+(include "sse.md")
+(include "mmx.md")
+(include "sync.md")