OSDN Git Service

2007-09-14 Jan Hubicka <jh@suse.cz>
[pf3gnuchains/gcc-fork.git] / gcc / config / i386 / i386.md
index 7dae8e1..cbf5161 100644 (file)
@@ -9,7 +9,7 @@
 ;;
 ;; GCC is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
+;; the Free Software Foundation; either version 3, or (at your option)
 ;; any later version.
 ;;
 ;; GCC is distributed in the hope that it will be useful,
@@ -18,9 +18,8 @@
 ;; GNU General Public License for more details.
 ;;
 ;; You should have received a copy of the GNU General Public License
-;; along with GCC; see the file COPYING.  If not, write to
-;; the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.  */
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.  */
 ;;
 ;; The original PO technology requires these to be ordered by speed,
 ;; so that assigner will pick the fastest.
@@ -58,6 +57,7 @@
    (UNSPEC_DTPOFF              6)
    (UNSPEC_GOTNTPOFF           7)
    (UNSPEC_INDNTPOFF           8)
+   (UNSPEC_PLTOFF              9)
 
    ; Prologue support
    (UNSPEC_STACK_ALLOC         11)
    (UNSPEC_SSE_PROLOGUE_SAVE   13)
    (UNSPEC_REG_SAVE            14)
    (UNSPEC_DEF_CFA             15)
+   (UNSPEC_SET_RIP             16)
+   (UNSPEC_SET_GOT_OFFSET      17)
 
    ; TLS support
-   (UNSPEC_TP                  16)
-   (UNSPEC_TLS_GD              17)
-   (UNSPEC_TLS_LD_BASE         18)
-   (UNSPEC_TLSDESC             19)
+   (UNSPEC_TP                  18)
+   (UNSPEC_TLS_GD              19)
+   (UNSPEC_TLS_LD_BASE         20)
+   (UNSPEC_TLSDESC             21)
 
    ; Other random patterns
-   (UNSPEC_SCAS                        20)
-   (UNSPEC_FNSTSW              21)
-   (UNSPEC_SAHF                        22)
-   (UNSPEC_FSTCW               23)
-   (UNSPEC_ADD_CARRY           24)
-   (UNSPEC_FLDCW               25)
-   (UNSPEC_REP                 26)
-   (UNSPEC_EH_RETURN           27)
-   (UNSPEC_LD_MPIC             28)     ; load_macho_picbase
-   (UNSPEC_TRUNC_NOOP          29)
+   (UNSPEC_SCAS                        30)
+   (UNSPEC_FNSTSW              31)
+   (UNSPEC_SAHF                        32)
+   (UNSPEC_FSTCW               33)
+   (UNSPEC_ADD_CARRY           34)
+   (UNSPEC_FLDCW               35)
+   (UNSPEC_REP                 36)
+   (UNSPEC_EH_RETURN           37)
+   (UNSPEC_LD_MPIC             38)     ; load_macho_picbase
+   (UNSPEC_TRUNC_NOOP          39)
 
    ; For SSE/MMX support:
-   (UNSPEC_FIX_NOTRUNC         30)
-   (UNSPEC_MASKMOV             31)
-   (UNSPEC_MOVMSK              32)
-   (UNSPEC_MOVNT               33)
-   (UNSPEC_MOVU                        34)
-   (UNSPEC_RCP                 35)
-   (UNSPEC_RSQRT               36)
-   (UNSPEC_SFENCE              37)
-   (UNSPEC_NOP                 38)     ; prevents combiner cleverness
-   (UNSPEC_PFRCP               39)
+   (UNSPEC_FIX_NOTRUNC         40)
+   (UNSPEC_MASKMOV             41)
+   (UNSPEC_MOVMSK              42)
+   (UNSPEC_MOVNT               43)
+   (UNSPEC_MOVU                        44)
+   (UNSPEC_RCP                 45)
+   (UNSPEC_RSQRT               46)
+   (UNSPEC_SFENCE              47)
+   (UNSPEC_NOP                 48)     ; prevents combiner cleverness
+   (UNSPEC_PFRCP               49)
    (UNSPEC_PFRCPIT1            40)
    (UNSPEC_PFRCPIT2            41)
    (UNSPEC_PFRSQRT             42)
    (UNSPEC_FPREM1_F            90)
    (UNSPEC_FPREM1_U            91)
 
+   (UNSPEC_C2_FLAG             95)
+
    ; SSP patterns
    (UNSPEC_SP_SET              100)
    (UNSPEC_SP_TEST             101)
    (UNSPEC_EXTRQ                131)   
    (UNSPEC_INSERTQI             132)
    (UNSPEC_INSERTQ              133)
+
+   ; For SSE4.1 support
+   (UNSPEC_BLENDV              134)
+   (UNSPEC_INSERTPS            135)
+   (UNSPEC_DP                  136)
+   (UNSPEC_MOVNTDQA            137)
+   (UNSPEC_MPSADBW             138)
+   (UNSPEC_PHMINPOSUW          139)
+   (UNSPEC_PTEST               140)
+   (UNSPEC_ROUND               141)
+
+   ; For SSE4.2 support
+   (UNSPEC_CRC32               143)
+   (UNSPEC_PCMPESTR            144)
+   (UNSPEC_PCMPISTR            145)
+
+   ;; For SSE5
+   (UNSPEC_SSE5_INTRINSIC      150)
+   (UNSPEC_SSE5_UNSIGNED_CMP   151)
+   (UNSPEC_SSE5_TRUEFALSE      152)
+   (UNSPEC_SSE5_PERMUTE                153)
+   (UNSPEC_SSE5_ASHIFT         154)
+   (UNSPEC_SSE5_LSHIFT         155)
+   (UNSPEC_FRCZ                        156)
+   (UNSPEC_CVTPH2PS            157)
+   (UNSPEC_CVTPS2PH            158)
   ])
 
 (define_constants
    (UNSPECV_CMPXCHG_2          11)
    (UNSPECV_XCHG               12)
    (UNSPECV_LOCK               13)
+   (UNSPECV_PROLOGUE_USE       14)
+  ])
+
+;; Constants to represent pcomtrue/pcomfalse variants
+(define_constants
+  [(PCOM_FALSE                 0)
+   (PCOM_TRUE                  1)
+   (COM_FALSE_S                        2)
+   (COM_FALSE_P                        3)
+   (COM_TRUE_S                 4)
+   (COM_TRUE_P                 5)
   ])
 
 ;; Registers by name.
    push,pop,call,callv,leave,
    str,bitmanip,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
-   sselog,sselog1,sseiadd,sseishft,sseimul,
-   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
+   sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,ssediv,sseins,
+   ssemuladd,sse4arg,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
 (define_attr "unit" "integer,i387,sse,mmx,unknown"
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
           (const_string "i387")
-        (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
-                         sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
+        (eq_attr "type" "sselog,sselog1,sseiadd,sseiadd1,sseishft,sseimul,
+                         sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,
+                         ssecvt1,sseicvt,ssediv,sseins,ssemuladd,sse4arg")
           (const_string "sse")
         (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
           (const_string "mmx")
        ]
        (const_int 0)))
 
+;; There are also additional prefixes in SSSE3.
+(define_attr "prefix_extra" "" (const_int 0))
+
 ;; Set when modrm byte is used.
 (define_attr "modrm" ""
   (cond [(eq_attr "type" "str,leave")
         (plus (plus (attr "modrm")
                     (plus (attr "prefix_0f")
                           (plus (attr "prefix_rex")
-                                (const_int 1))))
+                                (plus (attr "prefix_extra")
+                                      (const_int 1)))))
               (plus (attr "prefix_rep")
                     (plus (attr "prefix_data16")
                           (plus (attr "length_immediate")
                 "!alu1,negnot,ishift1,
                   imov,imovx,icmp,test,bitmanip,
                   fmov,fcmp,fsgn,
-                  sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
-                  mmx,mmxmov,mmxcmp,mmxcvt")
+                  sse,ssemov,ssecmp,ssecomi,ssecvt,ssecvt1,sseicvt,sselog1,
+                  sseiadd1,mmx,mmxmov,mmxcmp,mmxcvt")
              (match_operand 2 "memory_operand" ""))
           (const_string "load")
-        (and (eq_attr "type" "icmov")
+        (and (eq_attr "type" "icmov,ssemuladd,sse4arg")
              (match_operand 3 "memory_operand" ""))
           (const_string "load")
        ]
   [(set_attr "length" "128")
    (set_attr "type" "multi")])
 
-;; All x87 floating point modes
-(define_mode_macro X87MODEF [SF DF XF])
+(define_code_iterator plusminus [plus minus])
+
+;; Base name for define_insn and insn mnemonic.
+(define_code_attr addsub [(plus "add") (minus "sub")])
 
-;; x87 SFmode and DFMode floating point modes
-(define_mode_macro X87MODEF12 [SF DF])
+;; Mark commutative operators as such in constraints.
+(define_code_attr comm [(plus "%") (minus "")])
+
+;; All single word integer modes.
+(define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Instruction suffix for integer modes.
+(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+
+;; Register class for integer modes.
+(define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
+
+;; Immediate operand constraint for integer modes.
+(define_mode_attr i [(QI "i") (HI "i") (SI "i") (DI "e")])
+
+;; General operand predicate for integer modes.
+(define_mode_attr general_operand
+       [(QI "general_operand")
+        (HI "general_operand")
+        (SI "general_operand")
+        (DI "x86_64_general_operand")])
+
+;; SSE and x87 SFmode and DFmode floating point modes
+(define_mode_iterator MODEF [SF DF])
+
+;; All x87 floating point modes
+(define_mode_iterator X87MODEF [SF DF XF])
 
 ;; All integer modes handled by x87 fisttp operator.
-(define_mode_macro X87MODEI [HI SI DI])
+(define_mode_iterator X87MODEI [HI SI DI])
 
 ;; All integer modes handled by integer x87 operators.
-(define_mode_macro X87MODEI12 [HI SI])
-
-;; All SSE floating point modes
-(define_mode_macro SSEMODEF [SF DF])
+(define_mode_iterator X87MODEI12 [HI SI])
 
 ;; All integer modes handled by SSE cvtts?2si* operators.
-(define_mode_macro SSEMODEI24 [SI DI])
+(define_mode_iterator SSEMODEI24 [SI DI])
 
 ;; SSE asm suffix for floating point modes
 (define_mode_attr ssemodefsuffix [(SF "s") (DF "d")])
 
 (define_expand "cmpsi_1"
   [(set (reg:CC FLAGS_REG)
-       (compare:CC (match_operand:SI 0 "nonimmediate_operand" "rm,r")
-                   (match_operand:SI 1 "general_operand" "ri,mr")))]
+       (compare:CC (match_operand:SI 0 "nonimmediate_operand" "")
+                   (match_operand:SI 1 "general_operand" "")))]
   ""
   "")
 
   DONE;
 })
 
-(define_expand "cmpdf"
-  [(set (reg:CC FLAGS_REG)
-       (compare:CC (match_operand:DF 0 "cmp_fp_expander_operand" "")
-                   (match_operand:DF 1 "cmp_fp_expander_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-{
-  ix86_compare_op0 = operands[0];
-  ix86_compare_op1 = operands[1];
-  DONE;
-})
-
-(define_expand "cmpsf"
+(define_expand "cmp<mode>"
   [(set (reg:CC FLAGS_REG)
-       (compare:CC (match_operand:SF 0 "cmp_fp_expander_operand" "")
-                   (match_operand:SF 1 "cmp_fp_expander_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
+       (compare:CC (match_operand:MODEF 0 "cmp_fp_expander_operand" "")
+                   (match_operand:MODEF 1 "cmp_fp_expander_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
   ix86_compare_op0 = operands[0];
   ix86_compare_op1 = operands[1];
             (match_operand 1 "register_operand" "f")
             (match_operand 2 "const0_operand" "X"))]
        UNSPEC_FNSTSW))]
-  "TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
           ]
           (const_string "XF")))])
 
-(define_insn "*cmpfp_sf"
-  [(set (match_operand:HI 0 "register_operand" "=a")
+(define_insn_and_split "*cmpfp_0_cc"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (match_operand 1 "register_operand" "f")
+         (match_operand 2 "const0_operand" "X")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
        (unspec:HI
-         [(compare:CCFP
-            (match_operand:SF 1 "register_operand" "f")
-            (match_operand:SF 2 "nonimmediate_operand" "fm"))]
-         UNSPEC_FNSTSW))]
-  "TARGET_80387"
-  "* return output_fp_compare (insn, operands, 0, 0);"
+         [(compare:CCFP (match_dup 1)(match_dup 2))]
+       UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
   [(set_attr "type" "multi")
    (set_attr "unit" "i387")
-   (set_attr "mode" "SF")])
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+             (const_string "SF")
+           (match_operand:DF 1 "" "")
+             (const_string "DF")
+          ]
+          (const_string "XF")))])
 
-(define_insn "*cmpfp_df"
+(define_insn "*cmpfp_xf"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
-            (match_operand:DF 1 "register_operand" "f")
-            (match_operand:DF 2 "nonimmediate_operand" "fm"))]
+            (match_operand:XF 1 "register_operand" "f")
+            (match_operand:XF 2 "register_operand" "f"))]
          UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
    (set_attr "unit" "i387")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "XF")])
 
-(define_insn "*cmpfp_xf"
+(define_insn_and_split "*cmpfp_xf_cc"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (match_operand:XF 1 "register_operand" "f")
+         (match_operand:XF 2 "register_operand" "f")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "TARGET_80387
+   && TARGET_SAHF && !TARGET_CMOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (unspec:HI
+         [(compare:CCFP (match_dup 1)(match_dup 2))]
+       UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "XF")])
+
+(define_insn "*cmpfp_<mode>"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
          [(compare:CCFP
-            (match_operand:XF 1 "register_operand" "f")
-            (match_operand:XF 2 "register_operand" "f"))]
+            (match_operand:MODEF 1 "register_operand" "f")
+            (match_operand:MODEF 2 "nonimmediate_operand" "fm"))]
          UNSPEC_FNSTSW))]
   "TARGET_80387"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
    (set_attr "unit" "i387")
-   (set_attr "mode" "XF")])
+   (set_attr "mode" "<MODE>")])
+
+(define_insn_and_split "*cmpfp_<mode>_cc"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (match_operand:MODEF 1 "register_operand" "f")
+         (match_operand:MODEF 2 "nonimmediate_operand" "fm")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "TARGET_80387
+   && TARGET_SAHF && !TARGET_CMOVE"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (unspec:HI
+         [(compare:CCFP (match_dup 1)(match_dup 2))]
+       UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*cmpfp_u"
   [(set (match_operand:HI 0 "register_operand" "=a")
             (match_operand 1 "register_operand" "f")
             (match_operand 2 "register_operand" "f"))]
          UNSPEC_FNSTSW))]
-  "TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])"
   "* return output_fp_compare (insn, operands, 0, 1);"
   [(set_attr "type" "multi")
           ]
           (const_string "XF")))])
 
+(define_insn_and_split "*cmpfp_u_cc"
+  [(set (reg:CCFPU FLAGS_REG)
+       (compare:CCFPU
+         (match_operand 1 "register_operand" "f")
+         (match_operand 2 "register_operand" "f")))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && GET_MODE (operands[1]) == GET_MODE (operands[2])"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (unspec:HI
+         [(compare:CCFPU (match_dup 1)(match_dup 2))]
+       UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set (attr "mode")
+     (cond [(match_operand:SF 1 "" "")
+             (const_string "SF")
+           (match_operand:DF 1 "" "")
+             (const_string "DF")
+          ]
+          (const_string "XF")))])
+
 (define_insn "*cmpfp_<mode>"
   [(set (match_operand:HI 0 "register_operand" "=a")
        (unspec:HI
             (match_operator 3 "float_operator"
               [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
          UNSPEC_FNSTSW))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_USE_<MODE>MODE_FIOP
    && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<MODE>")])
 
+(define_insn_and_split "*cmpfp_<mode>_cc"
+  [(set (reg:CCFP FLAGS_REG)
+       (compare:CCFP
+         (match_operand 1 "register_operand" "f")
+         (match_operator 3 "float_operator"
+           [(match_operand:X87MODEI12 2 "memory_operand" "m")])))
+   (clobber (match_operand:HI 0 "register_operand" "=a"))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_SAHF && !TARGET_CMOVE
+   && TARGET_USE_<MODE>MODE_FIOP
+   && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+       (unspec:HI
+         [(compare:CCFP
+            (match_dup 1)
+            (match_op_dup 3 [(match_dup 2)]))]
+       UNSPEC_FNSTSW))
+   (set (reg:CC FLAGS_REG)
+       (unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
+  ""
+  [(set_attr "type" "multi")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")
+   (set_attr "mode" "<MODE>")])
+
 ;; FP compares, step 2
 ;; Move the fpsw to ax.
 
 
 (define_insn "x86_sahf_1"
   [(set (reg:CC FLAGS_REG)
-       (unspec:CC [(match_operand:HI 0 "register_operand" "a")] UNSPEC_SAHF))]
-  "!TARGET_64BIT"
-  "sahf"
+       (unspec:CC [(match_operand:HI 0 "register_operand" "a")]
+                  UNSPEC_SAHF))]
+  "TARGET_SAHF"
+{
+#ifdef HAVE_AS_IX86_SAHF
+  return "sahf";
+#else
+  return ".byte\t0x9e";
+#endif
+}
   [(set_attr "length" "1")
    (set_attr "athlon_decode" "vector")
    (set_attr "amdfam10_decode" "direct")
   [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP (match_operand 0 "register_operand" "f")
                      (match_operand 1 "register_operand" "f")))]
-  "TARGET_80387 && TARGET_CMOVE
-   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
-   && FLOAT_MODE_P (GET_MODE (operands[0]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && TARGET_CMOVE
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 0);"
   [(set_attr "type" "fcmp")
   [(set (reg:CCFPU FLAGS_REG)
        (compare:CCFPU (match_operand 0 "register_operand" "f")
                       (match_operand 1 "register_operand" "f")))]
-  "TARGET_80387 && TARGET_CMOVE
-   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))
-   && FLOAT_MODE_P (GET_MODE (operands[0]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[0]))
+   && TARGET_CMOVE
+   && !(SSE_FLOAT_MODE_P (GET_MODE (operands[0])) && TARGET_SSE_MATH)
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 1);"
   [(set_attr "type" "fcmp")
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && operands[1] == constm1_rtx
-   && (TARGET_PENTIUM || optimize_size)"
+   && (TARGET_MOVE_M1_VIA_OR || optimize_size)"
 {
   operands[1] = constm1_rtx;
   return "or{l}\t{%1, %0|%0, %1}";
   [(set (match_operand:DI 0 "push_operand" "")
         (match_operand:DI 1 "immediate_operand" ""))]
   "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-                   ? flow2_completed : reload_completed)
+                   ? epilogue_completed : reload_completed)
    && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 0) (match_dup 1))
   [(set (match_operand:DI 0 "register_operand" "=r")
        (match_operand:DI 1 "const_int_operand" "i"))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_PENTIUM || optimize_size)
+  "TARGET_64BIT && (TARGET_MOVE_M1_VIA_OR || optimize_size)
    && reload_completed
    && operands[1] == constm1_rtx"
 {
 
 (define_insn "*movdi_2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-                       "=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
+                       "=r  ,o  ,*y,m*y,*y,*Yt,m  ,*Yt,*Yt,*x,m ,*x,*x")
        (match_operand:DI 1 "general_operand"
-                       "riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  ,C ,*x,*x,m "))]
+                       "riFo,riF,C ,*y ,m ,C  ,*Yt,*Yt,m  ,C ,*x,*x,m "))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    #
 
 (define_insn "*movdi_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-         "=r,r  ,r,m ,!m,*y,*y,?r ,m ,?*Ym,*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
+         "=r,r  ,r,m ,!m,*y,*y,?r ,m ,?*Ym,?*y,*x,*x,?r ,m,?*Yi,*x,?*x,?*Ym")
        (match_operand:DI 1 "general_operand"
-         "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r   ,m ,C ,*x,*Yi,*x,r  ,m ,*Ym,*x"))]
+         "Z ,rem,i,re,n ,C ,*y,*Ym,*y,r   ,m  ,C ,*x,*Yi,*x,r  ,m ,*Ym,*x"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
   [(set (match_operand:DI 0 "memory_operand" "")
         (match_operand:DI 1 "immediate_operand" ""))]
   "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-                   ? flow2_completed : reload_completed)
+                   ? epilogue_completed : reload_completed)
    && !symbolic_operand (operands[1], DImode)
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 3))
 {
   if (TARGET_64BIT)
     ix86_expand_move (TImode, operands);
+  else if (push_operand (operands[0], TImode))
+    ix86_expand_push (TImode, operands[1]);
   else
     ix86_expand_vector_move (TImode, operands);
   DONE;
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
+;; This expands to what emit_move_complex would generate if we didn't
+;; have a movti pattern.  Having this avoids problems with reload on
+;; 32-bit targets when SSE is present, but doesn't seem to be harmful
+;; to have around all the time.
+(define_expand "movcdi"
+  [(set (match_operand:CDI 0 "nonimmediate_operand" "")
+       (match_operand:CDI 1 "general_operand" ""))]
+  ""
+{
+  if (push_operand (operands[0], CDImode))
+    emit_move_complex_push (CDImode, operands[0], operands[1]);
+  else
+    emit_move_complex_parts (operands[0], operands[1]);
+  DONE;
+})
+
 (define_expand "movsf"
   [(set (match_operand:SF 0 "nonimmediate_operand" "")
        (match_operand:SF 1 "general_operand" ""))]
        (match_operand:SF 1 "memory_operand" ""))]
   "reload_completed
    && MEM_P (operands[1])
-   && constant_pool_reference_p (operands[1])"
+   && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0)
-       (match_dup 1))]
-  "operands[1] = avoid_constant_pool_reference (operands[1]);")
+       (match_dup 2))])
 
 
 ;; %%% Kill this when call knows how to work this out.
 
 (define_insn "*movsf_1"
   [(set (match_operand:SF 0 "nonimmediate_operand"
-         "=f,m,f,r  ,m ,x,x,x ,m,*y,m ,*y,Yi,r ,*Ym,r  ")
+         "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
        (match_operand:SF 1 "general_operand"
-         "fm,f,G,rmF,Fr,C,x,xm,x,m ,*y,*y,r ,Yi,r  ,*Ym"))]
+         "fm,f,G,rmF,Fr,C,x,xm,x,m  ,*y,*y ,r  ,Yi,r   ,*Ym"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
 (define_insn "*pushdf_nointeger"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Yt"))]
   "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
 {
   /* This insn should be already split before reg-stack.  */
 
 (define_insn "*pushdf_integer"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Yt"))]
   "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
 {
   /* This insn should be already split before reg-stack.  */
 
 (define_insn "*movdf_nointeger"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-                       "=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
+                       "=f,m,f,*r  ,o  ,Yt*x,Yt*x,Yt*x ,m  ")
        (match_operand:DF 1 "general_operand"
-                       "fm,f,G,*roF,F*r,C   ,Y2*x,mY2*x,Y2*x"))]
+                       "fm,f,G,*roF,F*r,C   ,Yt*x,mYt*x,Yt*x"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
    && (reload_in_progress || reload_completed
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
 (define_insn "*movdf_integer_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+               "=f,m,f,r  ,m ,Yt*x,Yt*x,Yt*x,m   ,Yi,r ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+               "fm,f,G,rmF,Fr,C   ,Yt*x,m   ,Yt*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
 (define_insn "*movdf_integer"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
+               "=f,m,f,r  ,o ,Yt*x,Yt*x,Yt*x,m   ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
+               "fm,f,G,roF,Fr,C   ,Yt*x,m   ,Yt*x"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
    && (reload_in_progress || reload_completed
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      /* There is no non-popping store to memory for XFmode.  So if
-        we need one, follow the store with a load.  */
-      if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0\;fld%z0\t%y0";
-      else
-        return "fstp%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      /* There is no non-popping store to memory for XFmode.  So if
-        we need one, follow the store with a load.  */
-      if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0\;fld%z0\t%y0";
-      else
-        return "fstp%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return standard_80387_constant_opcode (operands[1]);
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
+(define_expand "movtf"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+       (match_operand:TF 1 "nonimmediate_operand" ""))]
+  "TARGET_64BIT"
+{
+  ix86_expand_move (TFmode, operands);
+  DONE;
+})
+
+(define_insn "*movtf_internal"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
+       (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  "TARGET_64BIT
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+    case 1:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "movaps\t{%1, %0|%0, %1}";
+      else
+       return "movdqa\t{%1, %0|%0, %1}";
+    case 2:
+      if (get_attr_mode (insn) == MODE_V4SF)
+       return "xorps\t%0, %0";
+      else
+       return "pxor\t%0, %0";
+    case 3:
+    case 4:
+       return "#";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+   (set (attr "mode")
+        (cond [(eq_attr "alternative" "0,2")
+                (if_then_else
+                  (ne (symbol_ref "optimize_size")
+                      (const_int 0))
+                  (const_string "V4SF")
+                  (const_string "TI"))
+              (eq_attr "alternative" "1")
+                (if_then_else
+                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+                           (const_int 0))
+                       (ne (symbol_ref "optimize_size")
+                           (const_int 0)))
+                  (const_string "V4SF")
+                  (const_string "TI"))]
+              (const_string "DI")))])
+
 (define_split
   [(set (match_operand 0 "nonimmediate_operand" "")
        (match_operand 1 "general_operand" ""))]
        (match_operand 1 "memory_operand" ""))]
   "reload_completed
    && MEM_P (operands[1])
-   && (GET_MODE (operands[0]) == XFmode
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == SFmode
        || GET_MODE (operands[0]) == DFmode)
-   && constant_pool_reference_p (operands[1])"
-  [(set (match_dup 0) (match_dup 1))]
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0) (match_dup 2))]
 {
-  rtx c = avoid_constant_pool_reference (operands[1]);
+  rtx c = operands[2];
   rtx r = operands[0];
 
   if (GET_CODE (r) == SUBREG)
     }
   else if (MMX_REG_P (r))
     FAIL;
-
-  operands[1] = c;
 })
 
 (define_split
        (float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
    && MEM_P (operands[1])
-   && (GET_MODE (operands[0]) == XFmode
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == SFmode
        || GET_MODE (operands[0]) == DFmode)
-   && constant_pool_reference_p (operands[1])"
-  [(set (match_dup 0) (match_dup 1))]
+   && (operands[2] = find_constant_src (insn))"
+  [(set (match_dup 0) (match_dup 2))]
 {
-  rtx c = avoid_constant_pool_reference (SET_SRC (PATTERN (curr_insn)));
+  rtx c = operands[2];
   rtx r = operands[0];
 
   if (GET_CODE (r) == SUBREG)
     }
   else if (MMX_REG_P (r))
     FAIL;
-
-  operands[1] = c;
 })
 
 (define_insn "swapxf"
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
-(define_expand "movtf"
+(define_split
   [(set (match_operand:TF 0 "nonimmediate_operand" "")
-       (match_operand:TF 1 "nonimmediate_operand" ""))]
-  "TARGET_64BIT"
-{
-  ix86_expand_move (TFmode, operands);
-  DONE;
-})
+        (match_operand:TF 1 "general_operand" ""))]
+  "reload_completed
+   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+\f
+;; Zero extension instructions
 
-(define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o,x,x,xm")
-       (match_operand:TF 1 "general_operand" "riFo,riF,C,xm,x"))]
-  "TARGET_64BIT
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_expand "zero_extendhisi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
+  ""
 {
-  switch (which_alternative)
+  if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
     {
-    case 0:
-    case 1:
-      return "#";
-    case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
-      else
-       return "pxor\t%0, %0";
-    case 3:
-    case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
-      else
-       return "movdqa\t{%1, %0|%0, %1}";
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
-   (set (attr "mode")
-        (cond [(eq_attr "alternative" "2,3")
-                (if_then_else
-                  (ne (symbol_ref "optimize_size")
-                      (const_int 0))
-                  (const_string "V4SF")
-                  (const_string "TI"))
-              (eq_attr "alternative" "4")
-                (if_then_else
-                  (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
-                           (const_int 0))
-                       (ne (symbol_ref "optimize_size")
-                           (const_int 0)))
-                  (const_string "V4SF")
-                  (const_string "TI"))]
-              (const_string "DI")))])
-
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed && !SSE_REG_P (operands[0])
-   && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-\f
-;; Zero extension instructions
-
-(define_expand "zero_extendhisi2"
-  [(set (match_operand:SI 0 "register_operand" "")
-     (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
-  ""
-{
-  if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
-    {
-      operands[1] = force_reg (HImode, operands[1]);
-      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
-      DONE;
+      operands[1] = force_reg (HImode, operands[1]);
+      emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
+      DONE;
     }
 })
 
 
 ;; %%% Kill me once multi-word ops are sane.
 (define_expand "zero_extendsidi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-     (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "rm")))]
+  [(set (match_operand:DI 0 "register_operand" "")
+     (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "")))]
   ""
 {
   if (!TARGET_64BIT)
 })
 
 (define_insn "zero_extendsidi2_32"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,*y,?*Yi,*Y2")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Yt")
        (zero_extend:DI
-        (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m ,r   ,m")))
+        (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m  ,r   ,m")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
   "@
    (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")])
 
 (define_insn "zero_extendsidi2_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,*y,?*Yi,*Y2")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Yt")
      (zero_extend:DI
-       (match_operand:SI 1 "nonimmediate_operand"  "rm,0,r   ,m ,r   ,m")))]
+       (match_operand:SI 1 "nonimmediate_operand"  "rm,0,r   ,m  ,r   ,m")))]
   "TARGET_64BIT"
   "@
    mov\t{%k1, %k0|%k0, %k1}
   emit_move_insn (operands[3], operands[1]);
 
   /* Generate a cltd if possible and doing so is profitable.  */
-  if (true_regnum (operands[1]) == 0
-      && true_regnum (operands[2]) == 1
-      && (optimize_size || TARGET_USE_CLTD))
+  if ((optimize_size || TARGET_USE_CLTD)
+      && true_regnum (operands[1]) == 0
+      && true_regnum (operands[2]) == 1)
     {
       emit_insn (gen_ashrsi3_31 (operands[2], operands[1], GEN_INT (31)));
     }
     emit_move_insn (operands[3], operands[1]);
 
   /* Generate a cltd if possible and doing so is profitable.  */
-  if (true_regnum (operands[3]) == 0
-      && (optimize_size || TARGET_USE_CLTD))
+  if ((optimize_size || TARGET_USE_CLTD)
+      && true_regnum (operands[3]) == 0)
     {
       emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
       DONE;
     }
 })
 
+/* For converting SF(xmm2) to DF(xmm1), use the following code instead of
+   cvtss2sd:
+      unpcklps xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+      cvtps2pd xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on 32-bit targets.  The sequence is unlikely
+   to combine anyway.  */
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (float_extend:DF
+         (match_operand:SF 1 "nonimmediate_operand" "")))]
+  "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size 
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+        (float_extend:V2DF
+          (vec_select:V2SF
+            (match_dup 3)
+            (parallel [(const_int 0) (const_int 1)]))))]
+{
+  /* Both views initially alias the destination register: operands[2] is
+     the V2DF result of the packed conversion, operands[3] its V4SF
+     input.  */
+  operands[2] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  operands[3] = simplify_gen_subreg (V4SFmode, operands[0], DFmode, 0);
+  /* Use movss for loading from memory and unpcklps reg, reg for registers.
+     Try to avoid a move when the unpacking can be done in the source.  */
+  if (REG_P (operands[1]))
+    {
+      /* If it is unsafe to overwrite the upper half of the source (it was
+        originally a hard register, or a pseudo wider than 4 bytes) and
+        the source is not already the destination, we need to move to
+        the destination and unpack there.  */
+      if ((ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+          || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 4)
+         && true_regnum (operands[0]) != true_regnum (operands[1]))
+       {
+         rtx tmp = gen_rtx_REG (SFmode, true_regnum (operands[0]));
+         emit_move_insn (tmp, operands[1]);
+       }
+      else
+       /* Otherwise unpack in the source register itself; the conversion
+          then reads its V4SF input from there.  */
+       operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
+      emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
+    }
+  else
+    emit_insn (gen_vec_setv4sf_0 (operands[3], 
+                                 CONST0_RTX (V4SFmode), operands[1]));
+})
+
 (define_insn "*extendsfdf2_mixed"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
         (float_extend:DF
   switch (which_alternative)
     {
     case 0:
-      return output_387_reg_move (insn, operands);
-
     case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
 
     case 2:
       return "cvtss2sd\t{%1, %0|%0, %1}";
   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m")
         (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
   "TARGET_80387"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-
-    case 1:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fst%z0\t%y0";
-
-    default:
-      gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "fmov")
-   (set_attr "mode" "SF,XF")])
-
-(define_expand "extendsfxf2"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-        (float_extend:XF (match_operand:SF 1 "general_operand" "")))]
-  "TARGET_80387"
-{
-  /* ??? Needed for compress_float_constant since all fp constants
-     are LEGITIMATE_CONSTANT_P.  */
-  if (GET_CODE (operands[1]) == CONST_DOUBLE)
-    {
-      if (standard_80387_constant_p (operands[1]) > 0)
-       {
-         operands[1] = simplify_const_unary_operation
-           (FLOAT_EXTEND, XFmode, operands[1], SFmode);
-         emit_move_insn_1 (operands[0], operands[1]);
-         DONE;
-       }
-      operands[1] = validize_mem (force_const_mem (SFmode, operands[1]));
-    }
-})
-
-(define_insn "*extendsfxf2_i387"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
-        (float_extend:XF (match_operand:SF 1 "nonimmediate_operand" "fm,f")))]
-  "TARGET_80387"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-
-    case 1:
-      /* There is no non-popping store to memory for XFmode.  So if
-        we need one, follow the store with a load.  */
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0";
-      else
-        return "fstp%z0\t%y0\n\tfld%z0\t%y0";
-
-    default:
-      gcc_unreachable ();
-    }
-}
+  "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
    (set_attr "mode" "SF,XF")])
 
-(define_expand "extenddfxf2"
+(define_expand "extend<mode>xf2"
   [(set (match_operand:XF 0 "nonimmediate_operand" "")
-        (float_extend:XF (match_operand:DF 1 "general_operand" "")))]
+        (float_extend:XF (match_operand:MODEF 1 "general_operand" "")))]
   "TARGET_80387"
 {
   /* ??? Needed for compress_float_constant since all fp constants
       if (standard_80387_constant_p (operands[1]) > 0)
        {
          operands[1] = simplify_const_unary_operation
-           (FLOAT_EXTEND, XFmode, operands[1], DFmode);
+           (FLOAT_EXTEND, XFmode, operands[1], <MODE>mode);
          emit_move_insn_1 (operands[0], operands[1]);
          DONE;
        }
-      operands[1] = validize_mem (force_const_mem (DFmode, operands[1]));
+      operands[1] = validize_mem (force_const_mem (<MODE>mode, operands[1]));
     }
 })
 
-(define_insn "*extenddfxf2_i387"
+(define_insn "*extend<mode>xf2_i387"
   [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m")
-        (float_extend:XF (match_operand:DF 1 "nonimmediate_operand" "fm,f")))]
+        (float_extend:XF
+         (match_operand:MODEF 1 "nonimmediate_operand" "fm,f")))]
   "TARGET_80387"
-{
-  switch (which_alternative)
-    {
-    case 0:
-      return output_387_reg_move (insn, operands);
-
-    case 1:
-      /* There is no non-popping store to memory for XFmode.  So if
-        we need one, follow the store with a load.  */
-      if (! find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-        return "fstp%z0\t%y0\n\tfld%z0\t%y0";
-      else
-        return "fstp%z0\t%y0";
-
-    default:
-      gcc_unreachable ();
-    }
-}
+  "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
-   (set_attr "mode" "DF,XF")])
+   (set_attr "mode" "<MODE>,XF")])
 
 ;; %%% This seems bad bad news.
 ;; This cannot output into an f-reg because there is no way to be sure
     ;
   else
     {
-      rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP);
+      rtx temp = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
       emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
       DONE;
     }
 })
 
+/* For converting DF(xmm2) to SF(xmm1), use the following code instead of
+   cvtsd2ss:
+      unpcklpd xmm2,xmm2   ; packed conversion might crash on signaling NaNs
+      cvtpd2ps xmm2,xmm1
+   We do the conversion post reload to avoid producing 128-bit spills
+   that might lead to an ICE on 32-bit targets.  The sequence is unlikely
+   to combine anyway.  */
+(define_split
+  [(set (match_operand:SF 0 "register_operand" "")
+        (float_truncate:SF
+         (match_operand:DF 1 "nonimmediate_operand" "")))]
+  "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size 
+   && reload_completed && SSE_REG_P (operands[0])"
+   [(set (match_dup 2)
+        (vec_concat:V4SF
+          (float_truncate:V2SF
+            (match_dup 4))
+          (match_dup 3)))]
+{
+  /* operands[2] is the destination viewed as V4SF, operands[3] is the zero
+     V2SF vector concatenated after the truncated pair, and operands[4] is
+     the V2DF input of the packed conversion, initially aliasing the
+     destination register.  */
+  operands[2] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  operands[3] = CONST0_RTX (V2SFmode);
+  operands[4] = simplify_gen_subreg (V2DFmode, operands[0], SFmode, 0);
+  /* Use movsd for loading from memory and unpcklpd for registers.
+     Try to avoid a move when the unpacking can be done in the source, or
+     when SSE3 movddup is available.  */
+  if (REG_P (operands[1]))
+    {
+      /* Without SSE3, when the source differs from the destination and
+        was originally a hard register or a pseudo wider than 8 bytes,
+        copy it into the destination and duplicate it there.  */
+      if (!TARGET_SSE3
+         && true_regnum (operands[0]) != true_regnum (operands[1])
+         && (ORIGINAL_REGNO (operands[1]) < FIRST_PSEUDO_REGISTER
+             || PSEUDO_REGNO_BYTES (ORIGINAL_REGNO (operands[1])) > 8))
+       {
+         rtx tmp = simplify_gen_subreg (DFmode, operands[0], SFmode, 0);
+         emit_move_insn (tmp, operands[1]);
+         operands[1] = tmp;
+       }
+      else if (!TARGET_SSE3)
+       /* Still without SSE3: duplicate within the source register and
+          convert from there.  */
+       operands[4] = simplify_gen_subreg (V2DFmode, operands[1], DFmode, 0);
+      emit_insn (gen_vec_dupv2df (operands[4], operands[1]));
+    }
+  else
+    emit_insn (gen_sse2_loadlpd (operands[4],
+                                CONST0_RTX (V2DFmode), operands[1]));
+})
+
 (define_expand "truncdfsf2_with_temp"
   [(parallel [(set (match_operand:SF 0 "" "")
                   (float_truncate:SF (match_operand:DF 1 "" "")))
   switch (which_alternative)
     {
     case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
     case 1:
       return output_387_reg_move (insn, operands);
     case 2:
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?fx*r,Y2")
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?fx*r,Yt")
        (float_truncate:SF
-         (match_operand:DF 1 "nonimmediate_operand" "f ,f    ,Y2m")))
+         (match_operand:DF 1 "nonimmediate_operand" "f ,f    ,Ytm")))
    (clobber (match_operand:SF 2 "memory_operand"     "=X,m    ,X"))]
   "TARGET_MIX_SSE_I387"
 {
   switch (which_alternative)
     {
     case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
+
     case 1:
       return "#";
     case 2:
   switch (which_alternative)
     {
     case 0:
-      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-       return "fstp%z0\t%y0";
-      else
-       return "fst%z0\t%y0";
+      return output_387_reg_move (insn, operands);
+
     case 1:
       return "#";
     default:
   "TARGET_80387
    && !(TARGET_SSE2 && TARGET_SSE_MATH)
    && !TARGET_MIX_SSE_I387"
-{
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
-}
+  "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
    (set_attr "mode" "SF")])
 
   operands[1] = gen_rtx_REG (SFmode, true_regnum (operands[1]));
 })
 
-;; Conversion from XFmode to SFmode.
+;; Conversion from XFmode to {SF,DF}mode
 
-(define_expand "truncxfsf2"
-  [(parallel [(set (match_operand:SF 0 "nonimmediate_operand" "")
-                  (float_truncate:SF
-                   (match_operand:XF 1 "register_operand" "")))
+(define_expand "truncxf<mode>2"
+  [(parallel [(set (match_operand:MODEF 0 "nonimmediate_operand" "")
+                  (float_truncate:MODEF
+                    (match_operand:XF 1 "register_operand" "")))
              (clobber (match_dup 2))])]
   "TARGET_80387"
 {
   if (flag_unsafe_math_optimizations)
     {
-      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (SFmode);
-      emit_insn (gen_truncxfsf2_i387_noop (reg, operands[1]));
+      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (<MODE>mode);
+      emit_insn (gen_truncxf<mode>2_i387_noop (reg, operands[1]));
       if (reg != operands[0])
        emit_move_insn (operands[0], reg);
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (SFmode, SLOT_TEMP);
+    operands[2] = assign_386_stack_local (<MODE>mode, SLOT_VIRTUAL);
 })
 
 (define_insn "*truncxfsf2_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?f,?r,?x")
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=m,?fx*r")
        (float_truncate:SF
-        (match_operand:XF 1 "register_operand" "f,f,f,f")))
-   (clobber (match_operand:SF 2 "memory_operand" "=X,m,m,m"))]
+         (match_operand:XF 1 "register_operand" "f,f")))
+   (clobber (match_operand:SF 2 "memory_operand" "=X,m"))]
   "TARGET_80387"
 {
   gcc_assert (!which_alternative);
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
+  return output_387_reg_move (insn, operands);
 }
-  [(set_attr "type" "fmov,multi,multi,multi")
-   (set_attr "unit" "*,i387,i387,i387")
-   (set_attr "mode" "SF")])
-
-(define_insn "truncxfsf2_i387_noop"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-       (float_truncate:SF (match_operand:XF 1 "register_operand" "f")))]
-  "TARGET_80387 && flag_unsafe_math_optimizations"
-  "* return output_387_reg_move (insn, operands);"
-  [(set_attr "type" "fmov")
-   (set_attr "mode" "SF")])
-
-(define_insn "*truncxfsf2_i387"
-  [(set (match_operand:SF 0 "memory_operand" "=m")
-       (float_truncate:SF
-        (match_operand:XF 1 "register_operand" "f")))]
-  "TARGET_80387"
-{
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
-}
-  [(set_attr "type" "fmov")
+  [(set_attr "type" "fmov,multi")
+   (set_attr "unit" "*,i387")
    (set_attr "mode" "SF")])
 
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float_truncate:SF
-        (match_operand:XF 1 "register_operand" "")))
-   (clobber (match_operand:SF 2 "memory_operand" ""))]
-  "TARGET_80387 && reload_completed"
-  [(set (match_dup 2) (float_truncate:SF (match_dup 1)))
-   (set (match_dup 0) (match_dup 2))]
-  "")
-
-(define_split
-  [(set (match_operand:SF 0 "memory_operand" "")
-       (float_truncate:SF
-        (match_operand:XF 1 "register_operand" "")))
-   (clobber (match_operand:SF 2 "memory_operand" ""))]
-  "TARGET_80387"
-  [(set (match_dup 0) (float_truncate:SF (match_dup 1)))]
-  "")
-
-;; Conversion from XFmode to DFmode.
-
-(define_expand "truncxfdf2"
-  [(parallel [(set (match_operand:DF 0 "nonimmediate_operand" "")
-                  (float_truncate:DF
-                   (match_operand:XF 1 "register_operand" "")))
-             (clobber (match_dup 2))])]
-  "TARGET_80387"
-{
-  if (flag_unsafe_math_optimizations)
-    {
-      rtx reg = REG_P (operands[0]) ? operands[0] : gen_reg_rtx (DFmode);
-      emit_insn (gen_truncxfdf2_i387_noop (reg, operands[1]));
-      if (reg != operands[0])
-       emit_move_insn (operands[0], reg);
-      DONE;
-    }
-  else
-    operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP);
-})
-
 (define_insn "*truncxfdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?f,?r,?Y2*x")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?fYt*r")
        (float_truncate:DF
-        (match_operand:XF 1 "register_operand" "f,f,f,f")))
-   (clobber (match_operand:DF 2 "memory_operand" "=X,m,m,m"))]
+         (match_operand:XF 1 "register_operand" "f,f")))
+   (clobber (match_operand:DF 2 "memory_operand" "=X,m"))]
   "TARGET_80387"
 {
   gcc_assert (!which_alternative);
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
+  return output_387_reg_move (insn, operands);
 }
-  [(set_attr "type" "fmov,multi,multi,multi")
-   (set_attr "unit" "*,i387,i387,i387")
+  [(set_attr "type" "fmov,multi")
+   (set_attr "unit" "*,i387")
    (set_attr "mode" "DF")])
 
-(define_insn "truncxfdf2_i387_noop"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (float_truncate:DF (match_operand:XF 1 "register_operand" "f")))]
+(define_insn "truncxf<mode>2_i387_noop"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+       (float_truncate:MODEF
+         (match_operand:XF 1 "register_operand" "f")))]
   "TARGET_80387 && flag_unsafe_math_optimizations"
   "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*truncxfdf2_i387"
-  [(set (match_operand:DF 0 "memory_operand" "=m")
-       (float_truncate:DF
+(define_insn "*truncxf<mode>2_i387"
+  [(set (match_operand:MODEF 0 "memory_operand" "=m")
+       (float_truncate:MODEF
          (match_operand:XF 1 "register_operand" "f")))]
   "TARGET_80387"
-{
-  if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
-    return "fstp%z0\t%y0";
-  else
-    return "fst%z0\t%y0";
-}
+  "* return output_387_reg_move (insn, operands);"
   [(set_attr "type" "fmov")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float_truncate:DF
-        (match_operand:XF 1 "register_operand" "")))
-   (clobber (match_operand:DF 2 "memory_operand" ""))]
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float_truncate:MODEF
+         (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
   "TARGET_80387 && reload_completed"
-  [(set (match_dup 2) (float_truncate:DF (match_dup 1)))
+  [(set (match_dup 2) (float_truncate:MODEF (match_dup 1)))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_split
-  [(set (match_operand:DF 0 "memory_operand" "")
-       (float_truncate:DF
-        (match_operand:XF 1 "register_operand" "")))
-   (clobber (match_operand:DF 2 "memory_operand" ""))]
+  [(set (match_operand:MODEF 0 "memory_operand" "")
+       (float_truncate:MODEF
+         (match_operand:XF 1 "register_operand" "")))
+   (clobber (match_operand:MODEF 2 "memory_operand" ""))]
   "TARGET_80387"
-  [(set (match_dup 0) (float_truncate:DF (match_dup 1)))]
+  [(set (match_dup 0) (float_truncate:MODEF (match_dup 1)))]
   "")
 \f
 ;; Signed conversion to DImode.
 
 (define_expand "fix_trunc<mode>di2"
   [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
-                   (fix:DI (match_operand:SSEMODEF 1 "register_operand" "")))
+                   (fix:DI (match_operand:MODEF 1 "register_operand" "")))
               (clobber (reg:CC FLAGS_REG))])]
   "TARGET_80387 || (TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode))"
 {
 
 (define_expand "fix_trunc<mode>si2"
   [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))
+                  (fix:SI (match_operand:MODEF 1 "register_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
   "TARGET_80387 || SSE_FLOAT_MODE_P (<MODE>mode)"
 {
   [(parallel
     [(set (match_operand:SI 0 "register_operand" "")
          (unsigned_fix:SI
-           (match_operand:SSEMODEF 1 "nonimmediate_operand" "")))
+           (match_operand:MODEF 1 "nonimmediate_operand" "")))
      (use (match_dup 2))
      (clobber (match_scratch:<ssevecmode> 3 ""))
      (clobber (match_scratch:<ssevecmode> 4 ""))])]
 (define_insn_and_split "*fixuns_trunc<mode>_1"
   [(set (match_operand:SI 0 "register_operand" "=&x,&x")
        (unsigned_fix:SI
-         (match_operand:SSEMODEF 3 "nonimmediate_operand" "xm,xm")))
+         (match_operand:MODEF 3 "nonimmediate_operand" "xm,xm")))
    (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
    (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
    (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
 ;; Without these patterns, we'll try the unsigned SI conversion which
 ;; is complex for SSE, rather than the signed SI conversion, which isn't.
 
-(define_expand "fixuns_truncsfhi2"
-  [(set (match_dup 2)
-       (fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
-   (set (match_operand:HI 0 "nonimmediate_operand" "")
-       (subreg:HI (match_dup 2) 0))]
-  "TARGET_SSE_MATH"
-  "operands[2] = gen_reg_rtx (SImode);")
-
-(define_expand "fixuns_truncdfhi2"
+(define_expand "fixuns_trunc<mode>hi2"
   [(set (match_dup 2)
-       (fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
+       (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "")))
    (set (match_operand:HI 0 "nonimmediate_operand" "")
        (subreg:HI (match_dup 2) 0))]
-  "TARGET_SSE_MATH && TARGET_SSE2"
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "operands[2] = gen_reg_rtx (SImode);")
 
 ;; When SSE is available, it is always faster to use it!
-(define_insn "fix_truncsfdi_sse"
+(define_insn "fix_trunc<mode>di_sse"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_64BIT && TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvttss2si{q}\t{%1, %0|%0, %1}"
+       (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")])
-
-(define_insn "fix_truncdfdi_sse"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (fix:DI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_64BIT && TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvttsd2si{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")])
-
-(define_insn "fix_truncsfsi_sse"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (match_operand:SF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_SSE && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvttss2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
 
-(define_insn "fix_truncdfsi_sse"
+(define_insn "fix_trunc<mode>si_sse"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-       (fix:SI (match_operand:DF 1 "nonimmediate_operand" "x,xm")))]
-  "TARGET_SSE2 && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvttsd2si\t{%1, %0|%0, %1}"
+       (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode)
+   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
+  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
 
 ;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
 (define_peephole2
-  [(set (match_operand:DF 0 "register_operand" "")
-       (match_operand:DF 1 "memory_operand" ""))
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (match_operand:MODEF 1 "memory_operand" ""))
    (set (match_operand:SSEMODEI24 2 "register_operand" "")
        (fix:SSEMODEI24 (match_dup 0)))]
-  "!TARGET_K8
-   && peep2_reg_dead_p (2, operands[0])"
-  [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
-  "")
-
-(define_peephole2
-  [(set (match_operand:SF 0 "register_operand" "")
-       (match_operand:SF 1 "memory_operand" ""))
-   (set (match_operand:SSEMODEI24 2 "register_operand" "")
-       (fix:SSEMODEI24 (match_dup 0)))]
-  "!TARGET_K8
+  "TARGET_SHORTEN_X87_SSE
    && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (fix:SSEMODEI24 (match_dup 1)))]
   "")
 
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
-  [(match_scratch:DF 2 "Y")
+  [(match_scratch:DF 2 "Yt")
    (set (match_operand:SSEMODEI24 0 "register_operand" "")
        (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+  "TARGET_AVOID_VECTOR_DECODE && !optimize_size"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
   "")
   [(match_scratch:SF 2 "x")
    (set (match_operand:SSEMODEI24 0 "register_operand" "")
        (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+  "TARGET_AVOID_VECTOR_DECODE && !optimize_size"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
   "")
 
 (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
-  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
-       (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))]
-  "TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+       (fix:X87MODEI (match_operand 1 "register_operand" "")))]
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
    && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
         && (TARGET_64BIT || <MODE>mode != DImode))
        && TARGET_SSE_MATH)
   [(set (match_operand:X87MODEI 0 "memory_operand" "=m")
        (fix:X87MODEI (match_operand 1 "register_operand" "f")))
    (clobber (match_scratch:XF 2 "=&1f"))]
-  "TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
    && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
         && (TARGET_64BIT || <MODE>mode != DImode))
        && TARGET_SSE_MATH)"
        (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
    (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m"))
    (clobber (match_scratch:XF 3 "=&1f,&1f"))]
-  "TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_FISTTP
    && !((SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
        && (TARGET_64BIT || <MODE>mode != DImode))
        && TARGET_SSE_MATH)"
 ;; clobbering insns can be used. Look at emit_i387_cw_initialization ()
 ;; function in i386.c.
 (define_insn_and_split "*fix_trunc<mode>_i387_1"
-  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
-       (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+       (fix:X87MODEI (match_operand 1 "register_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && !TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
    && !(SSE_FLOAT_MODE_P (GET_MODE (operands[1]))
         && (TARGET_64BIT || <MODE>mode != DImode))
    && !(reload_completed || reload_in_progress)"
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))
    (clobber (match_scratch:XF 4 "=&1f"))]
-  "TARGET_80387 && !TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
    && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
   "* return output_fix_trunc (insn, operands, 0);"
   [(set_attr "type" "fistp")
    (use (match_operand:HI 3 "memory_operand" "m,m"))
    (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
    (clobber (match_scratch:XF 5 "=&1f,&1f"))]
-  "TARGET_80387 && !TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
    && !(TARGET_64BIT && SSE_FLOAT_MODE_P (GET_MODE (operands[1])))"
   "#"
   [(set_attr "type" "fistp")
        (fix:X87MODEI12 (match_operand 1 "register_operand" "f")))
    (use (match_operand:HI 2 "memory_operand" "m"))
    (use (match_operand:HI 3 "memory_operand" "m"))]
-  "TARGET_80387 && !TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "* return output_fix_trunc (insn, operands, 0);"
   [(set_attr "type" "fistp")
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
    (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
-  "TARGET_80387 && !TARGET_FISTTP
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && !TARGET_FISTTP
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
   "#"
   [(set_attr "type" "fistp")
 ;; Even though we only accept memory inputs, the backend _really_
 ;; wants to be able to do this between registers.
 
-(define_expand "floathisf2"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:HI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
+(define_expand "floathi<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
-  if (TARGET_SSE_MATH)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
     {
-      emit_insn (gen_floatsisf2 (operands[0],
-                                convert_to_mode (SImode, operands[1], 0)));
+      emit_insn
+       (gen_floatsi<mode>2 (operands[0],
+                            convert_to_mode (SImode, operands[1], 0)));
       DONE;
     }
 })
 
-(define_insn "*floathisf2_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (float:SF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
+(define_insn "*floathi<mode>2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (float:MODEF
+         (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
   "@
    fild%z1\t%1
    #"
   [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "SF")
+   (set_attr "mode" "<MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatsisf2"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "")
+(define_expand "floatsi<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "
+   /* When we use vector converts, we can't have input in memory.  */
+   if (GET_MODE (operands[0]) == DFmode
+       && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
+       && SSE_FLOAT_MODE_P (DFmode))
+     operands[1] = force_reg (SImode, operands[1]);
+   else if (GET_MODE (operands[0]) == SFmode
+            && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
+            && SSE_FLOAT_MODE_P (SFmode))
+     {
+       /* When !flag_trapping_math, we handle SImode->SFmode vector
+         conversions same way as SImode->DFmode.
+
+         For flag_trapping_math we can't safely use vector conversion without
+         clearing the upper half, otherwise a precision exception might occur.
+         However we can still generate the common sequence converting value
+         from general register to XMM register as:
+
+           mov         reg32, mem32
+           movd        mem32, xmm
+           cvtdq2ps    xmm, xmm
+
+         because we know that movd clears the upper half.
+
+         Sadly in this case we can't rely on reload moving the value to XMM
+         register, since we need to know if upper half is OK, so we need
+         to do reloading by hand.  We force operand to memory unless target
+         supports inter unit moves.  */
+       if (!flag_trapping_math)
+         operands[1] = force_reg (SImode, operands[1]);
+       else if (!MEM_P (operands[1]))
+        {
+          rtx tmp = assign_386_stack_local (SImode, SLOT_VIRTUAL);
+          emit_move_insn (tmp, operands[1]);
+          operands[1] = tmp;
+        }
+     }
+   /* Offload operand of cvtsi2ss and cvtsi2sd into memory for
+      !TARGET_INTER_UNIT_CONVERSIONS.
+      It is necessary for the patterns to not accept nonmemory operands,
+      as otherwise the memory operand would be optimized out later.  */
+   else if (!TARGET_INTER_UNIT_CONVERSIONS
+           && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
+           && !optimize_size
+           && !MEM_P (operands[1]))
+     {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+     }
+  ")
+
+(define_insn "*floatsisf2_mixed_vector"  ; SI->SF when vector converts are preferred and traps are ignored
+  [(set (match_operand:SF 0 "register_operand" "=x,f,?f")
+       (float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
+  "TARGET_MIX_SSE_I387 && !flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
+  "@
+   cvtdq2ps\t{%1, %0|%0, %1}
+   fild%z1\t%1
+   #"
+  [(set_attr "type" "sseicvt,fmov,multi")
+   (set_attr "mode" "SF")
+   (set_attr "unit" "*,*,i387")  ; the "#" (multi) alternative targets an x87 register
+   (set_attr "athlon_decode" "double,*,*")
+   (set_attr "amdfam10_decode" "double,*,*")
+   (set_attr "fp_int_src" "false,true,true")])
 
 (define_insn "*floatsisf2_mixed"
   [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
-  "TARGET_MIX_SSE_I387"
+       (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
+  "TARGET_MIX_SSE_I387
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsisf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))]
-  "TARGET_SSE_MATH"
-  "cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
-   (set_attr "fp_int_src" "true")])
-
-(define_insn "*floatsisf2_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
+(define_insn "*floatsisf2_mixed_memory"
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
+       (float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
+  "TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
   "@
    fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
+   cvtsi2ss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
    (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387")
+   (set_attr "athlon_decode" "*,double")
+   (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatdisf2"
+(define_insn "*floatsisf2_sse_vector_nointernunit"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE_MATH && flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && !TARGET_INTER_UNIT_MOVES"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_insn "*floatsisf2_sse_vector_internunit"
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+       (float:SF (match_operand:SI 1 "nonimmediate_operand" "rm,x")))]
+  "TARGET_SSE_MATH && flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && TARGET_INTER_UNIT_MOVES"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_split 
   [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
-  "")
+       (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
+  "flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && reload_completed
+   && (TARGET_INTER_UNIT_MOVES || MEM_P (operands[1]))
+   && !SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
+  [(set (match_dup 0)
+       (float:V4SF (match_dup 2)))]
+{
+  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
+  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
+})
 
-(define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2ss{q}\t{%1, %0|%0, %1}
-   cvtsi2ss{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+(define_split 
+  [(set (match_operand:SF 0 "register_operand" "")
+       (float:SF (match_operand:SI 1 "register_operand" "")))]
+  "flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && reload_completed
+   && SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
+  [(set (match_dup 2) (vec_duplicate:V4SI (match_dup 1)))
+   (set (match_dup 0)
+       (float:V4SF (match_dup 2)))]
+{
+  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
+  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
+})
+
+(define_insn "*floatsisf2_sse_vector"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:SI 1 "register_operand" "x")))]
+  "TARGET_SSE_MATH && !flag_trapping_math
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
+   && !TARGET_INTER_UNIT_MOVES"
+  "cvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,vector,double")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_sse"
+(define_insn "*floatsisf2_sse"
   [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))]
-  "TARGET_64BIT && TARGET_SSE_MATH"
-  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+       (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
+  "TARGET_SSE_MATH
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
+  "cvtsi2ss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
    (set_attr "athlon_decode" "vector,double")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
+(define_insn "*floatsisf2_sse_memory"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2ss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsidf2_mixed_vector"
+  [(set (match_operand:DF 0 "register_operand" "=x,f,f")
+       (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
   "@
+   cvtdq2pd\t{%1, %0|%0, %1}
    fild%z1\t%1
    #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387")
+  [(set_attr "type" "sseicvt,fmov,multi")
+   (set_attr "mode" "V2DF,DF,DF")
+   (set_attr "unit" "*,*,i387")
+   (set_attr "athlon_decode" "double,*,*")
+   (set_attr "amdfam10_decode" "double,*,*")
+   (set_attr "fp_int_src" "false,true,true")])
+
+(define_insn "*floatsidf2_mixed"
+  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
+       (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
+  "@
+   fild%z1\t%1
+   #
+   cvtsi2sd\t{%1, %0|%0, %1}
+   cvtsi2sd\t{%1, %0|%0, %1}
+   cvtdq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
+   (set_attr "mode" "DF,DF,DF,DF,V2DF")
+   (set_attr "unit" "*,i387,*,*,*")
+   (set_attr "athlon_decode" "*,*,double,direct,double")
+   (set_attr "amdfam10_decode" "*,*,vector,double,double")
+   (set_attr "fp_int_src" "true,true,true,true,false")])
+
+(define_insn "*floatsidf2_mixed_memory"
+  [(set (match_operand:DF 0 "register_operand" "=f,x")
+       (float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
+  "TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2sd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floathidf2"
+(define_insn "*floatsidf2_sse_vector"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (float:DF (match_operand:SI 1 "register_operand" "x")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
+  "cvtdq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "V2DF")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
+(define_split 
   [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:HI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-{
-  if (TARGET_SSE2 && TARGET_SSE_MATH)
-    {
-      emit_insn (gen_floatsidf2 (operands[0],
-                                convert_to_mode (SImode, operands[1], 0)));
-      DONE;
-    }
+       (float:DF (match_operand:SI 1 "memory_operand" "")))]
+  "TARGET_USE_VECTOR_CONVERTS && reload_completed
+   && SSE_REG_P (operands[0])"
+  [(set (match_dup 0)
+       (float:V2DF
+         (vec_select:V2SI
+           (match_dup 2)
+           (parallel [(const_int 0) (const_int 1)]))))]
+{
+  operands[2] = simplify_gen_subreg (V4SImode, operands[0], DFmode, 0);
+  operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
+  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
 })
 
-(define_insn "*floathidf2_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (float:DF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387 && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)"
+(define_insn "*floatsidf2_sse"
+  [(set (match_operand:DF 0 "register_operand" "=x,x,!x")
+       (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
+       || optimize_size)"
+  "@
+   cvtsi2sd\t{%1, %0|%0, %1}
+   cvtsi2sd\t{%1, %0|%0, %1}
+   cvtdq2pd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF,DF,V2DF")
+   (set_attr "athlon_decode" "double,direct,double")
+   (set_attr "amdfam10_decode" "vector,double,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsidf2_memory"  ; cvtsi2sd from memory only, for !TARGET_INTER_UNIT_CONVERSIONS
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (float:DF (match_operand:SI 1 "memory_operand" "m")))]  ; "m": predicate only admits memory
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS
+   && !optimize_size"
+  "cvtsi2sd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatsi<mode>2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (float:MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
+  "TARGET_80387
+   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
   "@
    fild%z1\t%1
    #"
   [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatsidf2"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "")
+(define_expand "floatdisf2"
+  [(set (match_operand:SF 0 "register_operand" "")
+       (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
+{
+  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
+      && !optimize_size
+      && !MEM_P (operands[1]))
+    {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+    }
+})
 
-(define_insn "*floatsidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,mr")))]
-  "TARGET_SSE2 && TARGET_MIX_SSE_I387"
+(define_insn "*floatdisf2_mixed"
+  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
+       (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
+  "TARGET_64BIT && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
    #
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtsi2sd\t{%1, %0|%0, %1}"
+   cvtsi2ss{q}\t{%1, %0|%0, %1}
+   cvtsi2ss{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "SF")
    (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "athlon_decode" "*,*,vector,double")
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,mr")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "cvtsi2sd\t{%1, %0|%0, %1}"
+(define_insn "*floatdisf2_mixed_memory"  ; memory-only variant used when inter-unit conversions are avoided
+  [(set (match_operand:SF 0 "register_operand" "=f,x")
+       (float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
+  "TARGET_64BIT && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2ss{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "*,double")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
+(define_insn "*floatdisf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x,x")
+       (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
+  "TARGET_64BIT && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
+  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "vector,double")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "DF")
-   (set_attr "unit" "*,i387")
+(define_insn "*floatdisf2_memory"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (float:SF (match_operand:DI 1 "memory_operand" "m")))]
+  "TARGET_64BIT && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "double")
+   (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
 (define_expand "floatdidf2"
       ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
       DONE;
     }
+  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
+      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
+      && !optimize_size
+      && !MEM_P (operands[1]))
+    {
+       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
+       emit_move_insn (tmp, operands[1]);
+       operands[1] = tmp;
+    }
 })
 
 (define_insn "*floatdidf2_mixed"
   [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,mr")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387"
+       (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "@
    fild%z1\t%1
    #
    (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
+(define_insn "*floatdidf2_mixed_memory"
+  [(set (match_operand:DF 0 "register_operand" "=f,x")
+       (float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "@
+   fild%z1\t%1
+   cvtsi2sd{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "fmov,sseicvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "*,direct")
+   (set_attr "amdfam10_decode" "*,double")
+   (set_attr "fp_int_src" "true")])
+
 (define_insn "*floatdidf2_sse"
   [(set (match_operand:DF 0 "register_operand" "=x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,mr")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
+       (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
   "cvtsi2sd{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdidf2_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
+(define_insn "*floatdidf2_sse_memory"
+  [(set (match_operand:DF 0 "register_operand" "=x")
+       (float:DF (match_operand:DI 1 "memory_operand" "m")))]
+  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "unit" "*,i387")
-   (set_attr "fp_int_src" "true")])
-
-(define_insn "floathixf2"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (float:XF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "XF")
-   (set_attr "unit" "*,i387")
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "floatsixf2"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (float:XF (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387"
+(define_insn "*floatdi<mode>2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (float:MODEF
+         (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
+  "TARGET_80387
+   && (!TARGET_SSE_MATH || !TARGET_64BIT
+       || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
   "@
    fild%z1\t%1
    #"
   [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "XF")
+   (set_attr "mode" "<MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "floatdixf2"
+(define_insn "float<mode>xf2"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (float:XF (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
+       (float:XF (match_operand:X87MODEI 1 "nonimmediate_operand" "m,?r")))]
   "TARGET_80387"
   "@
    fild%z1\t%1
   [(set (match_operand 0 "fp_register_operand" "")
        (float (match_operand 1 "register_operand" "")))]
   "reload_completed
-   && TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[0]))"
+   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
   [(const_int 0)]
 {
   operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
   DONE;
 })
 \f
-;; SSE extract/set expanders
-
-\f
 ;; Add instructions
 
 ;; %%% splits for addditi3
 (define_insn "*addti3_1"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
        (plus:TI (match_operand:TI 1 "nonimmediate_operand" "%0,0")
-                (match_operand:TI 2 "general_operand" "roiF,riF")))
+                (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, TImode, operands)"
   "#")
 (define_split
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
-                (match_operand:TI 2 "general_operand" "")))
+                (match_operand:TI 2 "x86_64_general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG) (unspec:CC [(match_dup 1) (match_dup 2)]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, DImode, operands)"
   "add{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI")])
+
+(define_insn "*<addsub><mode>3_cc_overflow"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+           (plusminus:SWI
+               (match_operand:SWI 1 "nonimmediate_operand" "<comm>0,0")
+               (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m"))
+           (match_dup 1)))
+   (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+       (plusminus:SWI (match_dup 1) (match_dup 2)))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<addsub>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*add<mode>3_cconly_overflow"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+               (plus:SWI (match_operand:SWI 1 "nonimmediate_operand" "%0")
+                         (match_operand:SWI 2 "<general_operand>" "<r><i>m"))
+               (match_dup 1)))
+   (clobber (match_scratch:SWI 0 "=<r>"))]
+  "ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
+  "add{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*sub<mode>3_cconly_overflow"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+            (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "<r>m,<r>")
+                       (match_operand:SWI 1 "<general_operand>" "<r><i>,<r>m"))
+            (match_dup 0)))]
+  ""
+  "cmp{<imodesuffix>}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "icmp")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*<addsub>si3_zext_cc_overflow"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+           (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
+                         (match_operand:SI 2 "general_operand" "g"))
+           (match_dup 1)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+       (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+  "<addsub>{l}\t{%2, %k0|%k0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "SI")])
 
 (define_insn "addqi3_carry"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
          (zero_extend:DI
            (plus:SI (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
                              (match_operand:SI 1 "nonimmediate_operand" "%0"))
-                    (match_operand:SI 2 "general_operand" "rim"))))
+                    (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
   "adc{l}\t{%2, %k0|%k0, %2}"
   "TARGET_80387"
   "")
 
-(define_expand "adddf3"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (plus:DF (match_operand:DF 1 "register_operand" "")
-                (match_operand:DF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "")
-
-(define_expand "addsf3"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (plus:SF (match_operand:SF 1 "register_operand" "")
-                (match_operand:SF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
+(define_expand "add<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (plus:MODEF (match_operand:MODEF 1 "register_operand" "")
+                   (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "")
 \f
 ;; Subtract instructions
 (define_insn "*subti3_1"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o")
        (minus:TI (match_operand:TI 1 "nonimmediate_operand" "0,0")
-                 (match_operand:TI 2 "general_operand" "roiF,riF")))
+                 (match_operand:TI 2 "x86_64_general_operand" "roe,re")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (MINUS, TImode, operands)"
   "#")
 (define_split
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
-                 (match_operand:TI 2 "general_operand" "")))
+                 (match_operand:TI 2 "x86_64_general_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && reload_completed"
   [(parallel [(set (reg:CC FLAGS_REG) (compare:CC (match_dup 1) (match_dup 2)))
    (set_attr "mode" "SI")])
 
 (define_insn "subsi3_carry_zext"
-  [(set (match_operand:DI 0 "register_operand" "=rm,r")
+  [(set (match_operand:DI 0 "register_operand" "=r")
          (zero_extend:DI
-           (minus:SI (match_operand:SI 1 "register_operand" "0,0")
+           (minus:SI (match_operand:SI 1 "register_operand" "0")
              (plus:SI (match_operand:SI 3 "ix86_carry_flag_operator" "")
-                (match_operand:SI 2 "general_operand" "ri,rm")))))
+                (match_operand:SI 2 "general_operand" "g")))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
   "sbb{l}\t{%2, %k0|%k0, %2}"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (minus:SI (match_operand:SI 1 "register_operand" "0")
-                   (match_operand:SI 2 "general_operand" "rim"))))
+                   (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (MINUS, SImode, operands)"
   "sub{l}\t{%2, %k0|%k0, %2}"
   [(set (reg FLAGS_REG)
        (compare
          (minus:SI (match_operand:SI 1 "register_operand" "0")
-                   (match_operand:SI 2 "general_operand" "rim"))
+                   (match_operand:SI 2 "general_operand" "g"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
 (define_insn "*subsi_3_zext"
   [(set (reg FLAGS_REG)
        (compare (match_operand:SI 1 "register_operand" "0")
-                (match_operand:SI 2 "general_operand" "rim")))
+                (match_operand:SI 2 "general_operand" "g")))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (minus:SI (match_dup 1)
   "TARGET_80387"
   "")
 
-(define_expand "subdf3"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (minus:DF (match_operand:DF 1 "register_operand" "")
-                 (match_operand:DF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "")
-
-(define_expand "subsf3"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (minus:SF (match_operand:SF 1 "register_operand" "")
-                 (match_operand:SF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
+;; Subtract expander for every mode in the MODEF iterator.  Enabled when
+;; TARGET_80387 is set, or when TARGET_SSE_MATH is set and
+;; SSE_FLOAT_MODE_P accepts the mode.  The preparation string is empty.
+(define_expand "sub<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (minus:MODEF (match_operand:MODEF 1 "register_operand" "")
+                    (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "")
 \f
 ;; Multiply instructions
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "direct")))
-   (set_attr "amdfam10_decode" "direct")        
+   (set_attr "amdfam10_decode" "direct")
    (set_attr "mode" "QI")])
 
 (define_expand "mulqihi3"
    (set_attr "mode" "SI")])
 
 (define_expand "smuldi3_highpart"
-  [(parallel [(set (match_operand:DI 0 "register_operand" "=d")
+  [(parallel [(set (match_operand:DI 0 "register_operand" "")
                   (truncate:DI
                     (lshiftrt:TI
                       (mult:TI (sign_extend:TI
   "TARGET_80387"
   "")
 
-(define_expand "muldf3"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (mult:DF (match_operand:DF 1 "register_operand" "")
-                (match_operand:DF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+;; Multiply expander for every mode in the MODEF iterator.  Enabled when
+;; TARGET_80387 is set, or when TARGET_SSE_MATH is set and
+;; SSE_FLOAT_MODE_P accepts the mode.  The preparation string is empty.
+(define_expand "mul<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (mult:MODEF (match_operand:MODEF 1 "register_operand" "")
+                   (match_operand:MODEF 2 "nonimmediate_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "")
 
-(define_expand "mulsf3"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (mult:SF (match_operand:SF 1 "register_operand" "")
-                (match_operand:SF 2 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "")
+;; SSE5 scalar multiply/add instructions are defined in sse.md.
+
 \f
 ;; Divide instructions
 
        (div:SF (match_operand:SF 1 "register_operand" "")
                (match_operand:SF 2 "nonimmediate_operand" "")))]
   "TARGET_80387 || TARGET_SSE_MATH"
-  "")
+{
+  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swdivsf (operands[0], operands[1],
+                        operands[2], SFmode);
+      DONE;
+    }
+})
 \f
 ;; Remainder instructions.
 
   [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "general_operand" "rim"))))
+                 (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (AND, SImode, operands)"
   "and{l}\t{%2, %k0|%k0, %2}"
 (define_insn "*andsi_2"
   [(set (reg FLAGS_REG)
        (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:SI 2 "general_operand" "rim,ri"))
+                        (match_operand:SI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
        (and:SI (match_dup 1) (match_dup 2)))]
 (define_insn "*andsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                        (match_operand:SI 2 "general_operand" "rim"))
+                        (match_operand:SI 2 "general_operand" "g"))
                 (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (and:SI (match_dup 1) (match_dup 2))))]
 (define_insn "*andhi_2"
   [(set (reg FLAGS_REG)
        (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "rim,ri"))
+                        (match_operand:HI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (and:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*iorsi_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,r")
        (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:SI 2 "general_operand" "ri,rmi")))
+               (match_operand:SI 2 "general_operand" "ri,g")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (IOR, SImode, operands)"
   "or{l}\t{%2, %0|%0, %2}"
 
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*iorsi_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=rm")
+  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "general_operand" "rim"))))
+                 (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (IOR, SImode, operands)"
   "or{l}\t{%2, %k0|%k0, %2}"
    (set_attr "mode" "SI")])
 
 (define_insn "*iorsi_1_zext_imm"
-  [(set (match_operand:DI 0 "register_operand" "=rm")
+  [(set (match_operand:DI 0 "register_operand" "=r")
        (ior:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%0"))
                (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
    (clobber (reg:CC FLAGS_REG))]
 (define_insn "*iorsi_2"
   [(set (reg FLAGS_REG)
        (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:SI 2 "general_operand" "rim,ri"))
+                        (match_operand:SI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
        (ior:SI (match_dup 1) (match_dup 2)))]
 (define_insn "*iorsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                        (match_operand:SI 2 "general_operand" "rim"))
+                        (match_operand:SI 2 "general_operand" "g"))
                 (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ior:SI (match_dup 1) (match_dup 2))))]
 (define_insn "*iorsi_3"
   [(set (reg FLAGS_REG)
        (compare (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                        (match_operand:SI 2 "general_operand" "rim"))
+                        (match_operand:SI 2 "general_operand" "g"))
                 (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
 (define_insn "*iorhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
        (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:HI 2 "general_operand" "rmi,ri")))
+               (match_operand:HI 2 "general_operand" "g,ri")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (IOR, HImode, operands)"
   "or{w}\t{%2, %0|%0, %2}"
 (define_insn "*iorhi_2"
   [(set (reg FLAGS_REG)
        (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "rim,ri"))
+                        (match_operand:HI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (ior:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*iorhi_3"
   [(set (reg FLAGS_REG)
        (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-                        (match_operand:HI 2 "general_operand" "rim"))
+                        (match_operand:HI 2 "general_operand" "g"))
                 (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
   [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                 (match_operand:SI 2 "general_operand" "rim"))))
+                 (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (XOR, SImode, operands)"
   "xor{l}\t{%2, %k0|%k0, %2}"
 (define_insn "*xorsi_2"
   [(set (reg FLAGS_REG)
        (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:SI 2 "general_operand" "rim,ri"))
+                        (match_operand:SI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
        (xor:SI (match_dup 1) (match_dup 2)))]
 (define_insn "*xorsi_2_zext"
   [(set (reg FLAGS_REG)
        (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                        (match_operand:SI 2 "general_operand" "rim"))
+                        (match_operand:SI 2 "general_operand" "g"))
                 (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (xor:SI (match_dup 1) (match_dup 2))))]
 (define_insn "*xorsi_3"
   [(set (reg FLAGS_REG)
        (compare (xor:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                        (match_operand:SI 2 "general_operand" "rim"))
+                        (match_operand:SI 2 "general_operand" "g"))
                 (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
 (define_insn "*xorhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
        (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:HI 2 "general_operand" "rmi,ri")))
+               (match_operand:HI 2 "general_operand" "g,ri")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (XOR, HImode, operands)"
   "xor{w}\t{%2, %0|%0, %2}"
 (define_insn "*xorhi_2"
   [(set (reg FLAGS_REG)
        (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "rim,ri"))
+                        (match_operand:HI 2 "general_operand" "g,ri"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (xor:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*xorhi_3"
   [(set (reg FLAGS_REG)
        (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-                        (match_operand:HI 2 "general_operand" "rim"))
+                        (match_operand:HI 2 "general_operand" "g"))
                 (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
 
 (define_insn "*negti2_1"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=ro")
-       (neg:TI (match_operand:TI 1 "general_operand" "0")))
+       (neg:TI (match_operand:TI 1 "nonimmediate_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
    && ix86_unary_operator_ok (NEG, TImode, operands)"
 
 (define_split
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
-       (neg:TI (match_operand:TI 1 "general_operand" "")))
+       (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && reload_completed"
   [(parallel
    && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
   "#")
 
-(define_expand "copysignsf3"
-  [(match_operand:SF 0 "register_operand" "")
-   (match_operand:SF 1 "nonmemory_operand" "")
-   (match_operand:SF 2 "register_operand" "")]
-  "TARGET_SSE_MATH"
-{
-  ix86_expand_copysign (operands);
-  DONE;
-})
-
-(define_insn_and_split "copysignsf3_const"
-  [(set (match_operand:SF 0 "register_operand"          "=x")
-       (unspec:SF
-         [(match_operand:V4SF 1 "vector_move_operand"  "xmC")
-          (match_operand:SF 2 "register_operand"       "0")
-          (match_operand:V4SF 3 "nonimmediate_operand" "xm")]
-         UNSPEC_COPYSIGN))]
-  "TARGET_SSE_MATH"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_copysign_const (operands);
-  DONE;
-})
-
-(define_insn "copysignsf3_var"
-  [(set (match_operand:SF 0 "register_operand"          "=x, x, x, x,x")
-       (unspec:SF
-         [(match_operand:SF 2 "register_operand"       " x, 0, 0, x,x")
-          (match_operand:SF 3 "register_operand"       " 1, 1, x, 1,x")
-          (match_operand:V4SF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
-          (match_operand:V4SF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
-         UNSPEC_COPYSIGN))
-   (clobber (match_scratch:V4SF 1                      "=x, x, x, x,x"))]
-  "TARGET_SSE_MATH"
-  "#")
-
-(define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (unspec:SF
-         [(match_operand:SF 2 "register_operand" "")
-          (match_operand:SF 3 "register_operand" "")
-          (match_operand:V4SF 4 "" "")
-          (match_operand:V4SF 5 "" "")]
-         UNSPEC_COPYSIGN))
-   (clobber (match_scratch:V4SF 1 ""))]
-  "TARGET_SSE_MATH && reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_copysign_var (operands);
-  DONE;
-})
-
 (define_expand "negdf2"
   [(set (match_operand:DF 0 "nonimmediate_operand" "")
        (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
    && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
   "#")
 
-(define_expand "copysigndf3"
-  [(match_operand:DF 0 "register_operand" "")
-   (match_operand:DF 1 "nonmemory_operand" "")
-   (match_operand:DF 2 "register_operand" "")]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-{
-  ix86_expand_copysign (operands);
-  DONE;
-})
-
-(define_insn_and_split "copysigndf3_const"
-  [(set (match_operand:DF 0 "register_operand"          "=x")
-       (unspec:DF
-         [(match_operand:V2DF 1 "vector_move_operand"  "xmC")
-          (match_operand:DF 2 "register_operand"       "0")
-          (match_operand:V2DF 3 "nonimmediate_operand" "xm")]
-         UNSPEC_COPYSIGN))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_copysign_const (operands);
-  DONE;
-})
-
-(define_insn "copysigndf3_var"
-  [(set (match_operand:DF 0 "register_operand"          "=x, x, x, x,x")
-       (unspec:DF
-         [(match_operand:DF 2 "register_operand"       " x, 0, 0, x,x")
-          (match_operand:DF 3 "register_operand"       " 1, 1, x, 1,x")
-          (match_operand:V2DF 4 "nonimmediate_operand" " X,xm,xm, 0,0")
-          (match_operand:V2DF 5 "nonimmediate_operand" " 0,xm, 1,xm,1")]
-         UNSPEC_COPYSIGN))
-   (clobber (match_scratch:V2DF 1                      "=x, x, x, x,x"))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "#")
-
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (unspec:DF
-         [(match_operand:DF 2 "register_operand" "")
-          (match_operand:DF 3 "register_operand" "")
-          (match_operand:V2DF 4 "" "")
-          (match_operand:V2DF 5 "" "")]
-         UNSPEC_COPYSIGN))
-   (clobber (match_scratch:V2DF 1 ""))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && reload_completed"
-  [(const_int 0)]
-{
-  ix86_split_copysign_var (operands);
-  DONE;
-})
-
 (define_expand "negxf2"
   [(set (match_operand:XF 0 "nonimmediate_operand" "")
        (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
    && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)"
   "#")
 
+;; TFmode negation expander, enabled only for TARGET_64BIT.  All RTL is
+;; produced by ix86_expand_fp_absneg_operator (called with NEG), after
+;; which the expander finishes with DONE.
+(define_expand "negtf2"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+       (neg:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_fp_absneg_operator (NEG, TFmode, operands); DONE;")
+
+;; TFmode absolute-value expander, enabled only for TARGET_64BIT.  All RTL
+;; is produced by ix86_expand_fp_absneg_operator (called with ABS), after
+;; which the expander finishes with DONE.
+(define_expand "abstf2"
+  [(set (match_operand:TF 0 "nonimmediate_operand" "")
+       (abs:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
+  "TARGET_64BIT"
+  "ix86_expand_fp_absneg_operator (ABS, TFmode, operands); DONE;")
+
+;; Matches any operation accepted by absneg_operator (operand 3) applied to
+;; a TFmode value, with three constraint alternatives.  Operand 2 appears
+;; only inside a USE; presumably it is the sign-bit mask prepared by
+;; ix86_expand_fp_absneg_operator -- TODO confirm.  FLAGS_REG is clobbered.
+;; The output template is "#", so the insn is split rather than printed.
+(define_insn "*absnegtf2_sse"
+  [(set (match_operand:TF 0 "nonimmediate_operand"    "=x,x,m")
+       (match_operator:TF 3 "absneg_operator"
+         [(match_operand:TF 1 "nonimmediate_operand" "0, x,0")]))
+   (use (match_operand:TF 2 "nonimmediate_operand"    "xm,0,X"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && ix86_unary_operator_ok (GET_CODE (operands[3]), TFmode, operands)"
+  "#")
+
 ;; Splitters for fp abs and neg.
 
 (define_split
   "fabs"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
+
+;; Copysign instructions
+
+;; Scalar modes over which the copysign patterns are instantiated.
+(define_mode_iterator CSGNMODE [SF DF TF])
+;; Companion mode for each CSGNMODE, used by the non-scalar operands of the
+;; copysign patterns: V4SF for SF, V2DF for DF, and TF itself for TF.
+(define_mode_attr CSGNVMODE [(SF "V4SF") (DF "V2DF") (TF "TF")])
+
+;; copysign expander for each CSGNMODE.  Operands 0 and 2 must be
+;; registers; operand 1 may be any nonmemory operand.  Enabled for SSE
+;; float modes under TARGET_SSE_MATH, or for TFmode under TARGET_64BIT.
+;; The pattern only lists operands; ix86_expand_copysign emits the actual
+;; RTL and the expander finishes with DONE.
+(define_expand "copysign<mode>3"
+  [(match_operand:CSGNMODE 0 "register_operand" "")
+   (match_operand:CSGNMODE 1 "nonmemory_operand" "")
+   (match_operand:CSGNMODE 2 "register_operand" "")]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+{
+  ix86_expand_copysign (operands);
+  DONE;
+})
+
+;; UNSPEC_COPYSIGN form with three inputs: operand 2 is tied to the
+;; destination ("0"), operands 1 and 3 are in <CSGNVMODE>.  Presumably
+;; this is the variant chosen when the magnitude is a constant -- TODO
+;; confirm in ix86_expand_copysign.  The output template is "#"; once
+;; reload has completed the insn is replaced by whatever
+;; ix86_split_copysign_const emits.
+(define_insn_and_split "copysign<mode>3_const"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x")
+       (unspec:CSGNMODE
+         [(match_operand:<CSGNVMODE> 1 "vector_move_operand" "xmC")
+          (match_operand:CSGNMODE 2 "register_operand" "0")
+          (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
+         UNSPEC_COPYSIGN))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_const (operands);
+  DONE;
+})
+
+;; UNSPEC_COPYSIGN form with four inputs (operands 2-5) and a <CSGNVMODE>
+;; scratch (operand 1), offered in five constraint alternatives.  The
+;; output template is "#", so the insn is split rather than printed; the
+;; define_split that follows matches this same operand shape.
+(define_insn "copysign<mode>3_var"
+  [(set (match_operand:CSGNMODE 0 "register_operand" "=x,x,x,x,x")
+       (unspec:CSGNMODE
+         [(match_operand:CSGNMODE 2 "register_operand" "x,0,0,x,x")
+          (match_operand:CSGNMODE 3 "register_operand" "1,1,x,1,x")
+          (match_operand:<CSGNVMODE> 4 "nonimmediate_operand" "X,xm,xm,0,0")
+          (match_operand:<CSGNVMODE> 5 "nonimmediate_operand" "0,xm,1,xm,1")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
+  "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+  "#")
+
+;; Post-reload splitter for the four-input UNSPEC_COPYSIGN shape with a
+;; <CSGNVMODE> scratch.  Uses the same enable condition as the copysign
+;; insns plus reload_completed; the replacement sequence is emitted by
+;; ix86_split_copysign_var.
+(define_split
+  [(set (match_operand:CSGNMODE 0 "register_operand" "")
+       (unspec:CSGNMODE
+         [(match_operand:CSGNMODE 2 "register_operand" "")
+          (match_operand:CSGNMODE 3 "register_operand" "")
+          (match_operand:<CSGNVMODE> 4 "" "")
+          (match_operand:<CSGNVMODE> 5 "" "")]
+         UNSPEC_COPYSIGN))
+   (clobber (match_scratch:<CSGNVMODE> 1 ""))]
+  "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    || (TARGET_64BIT && (<MODE>mode == TFmode)))
+   && reload_completed"
+  [(const_int 0)]
+{
+  ix86_split_copysign_var (operands);
+  DONE;
+})
 \f
 ;; One complement instructions
 
   "#"
   [(set_attr "type" "multi")])
 
+;; This pattern must be defined before *ashlti3_2 to prevent the
+;; combine pass from converting sse2_ashlti3 to *ashlti3_2.
+
+;; TImode left shift of an SSE register ("x", tied to the destination) by
+;; a constant accepted by const_0_to_255_mul_8_operand.  Requires
+;; TARGET_SSE2.
+(define_insn "sse2_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (ashift:TI (match_operand:TI 1 "register_operand" "0")
+                  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  /* pslldq takes its shift count in bytes, so the constant from the RTL
+     is divided by 8 before the instruction is printed.  */
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "pslldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
+
 (define_insn "*ashlti3_2"
   [(set (match_operand:TI 0 "register_operand" "=r")
        (ashift:TI (match_operand:TI 1 "register_operand" "0")
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashift:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+  "TARGET_64BIT
    && (optimize_size
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
-              || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+              || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
 {
   switch (get_attr_type (insn))
     {
                     (match_operand:QI 2 "immediate_operand" "e"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, DImode, operands)
+  "TARGET_64BIT
    && (optimize_size
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
-              || TARGET_DOUBLE_WITH_ADD)))"
+              || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, DImode, operands)"
 {
   switch (get_attr_type (insn))
     {
                   (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-                    ? flow2_completed : reload_completed)"
+                    ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
 
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashift:SI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+   "(optimize_size
+     || !TARGET_PARTIAL_FLAG_REG_STALL
+     || (operands[2] == const1_rtx
+        && (TARGET_SHIFT1
+            || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || TARGET_DOUBLE_WITH_ADD)))"
+  "(optimize_size
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+       && (TARGET_SHIFT1
+           || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, SImode, operands)
+  "TARGET_64BIT
    && (optimize_size
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
-              || TARGET_DOUBLE_WITH_ADD)))"
+              || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashift:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+  "(optimize_size
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+       && (TARGET_SHIFT1
+           || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
 {
   switch (get_attr_type (insn))
     {
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || TARGET_DOUBLE_WITH_ADD)))"
+  "(optimize_size
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+       && (TARGET_SHIFT1
+           || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, HImode, operands)"
 {
   switch (get_attr_type (insn))
     {
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashift:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))"
+  "(optimize_size
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+       && (TARGET_SHIFT1
+           || (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
 {
   switch (get_attr_type (insn))
     {
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL
-       || (operands[2] == const1_rtx
-          && (TARGET_SHIFT1
-              || TARGET_DOUBLE_WITH_ADD)))"
+  "(optimize_size
+    || !TARGET_PARTIAL_FLAG_REG_STALL
+    || (operands[2] == const1_rtx
+       && (TARGET_SHIFT1
+           || TARGET_DOUBLE_WITH_ADD)))
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFT, QImode, operands)"
 {
   switch (get_attr_type (insn))
     {
        (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
                       (match_operand:QI 2 "const_int_operand" "n"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
                     (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-                    ? flow2_completed : reload_completed)"
+                    ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
 
        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
        (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "0")
                                     (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%k0"
   [(set_attr "type" "ishift")
    (set_attr "length" "2")])
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%k0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
        (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
        (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
        (ashiftrt:QI (match_dup 0)
                     (match_operand:QI 1 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
-   && (! TARGET_PARTIAL_REG_STALL || optimize_size)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift1")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
   "#"
   [(set_attr "type" "multi")])
 
+;; This pattern must be defined before *lshrti3_2 to prevent
+;; combine pass from converting sse2_lshrti3 to *lshrti3_2.
+;;
+;; Logical right shift of a TImode register by a constant, enabled when
+;; TARGET_SSE2 holds.  Operand 1 is tied to operand 0 (constraint "0").
+;; The output code divides the constant in operand 2 by 8 before it is
+;; printed as the psrldq immediate; the predicate name suggests the RTL
+;; count is a multiple of 8 in 0..255 (presumably a bit count converted
+;; to the byte count psrldq expects -- TODO confirm).
+
+(define_insn "sse2_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
+                    (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_SSE2"
+{
+  /* Rescale the count: the template below prints operands[2] / 8.  */
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "psrldq\t{%2, %0|%0, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix_data16" "1")
+   (set_attr "mode" "TI")])
+
 (define_insn "*lshrti3_2"
   [(set (match_operand:TI 0 "register_operand" "=r")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
        (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:DI (match_dup 1) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
                       (match_operand:QI 2 "const_int_operand" "e"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "DI")])
                     (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && ((optimize > 0 && flag_peephole2)
-                    ? flow2_completed : reload_completed)"
+                    ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_lshr (operands, NULL_RTX, DImode); DONE;")
 
        (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
        (lshiftrt:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%k0"
   [(set_attr "type" "ishift")
    (set_attr "length" "2")])
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
+  "TARGET_64BIT
    && (TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%k0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
         (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
-  "TARGET_64BIT && ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "TARGET_64BIT
+   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "SI")])
        (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "HI")])
        (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
   [(set_attr "type" "ishift")
    (set (attr "length")
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
   [(set_attr "type" "ishift")
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "ix86_match_ccmode (insn, CCGOCmode)
-   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)
-   && (optimize_size
-       || !TARGET_PARTIAL_FLAG_REG_STALL)"
+  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && ix86_match_ccmode (insn, CCGOCmode)
+   && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t{%2, %0|%0, %2}"
   [(set_attr "type" "ishift")
    (set_attr "mode" "QI")])
        (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATE, DImode, operands)"
   "rol{q}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
   "rol{l}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
          (rotate:SI (match_operand:SI 1 "register_operand" "0")
                     (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
   "rol{l}\t%k0"
   [(set_attr "type" "rotate")
    (set_attr "length" "2")])
        (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATE, HImode, operands)"
   "rol{w}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, QImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATE, QImode, operands)"
   "rol{b}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
   "ror{q}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
   "ror{l}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
          (rotatert:SI (match_operand:SI 1 "register_operand" "0")
                       (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "TARGET_64BIT
+   && (TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
   "ror{l}\t%k0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, HImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
   "ror{w}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, QImode, operands)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(TARGET_SHIFT1 || optimize_size)
+   && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
   "ror{b}\t%0"
   [(set_attr "type" "rotate")
    (set (attr "length")
        (match_operator:SF 1 "sse_comparison_operator"
          [(match_operand:SF 2 "register_operand" "0")
           (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE"
+  "TARGET_SSE && !TARGET_SSE5"
   "cmp%D1ss\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "SF")])
        (match_operator:DF 1 "sse_comparison_operator"
          [(match_operand:DF 2 "register_operand" "0")
           (match_operand:DF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && !TARGET_SSE5"
   "cmp%D1sd\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
    (set_attr "mode" "DF")])
+
+;; Scalar floating-point compare producing a result in an SSE register,
+;; enabled only when TARGET_SSE5 holds (the cmp%D1ss / cmp%D1sd patterns
+;; preceding this one are conditioned on !TARGET_SSE5).  Unlike those
+;; patterns, operand 2 is not tied to the destination (constraint "x"
+;; rather than "0") and the template prints three operands.
+;; NOTE(review): the template hard-codes the "ss" suffix although the
+;; pattern is parameterized by <mode>/<MODE>; confirm what is emitted for
+;; the double-precision instance.
+(define_insn "*sse5_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 1 "sse5_comparison_float_operator"
+         [(match_operand:MODEF 2 "register_operand" "x")
+          (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_SSE5"
+  "com%Y1ss\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse4arg")
+   (set_attr "mode" "<MODE>")])
+
 \f
 ;; Basic conditional jump instructions.
 ;; We ignore the overflow flag for signed branch instructions.
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_CMOVE && TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_CMOVE
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
          (label_ref (match_operand 3 "" ""))))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))]
-  "TARGET_CMOVE && TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
+   && TARGET_CMOVE
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 4 "=a"))]
-  "TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 4 "=a"))]
-  "TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && ix86_fp_jump_nontrivial_p (GET_CODE (operands[0]))"
   "#")
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 4 "=a"))]
-  "TARGET_80387
-   && FLOAT_MODE_P (GET_MODE (operands[1]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])
    && !ix86_use_fcomi_compare (GET_CODE (operands[0]))
    && SELECT_CC_MODE (GET_CODE (operands[0]),
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 5 "=a,a"))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
-   && FLOAT_MODE_P (GET_MODE (operands[3]))
+  "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
+   && TARGET_USE_<MODE>MODE_FIOP
    && GET_MODE (operands[1]) == GET_MODE (operands[3])
    && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
    && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
    (set_attr "modrm" "0")])
 
+;; Expander for an indirect jump: sets (pc) from operand 0, which must
+;; satisfy nonimmediate_operand.  The condition and the preparation
+;; statements are both empty.  The operand carries an empty constraint
+;; string; constraints are not needed on define_expand operands.
 (define_expand "indirect_jump"
-  [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))]
+  [(set (pc) (match_operand 0 "nonimmediate_operand" ""))]
   ""
   "")
 
    (set_attr "length_immediate" "0")])
 
 (define_expand "tablejump"
-  [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" "rm"))
+  [(parallel [(set (pc) (match_operand 0 "nonimmediate_operand" ""))
              (use (label_ref (match_operand 1 "" "")))])]
   ""
 {
       rtx op0, op1;
       enum rtx_code code;
 
-      if (TARGET_64BIT)
+      /* We can't use @GOTOFF for text labels on VxWorks;
+        see gotoff_operand.  */
+      if (TARGET_64BIT || TARGET_VXWORKS_RTP)
        {
          code = PLUS;
          op0 = operands[0];
 (define_insn "*call_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
         (match_operand 1 "" ""))]
-  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
 {
   if (constant_call_address_operand (operands[0], Pmode))
     return "call\t%P0";
 }
   [(set_attr "type" "call")])
 
+;; Call through a DImode address held in a register or in memory
+;; (constraints "rm"), always printed with the "call\t%A0" template.
+;; Unlike *call_1_rex64 (constraints "rsm") no "s" alternative is offered.
+;; NOTE(review): the condition tests only !SIBLING_CALL_P and TARGET_64BIT,
+;; not ix86_cmodel, whereas *call_1_rex64 is restricted to models other
+;; than CM_LARGE and CM_LARGE_PIC; presumably this pattern relies on being
+;; tried after *call_1_rex64 -- confirm.
+(define_insn "*call_1_rex64_large"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
+        (match_operand 1 "" ""))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "call\t%A0"
+  [(set_attr "type" "call")])
+
 (define_insn "*sibcall_1_rex64"
   [(call (mem:QI (match_operand:DI 0 "constant_call_address_operand" ""))
         (match_operand 1 "" ""))]
      registers we stored in the result block.  We avoid problems by
      claiming that all hard registers are used and clobbered at this
      point.  */
-  emit_insn (gen_blockage (const0_rtx));
+  emit_insn (gen_blockage ());
 
   DONE;
 })
 ;; all of memory.  This blocks insns from being moved across this point.
 
+;; Takes no operands: the unspec_volatile wraps a fixed (const_int 0).
+;; The condition and the output template are both empty strings and the
+;; length attribute is 0.
 (define_insn "blockage"
-  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_BLOCKAGE)]
+  [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)]
+  ""
+  ""
+  [(set_attr "length" "0")])
+
+;; As USE insns aren't meaningful after reload, this is used instead
+;; to prevent deleting instructions setting registers for PIC code.
+;; Operand 0 has neither a predicate nor a constraint.  The insn has an
+;; empty condition, an empty output template and a length attribute of 0.
+(define_insn "prologue_use"
+  [(unspec_volatile [(match_operand 0 "" "")] UNSPECV_PROLOGUE_USE)]
   ""
   ""
   [(set_attr "length" "0")])
   [(return)
    (unspec [(const_int 0)] UNSPEC_REP)]
   "reload_completed"
-  "rep {;} ret"
+  "rep{\;| }ret"
   [(set_attr "length" "1")
    (set_attr "length_immediate" "0")
    (set_attr "prefix_rep" "1")
   [(set_attr "length" "16")])
 
+;; Expander for the function prologue.  The (const_int 0) template is only
+;; a placeholder: the preparation code calls ix86_expand_prologue () and
+;; then ends with DONE, so the template itself is not emitted.
 (define_expand "prologue"
-  [(const_int 1)]
+  [(const_int 0)]
   ""
   "ix86_expand_prologue (); DONE;")
 
   [(set_attr "type" "lea")
    (set_attr "length" "6")])
 
+;; 64-bit only (TARGET_64BIT).  Sets DImode register operand 0 to an
+;; UNSPEC_SET_RIP unspec of operand 1, printed as an lea{q} of operand 1
+;; relative to %rip ("%%" in the template prints a literal "%").
+;; Operand 1 has no predicate; the %l modifier indicates it is expected to
+;; be a label -- TODO confirm against the callers.
+;; The "length" attribute is given explicitly as 6.
+(define_insn "set_rip_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_RIP))]
+  "TARGET_64BIT"
+  "lea{q}\t%l1(%%rip), %0"
+  [(set_attr "type" "lea")
+   (set_attr "length" "6")])
+
+;; 64-bit only (TARGET_64BIT).  Sets DImode register operand 0 to an
+;; UNSPEC_SET_GOT_OFFSET unspec of operand 1, printed as a movabs{q} of the
+;; immediate expression "_GLOBAL_OFFSET_TABLE_ - <operand 1>".
+;; Operand 1 has no predicate; the %l modifier indicates it is expected to
+;; be a label -- TODO confirm against the callers.
+;; The "length" attribute is given explicitly as 11.
+(define_insn "set_got_offset_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_GOT_OFFSET))]
+  "TARGET_64BIT"
+  "movabs{q}\t$_GLOBAL_OFFSET_TABLE_-%l1, %0"
+  [(set_attr "type" "imov")
+   (set_attr "length" "11")])
+
+;; Expander for the function epilogue.  The (const_int 0) template is only
+;; a placeholder: the preparation code calls ix86_expand_epilogue (1) and
+;; then ends with DONE.  The meaning of the argument 1 is defined by
+;; ix86_expand_epilogue, which is not part of this file.
 (define_expand "epilogue"
-  [(const_int 1)]
+  [(const_int 0)]
   ""
   "ix86_expand_epilogue (1); DONE;")
 
+;; Expander for the epilogue emitted before a sibling call.  The
+;; (const_int 0) template is only a placeholder: the preparation code calls
+;; ix86_expand_epilogue (0) and then ends with DONE.  The meaning of the
+;; argument 0 is defined by ix86_expand_epilogue, which is not part of
+;; this file.
 (define_expand "sibcall_epilogue"
-  [(const_int 1)]
+  [(const_int 0)]
   ""
   "ix86_expand_epilogue (0); DONE;")
 
   "!TARGET_64BIT"
   "#"
   "reload_completed"
-  [(const_int 1)]
+  [(const_int 0)]
   "ix86_expand_epilogue (2); DONE;")
 
 (define_insn_and_split "eh_return_di"
   "TARGET_64BIT"
   "#"
   "reload_completed"
-  [(const_int 1)]
+  [(const_int 0)]
   "ix86_expand_epilogue (2); DONE;")
 
 (define_insn "leave"
       (clobber (match_scratch:SI 2 ""))
       (clobber (reg:CC FLAGS_REG))])]
   ""
-  "")
+{
+  if (TARGET_CMOVE)
+    {
+      emit_insn (gen_ffs_cmove (operands[0], operands[1]));
+      DONE;
+    }
+})
 
-(define_insn_and_split "*ffs_cmove"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
-   (clobber (match_scratch:SI 2 "=&r"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
+;; Expand an SImode ffs using a conditional move (requires TARGET_CMOVE).
+;; Emitted sequence: operand 2, a fresh SImode pseudo created by the
+;; preparation statement, is set to -1; operand 1 is compared with zero
+;; while operand 0 receives ctz of operand 1; operand 0 is replaced by
+;; operand 2 when the comparison found operand 1 equal to zero; finally 1
+;; is added to operand 0.  A zero input therefore yields -1 + 1 = 0.
+(define_expand "ffs_cmove"
   [(set (match_dup 2) (const_int -1))
-   (parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))
-             (set (match_dup 0) (ctz:SI (match_dup 1)))])
+   (parallel [(set (reg:CCZ FLAGS_REG)
+                  (compare:CCZ (match_operand:SI 1 "register_operand" "")
+                               (const_int 0)))
+             (set (match_operand:SI 0 "nonimmediate_operand" "")
+                  (ctz:SI (match_dup 1)))])
    (set (match_dup 0) (if_then_else:SI
                         (eq (reg:CCZ FLAGS_REG) (const_int 0))
                         (match_dup 2)
                         (match_dup 0)))
    (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
               (clobber (reg:CC FLAGS_REG))])]
-  "")
+  "TARGET_CMOVE"
+  "operands[2] = gen_reg_rtx (SImode);")
 
 (define_insn_and_split "*ffs_no_cmove"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
        (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
    (clobber (match_scratch:SI 2 "=&q"))
    (clobber (reg:CC FLAGS_REG))]
-  ""
+  "!TARGET_CMOVE"
   "#"
-  "reload_completed"
-  [(parallel [(set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))
+  "&& reload_completed"
+  [(parallel [(set (reg:CCZ FLAGS_REG)
+                  (compare:CCZ (match_dup 1) (const_int 0)))
              (set (match_dup 0) (ctz:SI (match_dup 1)))])
    (set (strict_low_part (match_dup 3))
        (eq:QI (reg:CCZ FLAGS_REG) (const_int 0)))
   [(set_attr "prefix_0f" "1")])
 
+;; Expand DImode ffs (TARGET_64BIT only) directly into its final
+;; sequence: load -1 into operand 2 (a fresh DImode pseudo created by
+;; the preparation statement), compute ctz of operand 1 while setting
+;; the CCZ flags from comparing operand 1 with zero, replace the result
+;; with operand 2 when the flags satisfy "eq", then add 1.
 (define_expand "ffsdi2"
-  [(parallel
-     [(set (match_operand:DI 0 "register_operand" "")
-          (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "")))
-      (clobber (match_scratch:DI 2 ""))
-      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT && TARGET_CMOVE"
-  "")
-
-(define_insn_and_split "*ffs_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (ffs:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
-   (clobber (match_scratch:DI 2 "=&r"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_CMOVE"
-  "#"
-  "&& reload_completed"
   [(set (match_dup 2) (const_int -1))
    (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (match_dup 1) (const_int 0)))
-             (set (match_dup 0) (ctz:DI (match_dup 1)))])
+                  (compare:CCZ (match_operand:DI 1 "register_operand" "")
+                               (const_int 0)))
+             (set (match_operand:DI 0 "nonimmediate_operand" "")
+                  (ctz:DI (match_dup 1)))])
    (set (match_dup 0) (if_then_else:DI
                         (eq (reg:CCZ FLAGS_REG) (const_int 0))
                         (match_dup 2)
                         (match_dup 0)))
    (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
               (clobber (reg:CC FLAGS_REG))])]
-  "")
+  "TARGET_64BIT"
+  "operands[2] = gen_reg_rtx (DImode);")
 
 (define_insn "*ffsdi_1"
   [(set (reg:CCZ FLAGS_REG)
      (clobber (reg:CC FLAGS_REG))])]
   "!TARGET_64BIT && TARGET_GNU2_TLS"
 {
-  operands[3] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  operands[3] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   ix86_tls_descriptor_calls_expanded_in_cfun = true;
 })
 
   ""
   [(set (match_dup 0) (match_dup 5))]
 {
-  operands[5] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  operands[5] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   emit_insn (gen_tls_dynamic_gnu2_32 (operands[5], operands[1], operands[2]));
 })
 
      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_64BIT && TARGET_GNU2_TLS"
 {
-  operands[2] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  operands[2] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   ix86_tls_descriptor_calls_expanded_in_cfun = true;
 })
 
   ""
   [(set (match_dup 0) (match_dup 4))]
 {
-  operands[4] = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
+  operands[4] = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
   emit_insn (gen_tls_dynamic_gnu2_64 (operands[4], operands[1]));
 })
 
               (const_string "fop")))
    (set_attr "mode" "SF")])
 
+;; SFmode operation represented as an UNSPEC_RCP unspec and output as
+;; the "rcpss" instruction.  Operand 0 is a register with constraint
+;; "x"; operand 1 is a register or memory operand with constraint "xm".
+;; Enabled only under TARGET_SSE_MATH.
+(define_insn "*rcpsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+                  UNSPEC_RCP))]
+  "TARGET_SSE_MATH"
+  "rcpss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
 (define_insn "*fop_sf_1_sse"
   [(set (match_operand:SF 0 "register_operand" "=x")
        (match_operator:SF 3 "binary_fp_operator"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
           [(float_extend:XF
-             (match_operand:X87MODEF12 1 "nonimmediate_operand" "fm,0"))
+             (match_operand:MODEF 1 "nonimmediate_operand" "fm,0"))
            (match_operand:XF 2 "register_operand" "0,f")]))]
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
        (match_operator:XF 3 "binary_fp_operator"
          [(match_operand:XF 1 "register_operand" "0,f")
           (float_extend:XF
-            (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))]
+            (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
          [(float_extend:XF
-            (match_operand:X87MODEF12 1 "register_operand" "0,f"))
+            (match_operand:MODEF 1 "register_operand" "0,f"))
           (float_extend:XF
-            (match_operand:X87MODEF12 2 "nonimmediate_operand" "fm,0"))]))]
+            (match_operand:MODEF 2 "nonimmediate_operand" "fm,0"))]))]
   "TARGET_80387"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
        (match_operator 3 "binary_fp_operator"
           [(float (match_operand:X87MODEI12 1 "register_operand" ""))
            (match_operand 2 "register_operand" "")]))]
-  "TARGET_80387 && reload_completed
-   && FLOAT_MODE_P (GET_MODE (operands[0]))"
+  "reload_completed
+   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
   [(const_int 0)]
 {
   operands[4] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
        (match_operator 3 "binary_fp_operator"
           [(match_operand 1 "register_operand" "")
            (float (match_operand:X87MODEI12 2 "register_operand" ""))]))]
-  "TARGET_80387 && reload_completed
-   && FLOAT_MODE_P (GET_MODE (operands[0]))"
+  "reload_completed
+   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
   [(const_int 0)]
 {
   operands[4] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
 ;; all fancy i386 XFmode math functions.
 
 (define_insn "truncxf<mode>2_i387_noop_unspec"
-  [(set (match_operand:X87MODEF12 0 "register_operand" "=f")
-       (unspec:X87MODEF12 [(match_operand:XF 1 "register_operand" "f")]
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+       (unspec:MODEF [(match_operand:XF 1 "register_operand" "f")]
        UNSPEC_TRUNC_NOOP))]
   "TARGET_USE_FANCY_MATH_387"
   "* return output_387_reg_move (insn, operands);"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (sqrt:XF
          (float_extend:XF
-           (match_operand:X87MODEF12 1 "register_operand" "0"))))]
+           (match_operand:MODEF 1 "register_operand" "0"))))]
   "TARGET_USE_FANCY_MATH_387"
   "fsqrt"
   [(set_attr "type" "fpspc")
    (set_attr "athlon_decode" "direct")   
    (set_attr "amdfam10_decode" "direct")])
 
+;; SFmode operation represented as an UNSPEC_RSQRT unspec and output as
+;; the "rsqrtss" instruction.  Operand 0 is a register with constraint
+;; "x"; operand 1 is a register or memory operand with constraint "xm".
+;; Enabled only under TARGET_SSE_MATH.
+(define_insn "*rsqrtsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+                  UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH"
+  "rsqrtss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "SF")])
+
+;; Expander for the named pattern "rsqrtsf2".  It is enabled only when
+;; TARGET_SSE_MATH and TARGET_RECIP hold, the compilation is not
+;; optimizing for size, and finite-math-only, non-trapping and unsafe
+;; math optimizations are all in effect.  The template is never emitted:
+;; the preparation code calls ix86_emit_swsqrtsf with a final argument
+;; of 1 and then finishes with DONE.
+;; NOTE(review): the final argument presumably selects the reciprocal
+;; form (sqrt<mode>2 passes 0 to the same helper) -- confirm against
+;; ix86_emit_swsqrtsf.
+(define_expand "rsqrtsf2"
+  [(set (match_operand:SF 0 "register_operand" "")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")]
+                  UNSPEC_RSQRT))]
+  "TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+   && flag_finite_math_only && !flag_trapping_math
+   && flag_unsafe_math_optimizations"
+{
+  ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
+  DONE;
+})
+
 (define_insn "*sqrt<mode>2_sse"
-  [(set (match_operand:SSEMODEF 0 "register_operand" "=x")
-       (sqrt:SSEMODEF
-         (match_operand:SSEMODEF 1 "nonimmediate_operand" "xm")))]
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (sqrt:MODEF
+         (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "sqrts<ssemodefsuffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
    (set_attr "amdfam10_decode" "*")])
 
 (define_expand "sqrt<mode>2"
-  [(set (match_operand:X87MODEF12 0 "register_operand" "")
-       (sqrt:X87MODEF12
-         (match_operand:X87MODEF12 1 "nonimmediate_operand" "")))]
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (sqrt:MODEF
+         (match_operand:MODEF 1 "nonimmediate_operand" "")))]
   "TARGET_USE_FANCY_MATH_387
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
+  if (<MODE>mode == SFmode
+      && TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+      && flag_finite_math_only && !flag_trapping_math
+      && flag_unsafe_math_optimizations)
+    {
+      ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 0);
+      DONE;
+    }
+
   if (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
     {
       rtx op0 = gen_reg_rtx (XFmode);
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FPREM_U))
    (set (reg:CCFP FPSR_REG)
-       (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+       (unspec:CCFP [(match_dup 2) (match_dup 3)]
+                    UNSPEC_C2_FLAG))]
   "TARGET_USE_FANCY_MATH_387"
   "fprem"
   [(set_attr "type" "fpspc")
 {
   rtx label = gen_label_rtx ();
 
-  emit_label (label);
+  rtx op2;
+
+  if (rtx_equal_p (operands[1], operands[2]))
+    {
+      op2 = gen_reg_rtx (XFmode);
+      emit_move_insn (op2, operands[2]);
+    }
+  else
+    op2 = operands[2];
 
-  emit_insn (gen_fpremxf4_i387 (operands[1], operands[2],
-                               operands[1], operands[2]));
+  emit_label (label);
+  emit_insn (gen_fpremxf4_i387 (operands[1], op2, operands[1], op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
 
 (define_expand "fmod<mode>3"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))
-   (use (match_operand:X87MODEF12 2 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
   emit_label (label);
   emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
        (unspec:XF [(match_dup 2) (match_dup 3)]
                   UNSPEC_FPREM1_U))
    (set (reg:CCFP FPSR_REG)
-       (unspec:CCFP [(const_int 0)] UNSPEC_NOP))]
+       (unspec:CCFP [(match_dup 2) (match_dup 3)]
+                    UNSPEC_C2_FLAG))]
   "TARGET_USE_FANCY_MATH_387"
   "fprem1"
   [(set_attr "type" "fpspc")
 {
   rtx label = gen_label_rtx ();
 
-  emit_label (label);
+  rtx op2;
+
+  if (rtx_equal_p (operands[1], operands[2]))
+    {
+      op2 = gen_reg_rtx (XFmode);
+      emit_move_insn (op2, operands[2]);
+    }
+  else
+    op2 = operands[2];
 
-  emit_insn (gen_fprem1xf4_i387 (operands[1], operands[2],
-                                operands[1], operands[2]));
+  emit_label (label);
+  emit_insn (gen_fprem1xf4_i387 (operands[1], op2, operands[1], op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   emit_move_insn (operands[0], operands[1]);
   DONE;
 })
 
 (define_expand "remainder<mode>3"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))
-   (use (match_operand:X87MODEF12 2 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
   emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
+  LABEL_NUSES (label) = 1;
 
   /* Truncate the result properly for strict SSE math.  */
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
 (define_insn "*sin_extend<mode>xf2_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 1 "register_operand" "0"))]
+                     (match_operand:MODEF 1 "register_operand" "0"))]
                   UNSPEC_SIN))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
 (define_insn "*cos_extend<mode>xf2_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 1 "register_operand" "0"))]
+                     (match_operand:MODEF 1 "register_operand" "0"))]
                   UNSPEC_COS))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    (set (match_operand:XF 1 "register_operand" "")
        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
-   && !reload_completed && !reload_in_progress"
+   && !(reload_completed || reload_in_progress)"
   [(set (match_dup 1) (unspec:XF [(match_dup 2)] UNSPEC_SIN))]
   "")
 
    (set (match_operand:XF 1 "register_operand" "")
        (unspec:XF [(match_dup 2)] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
-   && !reload_completed && !reload_in_progress"
+   && !(reload_completed || reload_in_progress)"
   [(set (match_dup 0) (unspec:XF [(match_dup 2)] UNSPEC_COS))]
   "")
 
 (define_insn "sincos_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" "0"))]
+                     (match_operand:MODEF 2 "register_operand" "0"))]
                   UNSPEC_SINCOS_COS))
    (set (match_operand:XF 1 "register_operand" "=u")
         (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
 (define_split
   [(set (match_operand:XF 0 "register_operand" "")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" ""))]
+                     (match_operand:MODEF 2 "register_operand" ""))]
                   UNSPEC_SINCOS_COS))
    (set (match_operand:XF 1 "register_operand" "")
        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[0]))
-   && !reload_completed && !reload_in_progress"
+   && !(reload_completed || reload_in_progress)"
   [(set (match_dup 1) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SIN))]
   "")
 
 (define_split
   [(set (match_operand:XF 0 "register_operand" "")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" ""))]
+                     (match_operand:MODEF 2 "register_operand" ""))]
                   UNSPEC_SINCOS_COS))
    (set (match_operand:XF 1 "register_operand" "")
        (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_SINCOS_SIN))]
   "find_regno_note (insn, REG_UNUSED, REGNO (operands[1]))
-   && !reload_completed && !reload_in_progress"
+   && !(reload_completed || reload_in_progress)"
   [(set (match_dup 0) (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_COS))]
   "")
 
 (define_expand "sincos<mode>3"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))
-   (use (match_operand:X87MODEF12 2 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
    (set_attr "mode" "XF")])
 
 (define_insn "fptan_extend<mode>xf4_i387"
-  [(set (match_operand:X87MODEF12 0 "register_operand" "=f")
-       (match_operand:X87MODEF12 3 "const_double_operand" "F"))
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+       (match_operand:MODEF 3 "const_double_operand" "F"))
    (set (match_operand:XF 1 "register_operand" "=u")
         (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" "0"))]
+                     (match_operand:MODEF 2 "register_operand" "0"))]
                   UNSPEC_TAN))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
 })
 
 (define_expand "tan<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 (define_insn "fpatan_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 1 "register_operand" "0"))
+                     (match_operand:MODEF 1 "register_operand" "0"))
                    (float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" "u"))]
+                     (match_operand:MODEF 2 "register_operand" "u"))]
                   UNSPEC_FPATAN))
    (clobber (match_scratch:XF 3 "=2"))]
   "TARGET_USE_FANCY_MATH_387
   "")
 
 (define_expand "atan2<mode>3"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))
-   (use (match_operand:X87MODEF12 2 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "atan<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "asin<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "acos<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 (define_insn "fyl2x_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 1 "register_operand" "0"))
+                     (match_operand:MODEF 1 "register_operand" "0"))
                    (match_operand:XF 2 "register_operand" "u")]
                   UNSPEC_FYL2X))
    (clobber (match_scratch:XF 3 "=2"))]
 })
 
 (define_expand "log<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "log10<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "log2<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 (define_insn "fyl2xp1_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
         (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 1 "register_operand" "0"))
+                     (match_operand:MODEF 1 "register_operand" "0"))
                    (match_operand:XF 2 "register_operand" "u")]
                   UNSPEC_FYL2XP1))
    (clobber (match_scratch:XF 3 "=2"))]
 })
 
 (define_expand "log1p<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 (define_insn "fxtract_extend<mode>xf3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(float_extend:XF
-                     (match_operand:X87MODEF12 2 "register_operand" "0"))]
+                     (match_operand:MODEF 2 "register_operand" "0"))]
                   UNSPEC_XTRACT_FRACT))
    (set (match_operand:XF 1 "register_operand" "=u")
         (unspec:XF [(float_extend:XF (match_dup 2))] UNSPEC_XTRACT_EXP))]
 })
 
 (define_expand "logb<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 
 (define_expand "ilogb<mode>2"
   [(use (match_operand:SI 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "register_operand" ""))]
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "exp<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "exp10<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 })
 
 (define_expand "exp2<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
   emit_move_insn (operands[2], standard_80387_constant_rtx (5)); /* fldl2e */
 })
 
-(define_expand "expm1<mode>2"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))]
+;; expm1 for each mode of the MODEF iterator, implemented through the
+;; XFmode expander: extend operand 1 into an XFmode temporary, apply
+;; expm1xf2, and move the result back into operand 0 with
+;; truncxf<mode>2_i387_noop.  Requires the fancy 387 math instructions,
+;; x87 (or mixed SSE/x87) arithmetic for the mode, unsafe math
+;; optimizations, and not optimizing for size.
+(define_expand "expm1<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))]
+ "TARGET_USE_FANCY_MATH_387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && flag_unsafe_math_optimizations && !optimize_size"
+{
+  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op1 = gen_reg_rtx (XFmode);
+
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+  emit_insn (gen_expm1xf2 (op0, op1));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
+  DONE;
+})
+
+;; XFmode ldexp.  The SImode operand 2 is first converted to XFmode
+;; into operand 3; a single parallel then produces operand 0 as the
+;; UNSPEC_FSCALE_FRACT of operands 1 and 3 and operand 4 as the matching
+;; UNSPEC_FSCALE_EXP.  Operands 3 and 4 are fresh XFmode pseudos created
+;; by the preparation code.  Requires the fancy 387 math instructions,
+;; unsafe math optimizations, and not optimizing for size.
+(define_expand "ldexpxf3"
+  [(set (match_dup 3)
+       (float:XF (match_operand:SI 2 "register_operand" "")))
+   (parallel [(set (match_operand:XF 0 "register_operand" "")
+                  (unspec:XF [(match_operand:XF 1 "register_operand" "")
+                              (match_dup 3)]
+                             UNSPEC_FSCALE_FRACT))
+             (set (match_dup 4)
+                  (unspec:XF [(match_dup 1) (match_dup 3)]
+                             UNSPEC_FSCALE_EXP))])]
+  "TARGET_USE_FANCY_MATH_387
+   && flag_unsafe_math_optimizations && !optimize_size"
+{
+  operands[3] = gen_reg_rtx (XFmode);
+  operands[4] = gen_reg_rtx (XFmode);
+})
+
+(define_expand "ldexp<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:SI 2 "register_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
   rtx op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn (gen_expm1xf2 (op0, op1));
+  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
   emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
   DONE;
 })
 
-(define_expand "ldexpxf3"
-  [(set (match_dup 3)
-       (float:XF (match_operand:SI 2 "register_operand" "")))
-   (parallel [(set (match_operand:XF 0 " register_operand" "")
+;; XFmode scalb.  A single parallel produces operand 0 as the
+;; UNSPEC_FSCALE_FRACT of operands 1 and 2 and operand 3 as the matching
+;; UNSPEC_FSCALE_EXP.  Operand 3 is a fresh XFmode pseudo created by the
+;; preparation code.  Requires the fancy 387 math instructions, unsafe
+;; math optimizations, and not optimizing for size.
+(define_expand "scalbxf3"
+  [(parallel [(set (match_operand:XF 0 "register_operand" "")
                   (unspec:XF [(match_operand:XF 1 "register_operand" "")
-                              (match_dup 3)]
+                              (match_operand:XF 2 "register_operand" "")]
                              UNSPEC_FSCALE_FRACT))
-             (set (match_dup 4)
-                  (unspec:XF [(match_dup 1) (match_dup 3)]
+             (set (match_dup 3)
+                  (unspec:XF [(match_dup 1) (match_dup 2)]
                              UNSPEC_FSCALE_EXP))])]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations && !optimize_size"
 {
   operands[3] = gen_reg_rtx (XFmode);
-  operands[4] = gen_reg_rtx (XFmode);
 })
 
-(define_expand "ldexp<mode>3"
-  [(use (match_operand:X87MODEF12 0 "register_operand" ""))
-   (use (match_operand:X87MODEF12 1 "general_operand" ""))
-   (use (match_operand:SI 2 "register_operand" ""))]
+(define_expand "scalb<mode>3"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "general_operand" ""))
+   (use (match_operand:MODEF 2 "register_operand" ""))]
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
-  emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_scalbxf3 (op0, op1, op2));
   emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
   DONE;
 })
 \f
 
-(define_insn "frndintxf2"
+;; Scalar rounding for each mode of the MODEF iterator, represented as
+;; an UNSPEC_ROUND unspec and output as "rounds<ssemodefsuffix>".
+;; Operands 0 and 1 are registers with constraint "x"; operand 2 is an
+;; SImode immediate accepted by const_0_to_15_operand and is passed
+;; through to the instruction unchanged.  Enabled only under
+;; TARGET_ROUND.
+(define_insn "sse4_1_round<mode>2"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
+                      (match_operand:SI 2 "const_0_to_15_operand" "n")]
+                     UNSPEC_ROUND))]
+  "TARGET_ROUND"
+  "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "rintxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
-        UNSPEC_FRNDINT))]
+                  UNSPEC_FRNDINT))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "frndint"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")])
 
-(define_expand "rintdf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
+;; rint for each mode of the MODEF iterator.  When SSE math is used for
+;; the mode, trapping math is off, and either TARGET_ROUND holds or the
+;; compilation is not optimizing for size, the SSE path is taken: with
+;; TARGET_ROUND emit sse4_1_round<mode>2 with immediate 0x04, otherwise
+;; call ix86_expand_rint.  In every other enabled case the input is
+;; extended to XFmode, rintxf2 is applied, and the result is moved back
+;; with truncxf<mode>2_i387_noop.
+(define_expand "rint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "(TARGET_USE_FANCY_MATH_387
-    && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
     && flag_unsafe_math_optimizations)
-   || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
        && !flag_trapping_math
-       && !optimize_size)"
+       && (TARGET_ROUND || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math
-      && !optimize_size)
-    ix86_expand_rint (operand0, operand1);
-  else
+      && (TARGET_ROUND || !optimize_size))
     {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2 (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
+      if (TARGET_ROUND)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x04)));
+      else
+       ix86_expand_rint (operand0, operand1);
     }
-  DONE;
-})
-
-(define_expand "rintsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "(TARGET_USE_FANCY_MATH_387
-    && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-    && flag_unsafe_math_optimizations)
-   || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-       && !flag_trapping_math
-       && !optimize_size)"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math
-      && !optimize_size)
-    ix86_expand_rint (operand0, operand1);
   else
     {
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2 (op0, op1));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
+      emit_insn (gen_rintxf2 (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
 
-(define_expand "rintxf2"
-  [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
-{
-  emit_insn (gen_frndintxf2 (operands[0], operands[1]));
-  DONE;
-})
-
-(define_expand "roundsf2"
-  [(match_operand:SF 0 "register_operand" "")
-   (match_operand:SF 1 "nonimmediate_operand" "")]
-  "SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-   && !flag_trapping_math && !flag_rounding_math
-   && !optimize_size"
-{
-  ix86_expand_round (operand0, operand1);
-  DONE;
-})
-
-(define_expand "rounddf2"
-  [(match_operand:DF 0 "register_operand" "")
-   (match_operand:DF 1 "nonimmediate_operand" "")]
-  "SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
+(define_expand "round<mode>2"
+  [(match_operand:MODEF 0 "register_operand" "")
+   (match_operand:MODEF 1 "nonimmediate_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
    && !flag_trapping_math && !flag_rounding_math
    && !optimize_size"
 {
-  if (TARGET_64BIT)
+  if (TARGET_64BIT || (<MODE>mode != DFmode))
     ix86_expand_round (operand0, operand1);
   else
     ix86_expand_rounddf_32 (operand0, operand1);
 })
 
 (define_insn_and_split "*fistdi2_1"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
-       (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
-        UNSPEC_FIST))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+       (unspec:DI [(match_operand:XF 1 "register_operand" "")]
+                  UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
    && !(reload_completed || reload_in_progress)"
   "#"
 (define_insn "fistdi2"
   [(set (match_operand:DI 0 "memory_operand" "=m")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f")]
-        UNSPEC_FIST))
+                  UNSPEC_FIST))
    (clobber (match_scratch:XF 2 "=&1f"))]
   "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, 0);"
 (define_insn "fistdi2_with_temp"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
-        UNSPEC_FIST))
+                  UNSPEC_FIST))
    (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
    (clobber (match_scratch:XF 3 "=&1f,&1f"))]
   "TARGET_USE_FANCY_MATH_387"
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
-        UNSPEC_FIST))
+                  UNSPEC_FIST))
    (clobber (match_operand:DI 2 "memory_operand" ""))
    (clobber (match_scratch 3 ""))]
   "reload_completed"
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
        (unspec:DI [(match_operand:XF 1 "register_operand" "")]
-        UNSPEC_FIST))
+                  UNSPEC_FIST))
    (clobber (match_operand:DI 2 "memory_operand" ""))
    (clobber (match_scratch 3 ""))]
   "reload_completed"
   "")
 
 (define_insn_and_split "*fist<mode>2_1"
-  [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
-       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
-        UNSPEC_FIST))]
+  [(set (match_operand:X87MODEI12 0 "register_operand" "")
+       (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
+                          UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387
    && !(reload_completed || reload_in_progress)"
   "#"
 (define_insn "fist<mode>2"
   [(set (match_operand:X87MODEI12 0 "memory_operand" "=m")
        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
-        UNSPEC_FIST))]
+                          UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387"
   "* return output_fix_trunc (insn, operands, 0);"
   [(set_attr "type" "fpspc")
 (define_insn "fist<mode>2_with_temp"
   [(set (match_operand:X87MODEI12 0 "register_operand" "=r")
        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "f")]
-        UNSPEC_FIST))
+                          UNSPEC_FIST))
    (clobber (match_operand:X87MODEI12 2 "memory_operand" "=m"))]
   "TARGET_USE_FANCY_MATH_387"
   "#"
 (define_split
   [(set (match_operand:X87MODEI12 0 "register_operand" "")
        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
-        UNSPEC_FIST))
+                          UNSPEC_FIST))
    (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
   "reload_completed"
-  [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)]
-                      UNSPEC_FIST))
+  [(set (match_dup 2) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))
    (set (match_dup 0) (match_dup 2))]
   "")
 
 (define_split
   [(set (match_operand:X87MODEI12 0 "memory_operand" "")
        (unspec:X87MODEI12 [(match_operand:XF 1 "register_operand" "")]
-        UNSPEC_FIST))
+                          UNSPEC_FIST))
    (clobber (match_operand:X87MODEI12 2 "memory_operand" ""))]
   "reload_completed"
-  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)]
-                      UNSPEC_FIST))]
+  [(set (match_dup 0) (unspec:X87MODEI12 [(match_dup 1)] UNSPEC_FIST))]
   "")
 
 (define_expand "lrintxf<mode>2"
   [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
      (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
-      UNSPEC_FIST))]
+                     UNSPEC_FIST))]
   "TARGET_USE_FANCY_MATH_387"
   "")
 
-(define_expand "lrint<mode>di2"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "")
-     (unspec:DI [(match_operand:SSEMODEF 1 "register_operand" "")]
-      UNSPEC_FIX_NOTRUNC))]
-  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT"
-  "")
-
-(define_expand "lrint<mode>si2"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-     (unspec:SI [(match_operand:SSEMODEF 1 "register_operand" "")]
-      UNSPEC_FIX_NOTRUNC))]
-  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+;; lrint expander for SSE math, instantiated for every combination of a
+;; MODEF source mode and an SSEMODEI24 destination mode.  It has no
+;; preparation code: the expansion is just the UNSPEC_FIX_NOTRUNC set
+;; given in the template.  A DImode destination is accepted only when
+;; TARGET_64BIT is set.
+(define_expand "lrint<MODEF:mode><SSEMODEI24:mode>2"
+  [(set (match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+     (unspec:SSEMODEI24 [(match_operand:MODEF 1 "register_operand" "")]
+                       UNSPEC_FIX_NOTRUNC))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)"
   "")
 
-(define_expand "lround<mode>di2"
-  [(match_operand:DI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
-  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
-   && !flag_trapping_math && !flag_rounding_math
-   && !optimize_size"
-{
-  ix86_expand_lround (operand0, operand1);
-  DONE;
-})
-
-(define_expand "lround<mode>si2"
-  [(match_operand:SI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
-  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+(define_expand "lround<MODEF:mode><SSEMODEI24:mode>2"
+  [(match_operand:SSEMODEI24 0 "nonimmediate_operand" "")
+   (match_operand:MODEF 1 "register_operand" "")]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
    && !flag_trapping_math && !flag_rounding_math
    && !optimize_size"
 {
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_floor"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+  [(set (match_operand:XF 0 "register_operand" "")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FRNDINT_FLOOR))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
   DONE;
 })
 
-(define_expand "floordf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "floor<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_ROUND || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_ROUND)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x01)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_floorceil (operand0, operand1, true);
       else
        ix86_expand_floorceildf_32 (operand0, operand1, true);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_floor (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "floorsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_floorceil (operand0, operand1, true);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_floor (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
 
 (define_insn_and_split "*fist<mode>2_floor_1"
-  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
-       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FIST_FLOOR))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
 
 (define_expand "lfloor<mode>di2"
   [(match_operand:DI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
+   (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
    && !flag_trapping_math
    && !optimize_size"
 
 (define_expand "lfloor<mode>si2"
   [(match_operand:SI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
+   (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
    && !flag_trapping_math
    && (!optimize_size || !TARGET_64BIT)"
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_ceil"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+  [(set (match_operand:XF 0 "register_operand" "")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FRNDINT_CEIL))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
   DONE;
 })
 
-(define_expand "ceildf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "ceil<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_ROUND || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_ROUND)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x02)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_floorceil (operand0, operand1, false);
       else
        ix86_expand_floorceildf_32 (operand0, operand1, false);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_ceil (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "ceilsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_floorceil (operand0, operand1, false);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_ceil (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
 
 (define_insn_and_split "*fist<mode>2_ceil_1"
-  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
-       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "f,f")]
+  [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "")
+       (unspec:X87MODEI [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FIST_CEIL))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
 
 (define_expand "lceil<mode>di2"
   [(match_operand:DI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
+   (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
    && !flag_trapping_math"
 {
 
 (define_expand "lceil<mode>si2"
   [(match_operand:SI 0 "nonimmediate_operand" "")
-   (match_operand:SSEMODEF 1 "register_operand" "")]
+   (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
    && !flag_trapping_math"
 {
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_trunc"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+  [(set (match_operand:XF 0 "register_operand" "")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FRNDINT_TRUNC))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
   DONE;
 })
 
-(define_expand "btruncdf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
+(define_expand "btrunc<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
+  "(TARGET_USE_FANCY_MATH_387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+    && flag_unsafe_math_optimizations && !optimize_size)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+       && !flag_trapping_math
+       && (TARGET_ROUND || !optimize_size))"
 {
-  if (SSE_FLOAT_MODE_P (DFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+      && !flag_trapping_math
+      && (TARGET_ROUND || !optimize_size))
     {
-      if (TARGET_64BIT)
+      if (TARGET_ROUND)
+       emit_insn (gen_sse4_1_round<mode>2
+                  (operands[0], operands[1], GEN_INT (0x03)));
+      else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_trunc (operand0, operand1);
       else
        ix86_expand_truncdf_32 (operand0, operand1);
       rtx op0 = gen_reg_rtx (XFmode);
       rtx op1 = gen_reg_rtx (XFmode);
 
-      emit_insn (gen_extenddfxf2 (op1, operands[1]));
-      emit_insn (gen_frndintxf2_trunc (op0, op1));
-
-      emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-    }
-  DONE;
-})
-
-(define_expand "btruncsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
-  "((TARGET_USE_FANCY_MATH_387
-     && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
-     && flag_unsafe_math_optimizations)
-    || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-        && !flag_trapping_math))
-   && !optimize_size"
-{
-  if (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH
-      && !flag_trapping_math)
-    ix86_expand_trunc (operand0, operand1);
-  else
-    {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
-
-      emit_insn (gen_extendsfxf2 (op1, operands[1]));
+      emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_trunc (op0, op1));
 
-      emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+      emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
     }
   DONE;
 })
 
 ;; Rounding mode control word calculation could clobber FLAGS_REG.
 (define_insn_and_split "frndintxf2_mask_pm"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (unspec:XF [(match_operand:XF 1 "register_operand" "0")]
+  [(set (match_operand:XF 0 "register_operand" "")
+       (unspec:XF [(match_operand:XF 1 "register_operand" "")]
         UNSPEC_FRNDINT_MASK_PM))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_USE_FANCY_MATH_387
   DONE;
 })
 
-(define_expand "nearbyintdf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:DF 1 "register_operand" ""))]
-  "TARGET_USE_FANCY_MATH_387
-   && (!(TARGET_SSE2 && TARGET_SSE_MATH) || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations"
-{
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-
-  emit_insn (gen_extenddfxf2 (op1, operands[1]));
-  emit_insn (gen_frndintxf2_mask_pm (op0, op1));
-
-  emit_insn (gen_truncxfdf2_i387_noop (operands[0], op0));
-  DONE;
-})
-
-(define_expand "nearbyintsf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SF 1 "register_operand" ""))]
+;; nearbyint for the MODEF modes, computed in XFmode: operand 1 is
+;; extended into a fresh XFmode pseudo, frndintxf2_mask_pm is applied,
+;; and the result is converted back into operand 0 with the
+;; truncxf<mode>2_i387_noop pattern.  Enabled only with
+;; TARGET_USE_FANCY_MATH_387 and flag_unsafe_math_optimizations, and
+;; only when the mode is not handled by SSE math (unless
+;; TARGET_MIX_SSE_I387 is set).
+(define_expand "nearbyint<mode>2"
+  [(use (match_operand:MODEF 0 "register_operand" ""))
+   (use (match_operand:MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
    && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extendsfxf2 (op1, operands[1]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_frndintxf2_mask_pm (op0, op1));
 
-  emit_insn (gen_truncxfsf2_i387_noop (operands[0], op0));
+  emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
   DONE;
 })
 
   [(use (match_operand:SI 0 "register_operand" ""))
    (use (match_operand:X87MODEF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-  && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-      || TARGET_MIX_SSE_I387)"
+   && TARGET_C99_FUNCTIONS
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
   rtx mask = GEN_INT (0x45);
   rtx val = GEN_INT (0x05);
   DONE;
 })
 
+;; signbit for the X87MODEF modes.  Operand 0 (SImode register)
+;; receives the result, operand 1 is the floating-point input.  The
+;; expander emits fxam<mode>2_i387 into an HImode scratch and then ANDs
+;; the SImode low part of that scratch with 0x0200, so operand 0 is
+;; either 0 or 0x0200.  Enabled only with TARGET_USE_FANCY_MATH_387 and
+;; when the mode is not handled by SSE math.
+(define_expand "signbit<mode>2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:X87MODEF 1 "register_operand" ""))]
+  "TARGET_USE_FANCY_MATH_387
+   && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+{
+  /* NOTE(review): presumably bit 9 is the x87 status-word C1 flag,
+     which FXAM sets to the operand's sign -- confirm against the ISA
+     manual.  */
+  rtx mask = GEN_INT (0x0200);
+
+  rtx scratch = gen_reg_rtx (HImode);
+
+  emit_insn (gen_fxam<mode>2_i387 (scratch, operands[1]));
+  emit_insn (gen_andsi3 (operands[0], gen_lowpart (SImode, scratch), mask));
+  DONE;
+})
 \f
 ;; Block operation instructions
 
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "{rep\;movsq|rep movsq}"
+  "rep movsq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!TARGET_64BIT"
-  "{rep\;movsl|rep movsd}"
+  "rep movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "{rep\;movsl|rep movsd}"
+  "rep movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!TARGET_64BIT"
-  "{rep\;movsb|rep movsb}"
+  "rep movsb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "{rep\;movsb|rep movsb}"
+  "rep movsb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
    (use (match_operand:DI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "{rep\;stosq|rep stosq}"
+  "rep stosq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!TARGET_64BIT"
-  "{rep\;stosl|rep stosd}"
+  "rep stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "{rep\;stosl|rep stosd}"
+  "rep stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!TARGET_64BIT"
-  "{rep\;stosb|rep stosb}"
+  "rep stosb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "{rep\;stosb|rep stosb}"
+  "rep stosb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
   "!TARGET_64BIT"
-  "repz{\;| }cmpsb"
+  "repz cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
   "TARGET_64BIT"
-  "repz{\;| }cmpsb"
+  "repz cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
   "!TARGET_64BIT"
-  "repz{\;| }cmpsb"
+  "repz cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
   "TARGET_64BIT"
-  "repz{\;| }cmpsb"
+  "repz cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  "repnz{\;| }scasb"
+  "repnz scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "repnz{\;| }scasb"
+  "repnz scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
   [(set_attr "type" "fcmov")
    (set_attr "mode" "XF")])
 
+;; SSE5 conditional move: an if_then_else over the scalar MODEF modes,
+;; emitted as a single pcmov.  Operand 1 is the selector, operands 2
+;; and 3 are the two arms; the four constraint alternatives cover which
+;; input is tied to the destination and which one may be in memory.
+;; The pattern is available only under TARGET_SSE5 and when
+;; ix86_sse5_valid_op_p accepts the operand combination.
+(define_insn "*sse5_pcmov_<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,x")
+       (if_then_else:MODEF 
+         (match_operand:MODEF 1 "nonimmediate_operand" "xm,x,0,0")
+         (match_operand:MODEF 2 "nonimmediate_operand" "0,0,x,xm")
+         (match_operand:MODEF 3 "vector_move_operand" "x,xm,xm,x")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+  "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
+  [(set_attr "type" "sse4arg")])
+
 ;; These versions of the min/max patterns are intentionally ignorant of
 ;; their behavior wrt -0.0 and NaN (via the commutative operand mark).
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
   [(set_attr "type" "alu,lea")
    (set_attr "mode" "DI")])
 
-(define_expand "allocate_stack_worker"
-  [(match_operand:SI 0 "register_operand" "")]
-  "TARGET_STACK_PROBE"
-{
-  if (reload_completed)
-    {
-      if (TARGET_64BIT)
-       emit_insn (gen_allocate_stack_worker_rex64_postreload (operands[0]));
-      else
-       emit_insn (gen_allocate_stack_worker_postreload (operands[0]));
-    }
-  else
-    {
-      if (TARGET_64BIT)
-       emit_insn (gen_allocate_stack_worker_rex64 (operands[0]));
-      else
-       emit_insn (gen_allocate_stack_worker_1 (operands[0]));
-    }
-  DONE;
-})
-
-(define_insn "allocate_stack_worker_1"
-  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")]
-    UNSPECV_STACK_PROBE)
+;; Stack-probe worker for 32-bit targets (used when TARGET_STACK_PROBE).
+;; Operand 0 is an SImode register in class "a"; it is marked "+"
+;; (read-write) because the pattern both consumes it, as the amount
+;; subtracted from SP, and overwrites it with the unspec_volatile
+;; result.  The insn is output as a call to __alloca and also clobbers
+;; the flags register.
+(define_insn "allocate_stack_worker_32"
+  [(set (match_operand:SI 0 "register_operand" "+a")
+       (unspec_volatile:SI [(match_dup 0)] UNSPECV_STACK_PROBE))
    (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
-   (clobber (match_scratch:SI 1 "=0"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_STACK_PROBE"
   "call\t__alloca"
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
-(define_expand "allocate_stack_worker_postreload"
-  [(parallel [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")]
-                                   UNSPECV_STACK_PROBE)
-             (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
-             (clobber (match_dup 0))
-             (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
-
-(define_insn "allocate_stack_worker_rex64"
-  [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")]
-    UNSPECV_STACK_PROBE)
+;; Stack-probe worker for 64-bit targets (used when TARGET_STACK_PROBE).
+;; Operand 0 is a DImode register in class "a".  It must be marked "+"
+;; (read-write), not "=": the pattern reads it through (match_dup 0),
+;; both as the unspec_volatile input and as the amount subtracted from
+;; SP, and also overwrites it.  An output-only "=" would tell the
+;; register allocator that the incoming value is dead.  This matches
+;; allocate_stack_worker_32.  The insn is output as a call to ___chkstk
+;; and additionally clobbers R10, R11 and the flags register.
+(define_insn "allocate_stack_worker_64"
+  [(set (match_operand:DI 0 "register_operand" "+a")
+       (unspec_volatile:DI [(match_dup 0)] UNSPECV_STACK_PROBE))
    (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0)))
-   (clobber (match_scratch:DI 1 "=0"))
+   (clobber (reg:DI R10_REG))
+   (clobber (reg:DI R11_REG))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_STACK_PROBE"
-  "call\t__alloca"
+  "call\t___chkstk"
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
-(define_expand "allocate_stack_worker_rex64_postreload"
-  [(parallel [(unspec_volatile:DI [(match_operand:DI 0 "register_operand" "a")]
-                                   UNSPECV_STACK_PROBE)
-             (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0)))
-             (clobber (match_dup 0))
-             (clobber (reg:CC FLAGS_REG))])]
-  ""
-  "")
-
 (define_expand "allocate_stack"
-  [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
-                  (minus:SI (reg:SI SP_REG)
-                            (match_operand:SI 1 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])
-   (parallel [(set (reg:SI SP_REG)
-                  (minus:SI (reg:SI SP_REG) (match_dup 1)))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(match_operand 0 "register_operand" "")
+   (match_operand 1 "general_operand" "")]
   "TARGET_STACK_PROBE"
 {
-#ifdef CHECK_STACK_LIMIT
-  if (CONST_INT_P (operands[1])
+  rtx x;
+
+#ifndef CHECK_STACK_LIMIT
+#define CHECK_STACK_LIMIT 0
+#endif
+
+  if (CHECK_STACK_LIMIT && CONST_INT_P (operands[1])
       && INTVAL (operands[1]) < CHECK_STACK_LIMIT)
-    emit_insn (gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
-                          operands[1]));
+    {
+      x = expand_simple_binop (Pmode, MINUS, stack_pointer_rtx, operands[1],
+                              stack_pointer_rtx, 0, OPTAB_DIRECT);
+      if (x != stack_pointer_rtx)
+       emit_move_insn (stack_pointer_rtx, x);
+    }
   else
-#endif
-    emit_insn (gen_allocate_stack_worker (copy_to_mode_reg (SImode,
-                                                           operands[1])));
+    {
+      x = copy_to_mode_reg (Pmode, operands[1]);
+      if (TARGET_64BIT)
+       x = gen_allocate_stack_worker_64 (x);
+      else
+       x = gen_allocate_stack_worker_32 (x);
+      emit_insn (x);
+    }
 
   emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
   DONE;
    (set (match_operand 1 "register_operand" "")
        (and (match_dup 3) (match_dup 4)))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
-   /* Ensure that the operand will remain sign-extended immediate.  */
-   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)
    && ! optimize_size
    && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
-       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))"
+       || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
+   /* Ensure that the operand will remain sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[4]) >= 0 ? CCNOmode : CCZmode)"
   [(parallel [(set (match_dup 0)
                   (match_op_dup 2 [(and:SI (match_dup 3) (match_dup 4))
                                    (const_int 0)]))
                (match_operand:HI 3 "const_int_operand" ""))
           (const_int 0)]))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
-   /* Ensure that the operand will remain sign-extended immediate.  */
-   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)
    && ! TARGET_FAST_PREFIX
-   && ! optimize_size"
+   && ! optimize_size
+   /* Ensure that the operand will remain sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
   [(set (match_dup 0)
        (match_op_dup 1 [(and:SI (match_dup 2) (match_dup 3))
                         (const_int 0)]))]
    (set (match_operand:SI 0 "memory_operand" "")
         (match_operand:SI 1 "immediate_operand" ""))]
   "! optimize_size
-   && get_attr_length (insn) >= ix86_cost->large_insn
-   && TARGET_SPLIT_LONG_MOVES"
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cost->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(match_scratch:HI 2 "r")
    (set (match_operand:HI 0 "memory_operand" "")
         (match_operand:HI 1 "immediate_operand" ""))]
-  "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
-  && TARGET_SPLIT_LONG_MOVES"
+  "! optimize_size
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cost->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(match_scratch:QI 2 "q")
    (set (match_operand:QI 0 "memory_operand" "")
         (match_operand:QI 1 "immediate_operand" ""))]
-  "! optimize_size && get_attr_length (insn) >= ix86_cost->large_insn
-  && TARGET_SPLIT_LONG_MOVES"
+  "! optimize_size
+   && TARGET_SPLIT_LONG_MOVES
+   && get_attr_length (insn) >= ix86_cost->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
          [(match_operand:SI 2 "memory_operand" "")
           (const_int 0)]))
    (match_scratch:SI 3 "r")]
-  "ix86_match_ccmode (insn, CCNOmode) && ! optimize_size"
+  " ! optimize_size && ix86_match_ccmode (insn, CCNOmode)"
   [(set (match_dup 3) (match_dup 2))
    (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
   "")
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
   "!optimize_size
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && ((TARGET_PENTIUM
+   && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], SImode)))
-       || (TARGET_K6 && long_memory_operand (operands[0], SImode)))"
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], SImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
                   (xor:SI (match_dup 1) (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])]
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
   "!optimize_size
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && ((TARGET_PENTIUM
+   && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], HImode)))
-       || (TARGET_K6 && long_memory_operand (operands[0], HImode)))"
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], HImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
                   (xor:HI (match_dup 1) (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])]
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
   "!optimize_size
-   && peep2_regno_dead_p (0, FLAGS_REG)
-   && ((TARGET_PENTIUM
+   && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], QImode)))
-       || (TARGET_K6 && long_memory_operand (operands[0], QImode)))"
+       || (TARGET_NOT_VECTORMODE && long_memory_operand (operands[0], QImode)))
+   && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0)
                   (xor:QI (match_dup 1) (const_int -1)))
              (clobber (reg:CC FLAGS_REG))])]
   "(GET_MODE (operands[0]) == HImode
     || GET_MODE (operands[0]) == SImode
     || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
-   && (optimize_size || TARGET_PENTIUM)
+   && (optimize_size || TARGET_MOVE_M1_VIA_OR)
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int -1))
              (clobber (reg:CC FLAGS_REG))])]
                   (mult:DI (match_operand:DI 1 "memory_operand" "")
                            (match_operand:DI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
                   (mult:SI (match_operand:SI 1 "memory_operand" "")
                            (match_operand:SI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
                     (mult:SI (match_operand:SI 1 "memory_operand" "")
                              (match_operand:SI 2 "immediate_operand" ""))))
              (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
                            (match_operand:DI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:DI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
+  "TARGET_SLOW_IMUL_IMM8 && !optimize_size
    && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
                            (match_operand:SI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:SI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
+  "TARGET_SLOW_IMUL_IMM8 && !optimize_size
    && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
                            (match_operand:HI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:HI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
+  "TARGET_SLOW_IMUL_IMM8 && !optimize_size"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
              (match_operand:DI 2 "" "")))]
-  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT
+   && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC"
 {
   if (constant_call_address_operand (operands[1], Pmode))
     return "call\t%P1";
 }
   [(set_attr "type" "callv")])
 
+;; Non-sibling 64-bit call whose result is stored in operand 0.  The call
+;; target (operand 1) is only accepted in a register or in memory ("rm"),
+;; and the insn is always printed as "call %A1".
+;; NOTE(review): the "_large" suffix suggests this pattern serves the
+;; CM_LARGE / CM_LARGE_PIC code models, but the condition below does not
+;; test ix86_cmodel itself -- confirm how it is selected.
+(define_insn "*call_value_1_rex64_large"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
+             (match_operand:DI 2 "" "")))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+  "call\t%A1"
+  [(set_attr "type" "callv")])
+
 (define_insn "*sibcall_value_1_rex64"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
   }
   [(set_attr "type" "multi")])
 
+;; Integer modes of the source operand (operand 2) for which the
+;; sse4_2_crc32<mode> pattern is instantiated.
+(define_mode_iterator CRC32MODE [QI HI SI])
+;; Mnemonic suffix appended to "crc32" for each source mode.
+(define_mode_attr crc32modesuffix [(QI "b") (HI "w") (SI "l")])
+;; Constraint string for the source operand in each mode: QI uses "qm",
+;; HI and SI use "rm".
+(define_mode_attr crc32modeconstraint [(QI "qm") (HI "rm") (SI "rm")])
+
+;; CRC32 with an SImode accumulator, instantiated once per CRC32MODE source
+;; mode (QI, HI, SI).  Operand 1 is the incoming accumulator and is tied to
+;; the output register (constraint "0"); operand 2 is the data operand, in a
+;; register or memory as given by <crc32modeconstraint>.  The operation is
+;; represented as an UNSPEC_CRC32 unspec.  Enabled only when TARGET_SSE4_2.
+(define_insn "sse4_2_crc32<mode>"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI
+         [(match_operand:SI 1 "register_operand" "0")
+          (match_operand:CRC32MODE 2 "nonimmediate_operand" "<crc32modeconstraint>")]
+         UNSPEC_CRC32))]
+  "TARGET_SSE4_2"
+  ;; The {...|...} braces give two operand orderings: "%2, %0" and "%0, %2".
+  "crc32<crc32modesuffix>\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "SI")])
+
+;; DImode counterpart of sse4_2_crc32<mode>: the accumulator (operand 1,
+;; tied to the output via constraint "0") and the data operand (operand 2,
+;; register or memory) are both DImode, and the insn is printed as "crc32q".
+;; Requires TARGET_64BIT in addition to TARGET_SSE4_2.
+(define_insn "sse4_2_crc32di"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (unspec:DI
+         [(match_operand:DI 1 "register_operand" "0")
+          (match_operand:DI 2 "nonimmediate_operand" "rm")]
+         UNSPEC_CRC32))]
+  "TARGET_SSE4_2 && TARGET_64BIT"
+  "crc32q\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_rep" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "mode" "DI")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")