OSDN Git Service

gcc/
[pf3gnuchains/gcc-fork.git] / gcc / config / i386 / i386.md
index ec2787f..f0bec12 100644 (file)
@@ -1,6 +1,6 @@
 ;; GCC machine description for IA-32 and x86-64.
 ;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
 ;; Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
@@ -58,6 +58,7 @@
    (UNSPEC_GOTNTPOFF           7)
    (UNSPEC_INDNTPOFF           8)
    (UNSPEC_PLTOFF              9)
+   (UNSPEC_MACHOPIC_OFFSET     10)
 
    ; Prologue support
    (UNSPEC_STACK_ALLOC         11)
@@ -95,7 +96,6 @@
    (UNSPEC_RCP                 45)
    (UNSPEC_RSQRT               46)
    (UNSPEC_SFENCE              47)
-   (UNSPEC_NOP                 48)     ; prevents combiner cleverness
    (UNSPEC_PFRCP               49)
    (UNSPEC_PFRCPIT1            40)
    (UNSPEC_PFRCPIT2            41)
    (UNSPEC_LFENCE              45)
    (UNSPEC_PSADBW              46)
    (UNSPEC_LDDQU               47)
+   (UNSPEC_MS_TO_SYSV_CALL     48)
 
    ; Generic math support
    (UNSPEC_COPYSIGN            50)
    (UNSPEC_SSE5_UNSIGNED_CMP   151)
    (UNSPEC_SSE5_TRUEFALSE      152)
    (UNSPEC_SSE5_PERMUTE                153)
-   (UNSPEC_SSE5_ASHIFT         154)
-   (UNSPEC_SSE5_LSHIFT         155)
-   (UNSPEC_FRCZ                        156)
-   (UNSPEC_CVTPH2PS            157)
-   (UNSPEC_CVTPS2PH            158)
+   (UNSPEC_FRCZ                        154)
+   (UNSPEC_CVTPH2PS            155)
+   (UNSPEC_CVTPS2PH            156)
+
+   ; For AES support
+   (UNSPEC_AESENC              159)
+   (UNSPEC_AESENCLAST          160)
+   (UNSPEC_AESDEC              161)
+   (UNSPEC_AESDECLAST          162)
+   (UNSPEC_AESIMC              163)
+   (UNSPEC_AESKEYGENASSIST     164)
+
+   ; For PCLMUL support
+   (UNSPEC_PCLMUL              165)
+
+   ; For AVX support
+   (UNSPEC_PCMP                        166)
+   (UNSPEC_VPERMIL             167)
+   (UNSPEC_VPERMIL2F128                168)
+   (UNSPEC_MASKLOAD            169)
+   (UNSPEC_MASKSTORE           170)
+   (UNSPEC_CAST                        171)
+   (UNSPEC_VTESTP              172)
   ])
 
 (define_constants
    (UNSPECV_ALIGN              7)
    (UNSPECV_MONITOR            8)
    (UNSPECV_MWAIT              9)
-   (UNSPECV_CMPXCHG_1          10)
-   (UNSPECV_CMPXCHG_2          11)
+   (UNSPECV_CMPXCHG            10)
    (UNSPECV_XCHG               12)
    (UNSPECV_LOCK               13)
    (UNSPECV_PROLOGUE_USE       14)
+   (UNSPECV_CLD                        15)
+   (UNSPECV_VZEROALL           16)
+   (UNSPECV_VZEROUPPER         17)
   ])
 
 ;; Constants to represent pcomtrue/pcomfalse variants
    (COM_TRUE_P                 5)
   ])
 
+;; Constants used in the SSE5 pperm instruction
+(define_constants
+  [(PPERM_SRC                  0x00)   /* copy source */
+   (PPERM_INVERT               0x20)   /* invert source */
+   (PPERM_REVERSE              0x40)   /* bit reverse source */
+   (PPERM_REV_INV              0x60)   /* bit reverse & invert src */
+   (PPERM_ZERO                 0x80)   /* all 0's */
+   (PPERM_ONES                 0xa0)   /* all 1's */
+   (PPERM_SIGN                 0xc0)   /* propagate sign bit */
+   (PPERM_INV_SIGN             0xe0)   /* invert & propagate sign */
+   (PPERM_SRC1                 0x00)   /* use first source byte */
+   (PPERM_SRC2                 0x10)   /* use second source byte */
+   ])
+
 ;; Registers by name.
 (define_constants
   [(AX_REG                      0)
    (DX_REG                      1)
    (CX_REG                      2)
+   (BX_REG                      3)
    (SI_REG                      4)
    (DI_REG                      5)
    (BP_REG                      6)
    (FLAGS_REG                  17)
    (FPSR_REG                   18)
    (FPCR_REG                   19)
+   (XMM0_REG                   21)
+   (XMM1_REG                   22)
+   (XMM2_REG                   23)
+   (XMM3_REG                   24)
+   (XMM4_REG                   25)
+   (XMM5_REG                   26)
+   (XMM6_REG                   27)
+   (XMM7_REG                   28)
    (R10_REG                    39)
    (R11_REG                    40)
+   (R13_REG                    42)
+   (XMM8_REG                   45)
+   (XMM9_REG                   46)
+   (XMM10_REG                  47)
+   (XMM11_REG                  48)
+   (XMM12_REG                  49)
+   (XMM13_REG                  50)
+   (XMM14_REG                  51)
+   (XMM15_REG                  52)
   ])
 
 ;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
 ;; "reload_completed && TARGET_64BIT".
 
 \f
-;; Processor type.  This attribute must exactly match the processor_type
-;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
-                    nocona,core2,generic32,generic64,amdfam10"
-  (const (symbol_ref "ix86_tune")))
+;; Processor type.
+(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,
+                   generic64,amdfam10"
+  (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
 ;; provided in other attributes.
 
 ;; Main data type used by the insn
 (define_attr "mode"
-  "unknown,none,QI,HI,SI,DI,SF,DF,XF,TI,V4SF,V2DF,V2SF,V1DF"
+  "unknown,none,QI,HI,SI,DI,TI,OI,SF,DF,XF,TF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF"
   (const_string "unknown"))
 
 ;; The CPU unit operations uses.
 ;; There are also additional prefixes in SSSE3.
 (define_attr "prefix_extra" "" (const_int 0))
 
+;; Prefix used: original, VEX or maybe VEX.
+(define_attr "prefix" "orig,vex,maybe_vex"
+  (if_then_else (eq_attr "mode" "OI,V8SF,V4DF")
+    (const_string "vex")
+    (const_string "orig")))
+
+;; There is a 8bit immediate for VEX.
+(define_attr "prefix_vex_imm8" "" (const_int 0))
+
+;; VEX W bit is used.
+(define_attr "prefix_vex_w" "" (const_int 0))
+
+;; The length of VEX prefix
+(define_attr "length_vex" ""
+  (if_then_else (eq_attr "prefix_0f" "1")
+    (if_then_else (eq_attr "prefix_vex_w" "1")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 1, 1)")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 1, 0)"))
+    (if_then_else (eq_attr "prefix_vex_w" "1")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 0, 1)")
+      (symbol_ref "ix86_attr_length_vex_default (insn, 0, 0)"))))
+
 ;; Set when modrm byte is used.
 (define_attr "modrm" ""
   (cond [(eq_attr "type" "str,leave")
         (eq_attr "unit" "i387")
           (plus (const_int 2)
                 (plus (attr "prefix_data16")
-                      (attr "length_address")))]
+                      (attr "length_address")))
+        (ior (eq_attr "prefix" "vex")
+             (and (eq_attr "prefix" "maybe_vex")
+                   (ne (symbol_ref "TARGET_AVX") (const_int 0))))
+          (plus (attr "length_vex")
+                (plus (attr "prefix_vex_imm8")
+                      (plus (attr "modrm")
+                            (attr "length_address"))))]
         (plus (plus (attr "modrm")
                     (plus (attr "prefix_0f")
                           (plus (attr "prefix_rex")
   [(set_attr "length" "128")
    (set_attr "type" "multi")])
 
+;; All integer comparison codes.
+(define_code_iterator int_cond [ne eq ge gt le lt geu gtu leu ltu ])
+
+;; All floating-point comparison codes.
+(define_code_iterator fp_cond [unordered ordered
+                              uneq unge ungt unle unlt ltgt ])
+
 (define_code_iterator plusminus [plus minus])
 
-;; Base name for define_insn and insn mnemonic.
-(define_code_attr addsub [(plus "add") (minus "sub")])
+(define_code_iterator sat_plusminus [ss_plus us_plus ss_minus us_minus])
+
+;; Base name for define_insn
+(define_code_attr plusminus_insn
+  [(plus "add") (ss_plus "ssadd") (us_plus "usadd")
+   (minus "sub") (ss_minus "sssub") (us_minus "ussub")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminus_mnemonic
+  [(plus "add") (ss_plus "adds") (us_plus "addus")
+   (minus "sub") (ss_minus "subs") (us_minus "subus")])
 
 ;; Mark commutative operators as such in constraints.
-(define_code_attr comm [(plus "%") (minus "")])
+(define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
+                       (minus "") (ss_minus "") (us_minus "")])
+
+;; Mapping of signed max and min
+(define_code_iterator smaxmin [smax smin])
+
+;; Mapping of unsigned max and min
+(define_code_iterator umaxmin [umax umin])
+
+;; Mapping of signed/unsigned max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
+;; Base name for integer and FP insn mnemonic
+(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
+                                (umax "maxu") (umin "minu")])
+(define_code_attr maxminfprefix [(smax "max") (smin "min")])
+
+;; Mapping of parallel logic operators
+(define_code_iterator plogic [and ior xor])
+
+;; Base name for insn mnemonic.
+(define_code_attr plogicprefix [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of abs neg operators
+(define_code_iterator absneg [abs neg])
+
+;; Base name for x87 insn mnemonic.
+(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
 
 ;; All single word integer modes.
 (define_mode_iterator SWI [QI HI SI (DI "TARGET_64BIT")])
 
+;; Single word integer modes without QImode.
+(define_mode_iterator SWI248 [HI SI (DI "TARGET_64BIT")])
+
 ;; Instruction suffix for integer modes.
 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
 
 (define_mode_attr r [(QI "q") (HI "r") (SI "r") (DI "r")])
 
 ;; Immediate operand constraint for integer modes.
-(define_mode_attr i [(QI "i") (HI "i") (SI "i") (DI "e")])
+(define_mode_attr i [(QI "n") (HI "n") (SI "i") (DI "e")])
 
 ;; General operand predicate for integer modes.
 (define_mode_attr general_operand
 ;; SSE vector mode corresponding to a scalar mode
 (define_mode_attr ssevecmode
   [(QI "V16QI") (HI "V8HI") (SI "V4SI") (DI "V2DI") (SF "V4SF") (DF "V2DF")])
+
+;; Instruction suffix for REX 64bit operators.
+(define_mode_attr rex64suffix [(SI "") (DI "{q}")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
 \f
 ;; Scheduling descriptions
 
 (define_insn "cmpdi_ccno_1_rex64"
   [(set (reg FLAGS_REG)
        (compare (match_operand:DI 0 "nonimmediate_operand" "r,?mr")
-                (match_operand:DI 1 "const0_operand" "n,n")))]
+                (match_operand:DI 1 "const0_operand" "")))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)"
   "@
    test{q}\t%0, %0
 (define_insn "*cmpsi_ccno_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:SI 0 "nonimmediate_operand" "r,?mr")
-                (match_operand:SI 1 "const0_operand" "n,n")))]
+                (match_operand:SI 1 "const0_operand" "")))]
   "ix86_match_ccmode (insn, CCNOmode)"
   "@
    test{l}\t%0, %0
 (define_insn "*cmphi_ccno_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:HI 0 "nonimmediate_operand" "r,?mr")
-                (match_operand:HI 1 "const0_operand" "n,n")))]
+                (match_operand:HI 1 "const0_operand" "")))]
   "ix86_match_ccmode (insn, CCNOmode)"
   "@
    test{w}\t%0, %0
 (define_insn "*cmphi_minus_1"
   [(set (reg FLAGS_REG)
        (compare (minus:HI (match_operand:HI 0 "nonimmediate_operand" "rm,r")
-                          (match_operand:HI 1 "general_operand" "ri,mr"))
+                          (match_operand:HI 1 "general_operand" "rn,mr"))
                 (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)"
   "cmp{w}\t{%1, %0|%0, %1}"
 (define_insn "*cmphi_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:HI 0 "nonimmediate_operand" "rm,r")
-                (match_operand:HI 1 "general_operand" "ri,mr")))]
+                (match_operand:HI 1 "general_operand" "rn,mr")))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && ix86_match_ccmode (insn, CCmode)"
   "cmp{w}\t{%1, %0|%0, %1}"
 (define_insn "*cmpqi_ccno_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:QI 0 "nonimmediate_operand" "q,?mq")
-                (match_operand:QI 1 "const0_operand" "n,n")))]
+                (match_operand:QI 1 "const0_operand" "")))]
   "ix86_match_ccmode (insn, CCNOmode)"
   "@
    test{b}\t%0, %0
 (define_insn "*cmpqi_1"
   [(set (reg FLAGS_REG)
        (compare (match_operand:QI 0 "nonimmediate_operand" "qm,q")
-                (match_operand:QI 1 "general_operand" "qi,mq")))]
+                (match_operand:QI 1 "general_operand" "qn,mq")))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
     && ix86_match_ccmode (insn, CCmode)"
   "cmp{b}\t{%1, %0|%0, %1}"
 (define_insn "*cmpqi_minus_1"
   [(set (reg FLAGS_REG)
        (compare (minus:QI (match_operand:QI 0 "nonimmediate_operand" "qm,q")
-                          (match_operand:QI 1 "general_operand" "qi,mq"))
+                          (match_operand:QI 1 "general_operand" "qn,mq"))
                 (const_int 0)))]
   "ix86_match_ccmode (insn, CCGOCmode)"
   "cmp{b}\t{%1, %0|%0, %1}"
              (match_operand 0 "ext_register_operand" "Q")
              (const_int 8)
              (const_int 8)) 0)
-         (match_operand:QI 1 "const0_operand" "n")))]
+         (match_operand:QI 1 "const0_operand" "")))]
   "ix86_match_ccmode (insn, CCNOmode)"
   "test{b}\t%h0, %h0"
   [(set_attr "type" "test")
        (unspec:HI
          [(compare:CCFP
             (match_operand 1 "register_operand" "f")
-            (match_operand 2 "const0_operand" "X"))]
+            (match_operand 2 "const0_operand" ""))]
        UNSPEC_FNSTSW))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && GET_MODE (operands[1]) == GET_MODE (operands[2])"
   [(set (reg:CCFP FLAGS_REG)
        (compare:CCFP
          (match_operand 1 "register_operand" "f")
-         (match_operand 2 "const0_operand" "X")))
+         (match_operand 2 "const0_operand" "")))
    (clobber (match_operand:HI 0 "register_operand" "=a"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && TARGET_SAHF && !TARGET_CMOVE
               [(match_operand:X87MODEI12 2 "memory_operand" "m")]))]
          UNSPEC_FNSTSW))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
-   && TARGET_USE_<MODE>MODE_FIOP
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
    && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
   "* return output_fp_compare (insn, operands, 0, 0);"
   [(set_attr "type" "multi")
    (clobber (match_operand:HI 0 "register_operand" "=a"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && TARGET_SAHF && !TARGET_CMOVE
-   && TARGET_USE_<MODE>MODE_FIOP
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
    && (GET_MODE (operands [3]) == GET_MODE (operands[1]))"
   "#"
   "&& reload_completed"
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 0);"
   [(set_attr "type" "fcmp,ssecomi")
+   (set_attr "prefix" "orig,maybe_vex")
    (set (attr "mode")
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 0);"
   [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 1);"
   [(set_attr "type" "fcmp,ssecomi")
+   (set_attr "prefix" "orig,maybe_vex")
    (set (attr "mode")
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
    && GET_MODE (operands[0]) == GET_MODE (operands[1])"
   "* return output_fp_compare (insn, operands, 1, 1);"
   [(set_attr "type" "ssecomi")
+   (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
 
 (define_insn "*movsi_xor"
   [(set (match_operand:SI 0 "register_operand" "=r")
-       (match_operand:SI 1 "const0_operand" "i"))
+       (match_operand:SI 1 "const0_operand" ""))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed && (!TARGET_USE_MOV0 || optimize_size)"
+  "reload_completed"
   "xor{l}\t%0, %0"
   [(set_attr "type" "alu1")
    (set_attr "mode" "SI")
        (match_operand:SI 1 "immediate_operand" "i"))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
-   && operands[1] == constm1_rtx
-   && (TARGET_MOVE_M1_VIA_OR || optimize_size)"
+   && operands[1] == constm1_rtx"
 {
   operands[1] = constm1_rtx;
   return "or{l}\t{%1, %0|%0, %1}";
 
 (define_insn "*movsi_1"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-                       "=r  ,m  ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
+                       "=r,m ,*y,*y,?rm,?*y,*x,*x,?r ,m ,?*Yi,*x")
        (match_operand:SI 1 "general_operand"
-                       "rinm,rin,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r   ,m "))]
+                       "g ,ri,C ,*y,*y ,rm ,C ,*x,*Yi,*x,r   ,m "))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_SSELOG1:
       if (get_attr_mode (insn) == MODE_TI)
-        return "pxor\t%0, %0";
-      return "xorps\t%0, %0";
+        return "%vpxor\t%0, %d0";
+      return "%vxorps\t%0, %d0";
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
        {
        case MODE_TI:
-         return "movdqa\t{%1, %0|%0, %1}";
+         return "%vmovdqa\t{%1, %0|%0, %1}";
        case MODE_V4SF:
-         return "movaps\t{%1, %0|%0, %1}";
+         return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_SI:
-          return "movd\t{%1, %0|%0, %1}";
+          return "%vmovd\t{%1, %0|%0, %1}";
        case MODE_SF:
-          return "movss\t{%1, %0|%0, %1}";
+          return "%vmovss\t{%1, %0|%0, %1}";
        default:
          gcc_unreachable ();
        }
              (const_string "lea")
           ]
           (const_string "imov")))
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
      (cond [(eq_attr "alternative" "2,3")
              (const_string "DI")
 ;; For 64BIT abi we always round up to 8 bytes.
 (define_insn "*pushhi2_rex64"
   [(set (match_operand:HI 0 "push_operand" "=X")
-       (match_operand:HI 1 "nonmemory_no_elim_operand" "ri"))]
+       (match_operand:HI 1 "nonmemory_no_elim_operand" "rn"))]
   "TARGET_64BIT"
   "push{q}\t%q1"
   [(set_attr "type" "push")
     }
 }
   [(set (attr "type")
-     (cond [(ne (symbol_ref "optimize_size") (const_int 0))
+     (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
              (const_string "imov")
            (and (eq_attr "alternative" "0")
                 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
        (match_operand:HI 1 "register_operand" "+r"))
    (set (match_dup 1)
        (match_dup 0))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_size"
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "xchg{l}\t%k1, %k0"
   [(set_attr "type" "imov")
    (set_attr "mode" "SI")
 (define_expand "movstricthi"
   [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" ""))
        (match_operand:HI 1 "general_operand" ""))]
-  "! TARGET_PARTIAL_REG_STALL || optimize_size"
+  ""
 {
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
   /* Don't generate memory->memory moves, go through a register */
   if (MEM_P (operands[0]) && MEM_P (operands[1]))
     operands[1] = force_reg (HImode, operands[1]);
 (define_insn "*movstricthi_1"
   [(set (strict_low_part (match_operand:HI 0 "nonimmediate_operand" "+rm,r"))
        (match_operand:HI 1 "general_operand" "rn,m"))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "mov{w}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov")
 
 (define_insn "*movstricthi_xor"
   [(set (strict_low_part (match_operand:HI 0 "register_operand" "+r"))
-       (match_operand:HI 1 "const0_operand" "i"))
+       (match_operand:HI 1 "const0_operand" ""))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed
-   && ((!TARGET_USE_MOV0 && !TARGET_PARTIAL_REG_STALL) || optimize_size)"
+  "reload_completed"
   "xor{w}\t%0, %0"
   [(set_attr "type" "alu1")
    (set_attr "mode" "HI")
 ;; For 64BIT abi we always round up to 8 bytes.
 (define_insn "*pushqi2_rex64"
   [(set (match_operand:QI 0 "push_operand" "=X")
-       (match_operand:QI 1 "nonmemory_no_elim_operand" "qi"))]
+       (match_operand:QI 1 "nonmemory_no_elim_operand" "qn"))]
   "TARGET_64BIT"
   "push{q}\t%q1"
   [(set_attr "type" "push")
      (cond [(and (eq_attr "alternative" "5")
                 (not (match_operand:QI 1 "aligned_operand" "")))
              (const_string "imovx")
-           (ne (symbol_ref "optimize_size") (const_int 0))
+           (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0))
              (const_string "imov")
            (and (eq_attr "alternative" "3")
                 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
                  (and (eq_attr "alternative" "0,1")
                       (and (ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
                                (const_int 0))
-                           (and (eq (symbol_ref "optimize_size")
+                           (and (eq (symbol_ref "optimize_function_for_size_p (cfun)")
                                     (const_int 0))
                                 (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
                                     (const_int 0))))))
           ]
           (const_string "QI")))])
 
-(define_expand "reload_outqi"
-  [(parallel [(match_operand:QI 0 "" "=m")
-              (match_operand:QI 1 "register_operand" "r")
-              (match_operand:QI 2 "register_operand" "=&q")])]
-  ""
-{
-  rtx op0, op1, op2;
-  op0 = operands[0]; op1 = operands[1]; op2 = operands[2];
-
-  gcc_assert (!reg_overlap_mentioned_p (op2, op0));
-  if (! q_regs_operand (op1, QImode))
-    {
-      emit_insn (gen_movqi (op2, op1));
-      op1 = op2;
-    }
-  emit_insn (gen_movqi (op0, op1));
-  DONE;
-})
-
 (define_insn "*swapqi_1"
   [(set (match_operand:QI 0 "register_operand" "+r")
        (match_operand:QI 1 "register_operand" "+r"))
    (set (match_dup 1)
        (match_dup 0))]
-  "!TARGET_PARTIAL_REG_STALL || optimize_size"
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "xchg{l}\t%k1, %k0"
   [(set_attr "type" "imov")
    (set_attr "mode" "SI")
 (define_expand "movstrictqi"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
        (match_operand:QI 1 "general_operand" ""))]
-  "! TARGET_PARTIAL_REG_STALL || optimize_size"
+  ""
 {
+  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+    FAIL;
   /* Don't generate memory->memory moves, go through a register.  */
   if (MEM_P (operands[0]) && MEM_P (operands[1]))
     operands[1] = force_reg (QImode, operands[1]);
 (define_insn "*movstrictqi_1"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
        (match_operand:QI 1 "general_operand" "*qn,m"))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "mov{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov")
 
 (define_insn "*movstrictqi_xor"
   [(set (strict_low_part (match_operand:QI 0 "q_regs_operand" "+q"))
-       (match_operand:QI 1 "const0_operand" "i"))
+       (match_operand:QI 1 "const0_operand" ""))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed && (!TARGET_USE_MOV0 || optimize_size)"
+  "reload_completed"
   "xor{b}\t%0, %0"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")
    && !x86_64_immediate_operand (operands[1], DImode) && 1"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (operands + 1, 1, operands + 2, operands + 3);
+  "split_di (&operands[1], 1, &operands[2], &operands[3]);
    operands[1] = gen_lowpart (DImode, operands[2]);
    operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
                                                    GEN_INT (4)));
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (operands + 1, 1, operands + 2, operands + 3);
+  "split_di (&operands[1], 1, &operands[2], &operands[3]);
    operands[1] = gen_lowpart (DImode, operands[2]);
    operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
                                                    GEN_INT (4)));
 
 (define_insn "*movdi_xor_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
-       (match_operand:DI 1 "const0_operand" "i"))
+       (match_operand:DI 1 "const0_operand" ""))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (!TARGET_USE_MOV0 || optimize_size)
+  "TARGET_64BIT
    && reload_completed"
   "xor{l}\t%k0, %k0";
   [(set_attr "type" "alu1")
   [(set (match_operand:DI 0 "register_operand" "=r")
        (match_operand:DI 1 "const_int_operand" "i"))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_MOVE_M1_VIA_OR || optimize_size)
+  "TARGET_64BIT
    && reload_completed
    && operands[1] == constm1_rtx"
 {
 
 (define_insn "*movdi_2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-                       "=r  ,o  ,*y,m*y,*y,*Yt,m  ,*Yt,*Yt,*x,m ,*x,*x")
+                       "=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
        (match_operand:DI 1 "general_operand"
-                       "riFo,riF,C ,*y ,m ,C  ,*Yt,*Yt,m  ,C ,*x,*x,m "))]
+                       "riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  ,C ,*x,*x,m "))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    #
    pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movq\t{%1, %0|%0, %1}
-   pxor\t%0, %0
-   movq\t{%1, %0|%0, %1}
-   movdqa\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
    xorps\t%0, %0
    movlps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}
    movlps\t{%1, %0|%0, %1}"
   [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
 
 (define_split
        return "movdq2q\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
+      if (TARGET_AVX)
+       {
+         if (get_attr_mode (insn) == MODE_TI)
+           return "vmovdqa\t{%1, %0|%0, %1}";
+         else
+           return "vmovq\t{%1, %0|%0, %1}";
+       }
+
       if (get_attr_mode (insn) == MODE_TI)
        return "movdqa\t{%1, %0|%0, %1}";
       /* FALLTHRU */
       return "movq\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
+      return "%vpxor\t%0, %d0";
+
     case TYPE_MMXADD:
       return "pxor\t%0, %0";
 
           (const_string "imov")))
    (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+       (const_string "maybe_vex")
+       (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
 
 ;; Stores and loads of ax to arbitrary constant address.
    && !x86_64_immediate_operand (operands[1], DImode) && 1"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
-  "split_di (operands, 2, operands + 2, operands + 4);")
+  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 2) (match_dup 3))
    (set (match_dup 4) (match_dup 5))]
-  "split_di (operands, 2, operands + 2, operands + 4);")
+  "split_di (&operands[0], 2, &operands[2], &operands[4]);")
 
 (define_insn "*swapdi_rex64"
   [(set (match_operand:DI 0 "register_operand" "+r")
    (set_attr "athlon_decode" "vector")
    (set_attr "amdfam10_decode" "double")])
 
+(define_expand "movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "")
+       (match_operand:OI 1 "general_operand" ""))]
+  "TARGET_AVX"
+  "ix86_expand_move (OImode, operands); DONE;")
+
+(define_insn "*movoi_internal"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=x,x,m")
+       (match_operand:OI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+{
+  switch (which_alternative)
+    {
+    case 0:
+      return "vxorps\t%0, %0, %0";
+    case 1:
+    case 2:
+      if (misaligned_operand (operands[0], OImode)
+         || misaligned_operand (operands[1], OImode))
+       return "vmovdqu\t{%1, %0|%0, %1}";
+      else
+       return "vmovdqa\t{%1, %0|%0, %1}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "OI")])
+
 (define_expand "movti"
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (match_operand:TI 1 "nonimmediate_operand" ""))]
     {
     case 0:
       if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
+       return "%vxorps\t%0, %d0";
       else
-       return "pxor\t%0, %0";
+       return "%vpxor\t%0, %d0";
     case 1:
     case 2:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
+      /* TDmode values are passed as TImode on the stack.  Moving them
+        to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+         || misaligned_operand (operands[1], TImode))
+       {
+         if (get_attr_mode (insn) == MODE_V4SF)
+           return "%vmovups\t{%1, %0|%0, %1}";
+        else
+          return "%vmovdqu\t{%1, %0|%0, %1}";
+       }
       else
-       return "movdqa\t{%1, %0|%0, %1}";
+       {
+         if (get_attr_mode (insn) == MODE_V4SF)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+        else
+          return "%vmovdqa\t{%1, %0|%0, %1}";
+       }
     default:
       gcc_unreachable ();
     }
 }
   [(set_attr "type" "sselog1,ssemov,ssemov")
+   (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
        (cond [(ior (eq (symbol_ref "TARGET_SSE2") (const_int 0))
-                   (ne (symbol_ref "optimize_size") (const_int 0)))
+                   (ne (symbol_ref "optimize_function_for_size_p (cfun)") (const_int 0)))
                 (const_string "V4SF")
               (and (eq_attr "alternative" "2")
                    (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
              (const_string "TI")))])
 
 (define_insn "*movti_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,x,xm")
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r,o,x,x,xm")
        (match_operand:TI 1 "general_operand" "riFo,riF,C,xm,x"))]
   "TARGET_64BIT
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
       return "#";
     case 2:
       if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
+       return "%vxorps\t%0, %d0";
       else
-       return "pxor\t%0, %0";
+       return "%vpxor\t%0, %d0";
     case 3:
     case 4:
-      if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
+      /* TDmode values are passed as TImode on the stack.  Moving them
+        to stack may result in unaligned memory access.  */
+      if (misaligned_operand (operands[0], TImode)
+         || misaligned_operand (operands[1], TImode))
+       {
+         if (get_attr_mode (insn) == MODE_V4SF)
+           return "%vmovups\t{%1, %0|%0, %1}";
+        else
+          return "%vmovdqu\t{%1, %0|%0, %1}";
+       }
       else
-       return "movdqa\t{%1, %0|%0, %1}";
+       {
+         if (get_attr_mode (insn) == MODE_V4SF)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+        else
+          return "%vmovdqa\t{%1, %0|%0, %1}";
+       }
     default:
       gcc_unreachable ();
     }
 }
   [(set_attr "type" "*,*,sselog1,ssemov,ssemov")
+   (set_attr "prefix" "*,*,maybe_vex,maybe_vex,maybe_vex")
    (set (attr "mode")
         (cond [(eq_attr "alternative" "2,3")
                 (if_then_else
-                  (ne (symbol_ref "optimize_size")
+                  (ne (symbol_ref "optimize_function_for_size_p (cfun)")
                       (const_int 0))
                   (const_string "V4SF")
                   (const_string "TI"))
                 (if_then_else
                   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                            (const_int 0))
-                       (ne (symbol_ref "optimize_size")
+                       (ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0)))
                   (const_string "V4SF")
                   (const_string "TI"))]
    (set_attr "mode" "SF,SI,SF")])
 
 (define_insn "*pushsf_rex64"
-  [(set (match_operand:SF 0 "push_operand" "=<,<,<")
+  [(set (match_operand:SF 0 "push_operand" "=X,X,X")
        (match_operand:SF 1 "nonmemory_no_elim_operand" "f,rF,x"))]
   "TARGET_64BIT"
 {
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!TARGET_SSE_MATH && optimize_size
+       || (!TARGET_SSE_MATH && optimize_function_for_size_p (cfun)
           && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || memory_operand (operands[0], SFmode))"
       return "mov{l}\t{%1, %0|%0, %1}";
     case 5:
       if (get_attr_mode (insn) == MODE_TI)
-       return "pxor\t%0, %0";
+       return "%vpxor\t%0, %d0";
       else
-       return "xorps\t%0, %0";
+       return "%vxorps\t%0, %d0";
     case 6:
       if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
+       return "%vmovaps\t{%1, %0|%0, %1}";
+      else
+       return "%vmovss\t{%1, %d0|%d0, %1}";
+    case 7:
+      if (TARGET_AVX)
+       return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+                                  : "vmovss\t{%1, %0|%0, %1}";
       else
        return "movss\t{%1, %0|%0, %1}";
-    case 7: case 8:
-      return "movss\t{%1, %0|%0, %1}";
+    case 8:
+      return "%vmovss\t{%1, %0|%0, %1}";
 
-    case 9: case 10:
-    case 12: case 13: case 14: case 15:
+    case 9: case 10: case 14: case 15:
       return "movd\t{%1, %0|%0, %1}";
+    case 12: case 13:
+      return "%vmovd\t{%1, %0|%0, %1}";
 
     case 11:
       return "movq\t{%1, %0|%0, %1}";
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+       (const_string "maybe_vex")
+       (const_string "orig")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4,9,10")
                 (const_string "SI")
                                 (const_int 0))
                             (ne (symbol_ref "TARGET_SSE2")
                                 (const_int 0)))
-                       (eq (symbol_ref "optimize_size")
+                       (eq (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0)))
                   (const_string "TI")
                   (const_string "V4SF"))
 
 (define_insn "*pushdf_nointeger"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Yt"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,Fo,*r,Y2"))]
   "!TARGET_64BIT && !TARGET_INTEGER_DFMODE_MOVES"
 {
   /* This insn should be already split before reg-stack.  */
 
 (define_insn "*pushdf_integer"
   [(set (match_operand:DF 0 "push_operand" "=<,<,<")
-       (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Yt"))]
+       (match_operand:DF 1 "general_no_elim_operand" "f,rFo,Y2"))]
   "TARGET_64BIT || TARGET_INTEGER_DFMODE_MOVES"
 {
   /* This insn should be already split before reg-stack.  */
 (define_split
   [(set (match_operand:DF 0 "push_operand" "")
        (match_operand:DF 1 "any_fp_register_operand" ""))]
-  "!TARGET_64BIT && reload_completed"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
-   (set (mem:DF (reg:SI SP_REG)) (match_dup 1))]
-  "")
-
-(define_split
-  [(set (match_operand:DF 0 "push_operand" "")
-       (match_operand:DF 1 "any_fp_register_operand" ""))]
-  "TARGET_64BIT && reload_completed"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
-   (set (mem:DF (reg:DI SP_REG)) (match_dup 1))]
+  "reload_completed"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (match_dup 1))]
   "")
 
 (define_split
 
 (define_insn "*movdf_nointeger"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-                       "=f,m,f,*r  ,o  ,Yt*x,Yt*x,Yt*x ,m  ")
+                       "=f,m,f,*r  ,o  ,Y2*x,Y2*x,Y2*x ,m  ")
        (match_operand:DF 1 "general_operand"
-                       "fm,f,G,*roF,F*r,C   ,Yt*x,mYt*x,Yt*x"))]
+                       "fm,f,G,*roF,*Fr,C   ,Y2*x,mY2*x,Y2*x"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ((optimize_size || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
+   && ((optimize_function_for_size_p (cfun)
+       || !TARGET_INTEGER_DFMODE_MOVES) && !TARGET_64BIT)
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
+           && !memory_operand (operands[0], DFmode)
           && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || memory_operand (operands[0], DFmode))"
+       || ((optimize_function_for_size_p (cfun)
+            || !TARGET_MEMORY_MISMATCH_STALL
+           || reload_in_progress || reload_completed)
+          && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
     {
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
-         return "xorps\t%0, %0";
+         return "%vxorps\t%0, %d0";
        case MODE_V2DF:
-         return "xorpd\t%0, %0";
+         return "%vxorpd\t%0, %d0";
        case MODE_TI:
-         return "pxor\t%0, %0";
+         return "%vpxor\t%0, %d0";
        default:
          gcc_unreachable ();
        }
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
-         return "movaps\t{%1, %0|%0, %1}";
+         return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
-         return "movapd\t{%1, %0|%0, %1}";
+         return "%vmovapd\t{%1, %0|%0, %1}";
        case MODE_TI:
-         return "movdqa\t{%1, %0|%0, %1}";
+         return "%vmovdqa\t{%1, %0|%0, %1}";
        case MODE_DI:
-         return "movq\t{%1, %0|%0, %1}";
+         return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_DF:
-         return "movsd\t{%1, %0|%0, %1}";
+         if (TARGET_AVX)
+           {
+             if (REG_P (operands[0]) && REG_P (operands[1]))
+               return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+             else
+               return "vmovsd\t{%1, %0|%0, %1}";
+           }
+         else
+           return "movsd\t{%1, %0|%0, %1}";
        case MODE_V1DF:
-         return "movlpd\t{%1, %0|%0, %1}";
+         if (TARGET_AVX)
+           {
+             if (REG_P (operands[0]))
+               return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+             else
+               return "vmovlpd\t{%1, %0|%0, %1}";
+           }
+         else
+           return "movlpd\t{%1, %0|%0, %1}";
        case MODE_V2SF:
-         return "movlps\t{%1, %0|%0, %1}";
+         if (TARGET_AVX)
+           {
+             if (REG_P (operands[0]))
+               return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+             else
+               return "vmovlps\t{%1, %0|%0, %1}";
+           }
+         else
+           return "movlps\t{%1, %0|%0, %1}";
        default:
          gcc_unreachable ();
        }
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "DF")
 
               /* xorps is one byte shorter.  */
               (eq_attr "alternative" "5")
-                (cond [(ne (symbol_ref "optimize_size")
+                (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0))
                          (const_string "V4SF")
                        (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
                  movaps encodes one byte shorter.  */
               (eq_attr "alternative" "6")
                 (cond
-                  [(ne (symbol_ref "optimize_size")
+                  [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                        (const_int 0))
                      (const_string "V4SF")
                    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 
 (define_insn "*movdf_integer_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r  ,m ,Yt*x,Yt*x,Yt*x,m   ,Yi,r ")
+               "=f,m,f,r  ,m ,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,rmF,Fr,C   ,Yt*x,m   ,Yt*x,r ,Yi"))]
+               "fm,f,G,rmF,Fr,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
           && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || memory_operand (operands[0], DFmode))"
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
-         return "xorps\t%0, %0";
+         return "%vxorps\t%0, %d0";
        case MODE_V2DF:
-         return "xorpd\t%0, %0";
+         return "%vxorpd\t%0, %d0";
        case MODE_TI:
-         return "pxor\t%0, %0";
+         return "%vpxor\t%0, %d0";
        default:
          gcc_unreachable ();
        }
       switch (get_attr_mode (insn))
        {
        case MODE_V4SF:
-         return "movaps\t{%1, %0|%0, %1}";
+         return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
-         return "movapd\t{%1, %0|%0, %1}";
+         return "%vmovapd\t{%1, %0|%0, %1}";
        case MODE_TI:
-         return "movdqa\t{%1, %0|%0, %1}";
+         return "%vmovdqa\t{%1, %0|%0, %1}";
        case MODE_DI:
-         return "movq\t{%1, %0|%0, %1}";
+         return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_DF:
-         return "movsd\t{%1, %0|%0, %1}";
+         if (TARGET_AVX)
+           {
+             if (REG_P (operands[0]) && REG_P (operands[1]))
+               return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+             else
+               return "vmovsd\t{%1, %0|%0, %1}";
+           }
+         else
+           return "movsd\t{%1, %0|%0, %1}";
        case MODE_V1DF:
-         return "movlpd\t{%1, %0|%0, %1}";
+         return "%vmovlpd\t{%1, %d0|%d0, %1}";
        case MODE_V2SF:
-         return "movlps\t{%1, %0|%0, %1}";
+         return "%vmovlps\t{%1, %d0|%d0, %1}";
        default:
          gcc_unreachable ();
        }
 
     case 9:
     case 10:
-      return "movd\t{%1, %0|%0, %1}";
+    return "%vmovd\t{%1, %0|%0, %1}";
 
     default:
       gcc_unreachable();
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
                 (const_string "DF")
 
               /* xorps is one byte shorter.  */
               (eq_attr "alternative" "5")
-                (cond [(ne (symbol_ref "optimize_size")
+                (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0))
                          (const_string "V4SF")
                        (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
                  movaps encodes one byte shorter.  */
               (eq_attr "alternative" "6")
                 (cond
-                  [(ne (symbol_ref "optimize_size")
+                  [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                        (const_int 0))
                      (const_string "V4SF")
                    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 
 (define_insn "*movdf_integer"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-               "=f,m,f,r  ,o ,Yt*x,Yt*x,Yt*x,m   ")
+               "=f,m,f,r  ,o ,Y2*x,Y2*x,Y2*x,m   ")
        (match_operand:DF 1 "general_operand"
-               "fm,f,G,roF,Fr,C   ,Yt*x,m   ,Yt*x"))]
+               "fm,f,G,roF,Fr,C   ,Y2*x,m   ,Y2*x"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
+   && optimize_function_for_speed_p (cfun)
+   && TARGET_INTEGER_DFMODE_MOVES
    && (reload_in_progress || reload_completed
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
-       || (!(TARGET_SSE2 && TARGET_SSE_MATH) && optimize_size
+       || (!(TARGET_SSE2 && TARGET_SSE_MATH)
+           && optimize_function_for_size_p (cfun)
           && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || memory_operand (operands[0], DFmode))"
 
               /* xorps is one byte shorter.  */
               (eq_attr "alternative" "5")
-                (cond [(ne (symbol_ref "optimize_size")
+                (cond [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0))
                          (const_string "V4SF")
                        (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR")
                  movaps encodes one byte shorter.  */
               (eq_attr "alternative" "6")
                 (cond
-                  [(ne (symbol_ref "optimize_size")
+                  [(ne (symbol_ref "optimize_function_for_size_p (cfun)")
                        (const_int 0))
                      (const_string "V4SF")
                    (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 (define_insn "*pushxf_nointeger"
   [(set (match_operand:XF 0 "push_operand" "=X,X,X")
        (match_operand:XF 1 "general_no_elim_operand" "f,Fo,*r"))]
-  "optimize_size"
+  "optimize_function_for_size_p (cfun)"
 {
   /* This insn should be already split before reg-stack.  */
   gcc_unreachable ();
 (define_insn "*pushxf_integer"
   [(set (match_operand:XF 0 "push_operand" "=<,<")
        (match_operand:XF 1 "general_no_elim_operand" "f,ro"))]
-  "!optimize_size"
+  "optimize_function_for_speed_p (cfun)"
 {
   /* This insn should be already split before reg-stack.  */
   gcc_unreachable ();
 (define_split
   [(set (match_operand:XF 0 "push_operand" "")
        (match_operand:XF 1 "any_fp_register_operand" ""))]
-  "!TARGET_64BIT"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
-   (set (mem:XF (reg:SI SP_REG)) (match_dup 1))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
-
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-       (match_operand:XF 1 "any_fp_register_operand" ""))]
-  "TARGET_64BIT"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
-   (set (mem:XF (reg:DI SP_REG)) (match_dup 1))]
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (match_dup 1))]
   "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
 
 ;; Do not use integer registers when optimizing for size
 (define_insn "*movxf_nointeger"
   [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,*r,o")
        (match_operand:XF 1 "general_operand" "fm,f,G,*roF,F*r"))]
-  "optimize_size
+  "optimize_function_for_size_p (cfun)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
-       || (optimize_size && standard_80387_constant_p (operands[1]))
+       || standard_80387_constant_p (operands[1])
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || memory_operand (operands[0], XFmode))"
 {
 (define_insn "*movxf_integer"
   [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,r,o")
        (match_operand:XF 1 "general_operand" "fm,f,G,roF,Fr"))]
-  "!optimize_size
+  "optimize_function_for_speed_p (cfun)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (reload_in_progress || reload_completed
-       || (optimize_size && standard_80387_constant_p (operands[1]))
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || memory_operand (operands[0], XFmode))"
 {
 (define_expand "movtf"
   [(set (match_operand:TF 0 "nonimmediate_operand" "")
        (match_operand:TF 1 "nonimmediate_operand" ""))]
-  "TARGET_64BIT"
+  "TARGET_SSE2"
 {
   ix86_expand_move (TFmode, operands);
   DONE;
 (define_insn "*movtf_internal"
   [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
        (match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
-  "TARGET_64BIT
+  "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (which_alternative)
     case 0:
     case 1:
       if (get_attr_mode (insn) == MODE_V4SF)
-       return "movaps\t{%1, %0|%0, %1}";
+       return "%vmovaps\t{%1, %0|%0, %1}";
       else
-       return "movdqa\t{%1, %0|%0, %1}";
+       return "%vmovdqa\t{%1, %0|%0, %1}";
     case 2:
       if (get_attr_mode (insn) == MODE_V4SF)
-       return "xorps\t%0, %0";
+       return "%vxorps\t%0, %d0";
       else
-       return "pxor\t%0, %0";
+       return "%vpxor\t%0, %d0";
     case 3:
     case 4:
        return "#";
     }
 }
   [(set_attr "type" "ssemov,ssemov,sselog1,*,*")
+   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,*,*")
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,2")
                 (if_then_else
-                  (ne (symbol_ref "optimize_size")
+                  (ne (symbol_ref "optimize_function_for_size_p (cfun)")
                       (const_int 0))
                   (const_string "V4SF")
                   (const_string "TI"))
                 (if_then_else
                   (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
                            (const_int 0))
-                       (ne (symbol_ref "optimize_size")
+                       (ne (symbol_ref "optimize_function_for_size_p (cfun)")
                            (const_int 0)))
                   (const_string "V4SF")
                   (const_string "TI"))]
               (const_string "DI")))])
 
+(define_insn "*pushtf_sse"
+  [(set (match_operand:TF 0 "push_operand" "=<,<,<")
+       (match_operand:TF 1 "general_no_elim_operand" "x,Fo,*r"))]
+  "TARGET_SSE2"
+{
+  /* This insn should be already split before reg-stack.  */
+  gcc_unreachable ();
+}
+  [(set_attr "type" "multi")
+   (set_attr "unit" "sse,*,*")
+   (set_attr "mode" "TF,SI,SI")])
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+       (match_operand:TF 1 "general_operand" ""))]
+  "TARGET_SSE2 && reload_completed
+   && !SSE_REG_P (operands[1])"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
+(define_split
+  [(set (match_operand:TF 0 "push_operand" "")
+       (match_operand:TF 1 "any_fp_register_operand" ""))]
+  "TARGET_SSE2"
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -16)))
+   (set (mem:TF (reg:P SP_REG)) (match_dup 1))]
+  "")
+
 (define_split
   [(set (match_operand 0 "nonimmediate_operand" "")
        (match_operand 1 "general_operand" ""))]
      (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "")))]
   ""
 {
-  if (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+  if (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
     {
       operands[1] = force_reg (HImode, operands[1]);
       emit_insn (gen_zero_extendhisi2_and (operands[0], operands[1]));
   [(set (match_operand:SI 0 "register_operand" "=r")
      (zero_extend:SI (match_operand:HI 1 "register_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
   "#"
   [(set_attr "type" "alu1")
    (set_attr "mode" "SI")])
   [(set (match_operand:SI 0 "register_operand" "")
        (zero_extend:SI (match_operand:HI 1 "register_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "reload_completed && TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+  "reload_completed && TARGET_ZERO_EXTEND_WITH_AND
+   && optimize_function_for_speed_p (cfun)"
   [(parallel [(set (match_dup 0) (and:SI (match_dup 0) (const_int 65535)))
              (clobber (reg:CC FLAGS_REG))])]
   "")
 (define_insn "*zero_extendhisi2_movzwl"
   [(set (match_operand:SI 0 "register_operand" "=r")
      (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand" "rm")))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+  "!TARGET_ZERO_EXTEND_WITH_AND
+   || optimize_function_for_size_p (cfun)"
   "movz{wl|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
   [(set (match_operand:HI 0 "register_operand" "=r,?&q")
      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
   "#"
   [(set_attr "type" "alu1")
    (set_attr "mode" "HI")])
   [(set (match_operand:HI 0 "register_operand" "=r,r")
      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "type" "imovx,alu1")
    (set_attr "mode" "HI")])
 (define_insn "*zero_extendqihi2_movzbl"
   [(set (match_operand:HI 0 "register_operand" "=r")
      (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed"
-  "movz{bl|x}\t{%1, %k0|%k0, %k1}"
+  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && reload_completed"
+  "movz{bl|x}\t{%1, %k0|%k0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
 
        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
+   && (!TARGET_ZERO_EXTEND_WITH_AND
+       || optimize_function_for_size_p (cfun))
    && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
   [(set (match_operand:HI 0 "register_operand" "")
        (zero_extend:HI (match_operand:QI 1 "nonimmediate_operand" "")))])
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
    && ANY_QI_REG_P (operands[0])
-   && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+   && (TARGET_ZERO_EXTEND_WITH_AND
+       && optimize_function_for_speed_p (cfun))
    && !reg_overlap_mentioned_p (operands[0], operands[1])"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
   [(set (match_operand:SI 0 "register_operand" "=r,?&q")
      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "0,qm")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ZERO_EXTEND_WITH_AND && !optimize_size"
+  "TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun)"
   "#"
   [(set_attr "type" "alu1")
    (set_attr "mode" "SI")])
   [(set (match_operand:SI 0 "register_operand" "=r,r")
      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm,0")))
    (clobber (reg:CC FLAGS_REG))]
-  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_size"
+  "!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun)"
   "#"
   [(set_attr "type" "imovx,alu1")
    (set_attr "mode" "SI")])
 (define_insn "*zero_extendqisi2_movzbw"
   [(set (match_operand:SI 0 "register_operand" "=r")
      (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "qm")))]
-  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_size) && reload_completed"
+  "(!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
+   && reload_completed"
   "movz{bl|x}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx")
    (set_attr "mode" "SI")])
        (zero_extend:SI (match_operand:QI 1 "nonimmediate_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "reload_completed
-   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_size)
+   && (!TARGET_ZERO_EXTEND_WITH_AND || optimize_function_for_size_p (cfun))
    && (!REG_P (operands[1]) || ANY_QI_REG_P (operands[1]))"
   [(set (match_dup 0)
        (zero_extend:SI (match_dup 1)))])
   "reload_completed
    && ANY_QI_REG_P (operands[0])
    && (ANY_QI_REG_P (operands[1]) || MEM_P (operands[1]))
-   && (TARGET_ZERO_EXTEND_WITH_AND && !optimize_size)
+   && (TARGET_ZERO_EXTEND_WITH_AND && optimize_function_for_speed_p (cfun))
    && !reg_overlap_mentioned_p (operands[0], operands[1])"
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
 })
 
 (define_insn "zero_extendsidi2_32"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Yt")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,?r,?o,?*Ym,?*y,?*Yi,*Y2")
        (zero_extend:DI
         (match_operand:SI 1 "nonimmediate_operand" "0,rm,r ,r   ,m  ,r   ,m")))
    (clobber (reg:CC FLAGS_REG))]
    #
    movd\t{%1, %0|%0, %1}
    movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
-  [(set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")
-   (set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")])
+   %vmovd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "multi,multi,multi,mmxmov,mmxmov,ssemov,ssemov")
+   (set_attr "prefix" "*,*,*,orig,orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SI,SI,SI,DI,DI,TI,TI")])
 
 (define_insn "zero_extendsidi2_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Yt")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,o,?*Ym,?*y,?*Yi,*Y2")
      (zero_extend:DI
        (match_operand:SI 1 "nonimmediate_operand"  "rm,0,r   ,m  ,r   ,m")))]
   "TARGET_64BIT"
    #
    movd\t{%1, %0|%0, %1}
    movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}
-   movd\t{%1, %0|%0, %1}"
+   %vmovd\t{%1, %0|%0, %1}
+   %vmovd\t{%1, %0|%0, %1}"
   [(set_attr "type" "imovx,imov,mmxmov,mmxmov,ssemov,ssemov")
+   (set_attr "prefix" "orig,*,orig,orig,maybe_vex,maybe_vex")
    (set_attr "mode" "SI,DI,DI,DI,TI,TI")])
 
 (define_split
   emit_move_insn (operands[3], operands[1]);
 
   /* Generate a cltd if possible and doing so it profitable.  */
-  if ((optimize_size || TARGET_USE_CLTD)
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
       && true_regnum (operands[1]) == AX_REG
       && true_regnum (operands[2]) == DX_REG)
     {
     emit_move_insn (operands[3], operands[1]);
 
   /* Generate a cltd if possible and doing so it profitable.  */
-  if ((optimize_size || TARGET_USE_CLTD)
+  if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
       && true_regnum (operands[3]) == AX_REG)
     {
       emit_insn (gen_ashrsi3_31 (operands[4], operands[3], GEN_INT (31)));
 ;; %%% Kill these when call knows how to work out a DFmode push earlier.
 (define_insn "*dummy_extendsfdf2"
   [(set (match_operand:DF 0 "push_operand" "=<")
-       (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY")))]
+       (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fY2")))]
   "0"
   "#")
 
 (define_split
   [(set (match_operand:DF 0 "push_operand" "")
        (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
-  "!TARGET_64BIT"
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
-   (set (mem:DF (reg:SI SP_REG)) (float_extend:DF (match_dup 1)))])
-
-(define_split
-  [(set (match_operand:DF 0 "push_operand" "")
-       (float_extend:DF (match_operand:SF 1 "fp_register_operand" "")))]
-  "TARGET_64BIT"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
-   (set (mem:DF (reg:DI SP_REG)) (float_extend:DF (match_dup 1)))])
+  ""
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (const_int -8)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:DF (match_dup 1)))])
 
 (define_insn "*dummy_extendsfxf2"
   [(set (match_operand:XF 0 "push_operand" "=<")
   [(set (match_operand:XF 0 "push_operand" "")
        (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
   ""
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
-   (set (mem:XF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
-
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-       (float_extend:XF (match_operand:SF 1 "fp_register_operand" "")))]
-  "TARGET_64BIT"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
-   (set (mem:DF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))]
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:XF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
   "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
 
 (define_split
   [(set (match_operand:XF 0 "push_operand" "")
        (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
   ""
-  [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (match_dup 2)))
-   (set (mem:DF (reg:SI SP_REG)) (float_extend:XF (match_dup 1)))]
-  "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
-
-(define_split
-  [(set (match_operand:XF 0 "push_operand" "")
-       (float_extend:XF (match_operand:DF 1 "fp_register_operand" "")))]
-  "TARGET_64BIT"
-  [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (match_dup 2)))
-   (set (mem:XF (reg:DI SP_REG)) (float_extend:XF (match_dup 1)))]
+  [(set (reg:P SP_REG) (plus:P (reg:P SP_REG) (match_dup 2)))
+   (set (mem:DF (reg:P SP_REG)) (float_extend:XF (match_dup 1)))]
   "operands[2] = GEN_INT (TARGET_128BIT_LONG_DOUBLE ? -16 : -12);")
 
 (define_expand "extendsfdf2"
   [(set (match_operand:DF 0 "register_operand" "")
         (float_extend:DF
          (match_operand:SF 1 "nonimmediate_operand" "")))]
-  "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
    && reload_completed && SSE_REG_P (operands[0])"
    [(set (match_dup 2)
         (float_extend:V2DF
       return output_387_reg_move (insn, operands);
 
     case 2:
-      return "cvtss2sd\t{%1, %0|%0, %1}";
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,fmov,ssecvt")
+   (set_attr "prefix" "orig,orig,maybe_vex")
    (set_attr "mode" "SF,XF,DF")])
 
 (define_insn "*extendsfdf2_sse"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
         (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2 && TARGET_SSE_MATH"
-  "cvtss2sd\t{%1, %0|%0, %1}"
+  "%vcvtss2sd\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DF")])
 
 (define_insn "*extendsfdf2_i387"
     ;
   else
     {
-      rtx temp = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      rtx temp = assign_386_stack_local (SFmode, slot);
       emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
       DONE;
     }
   [(set (match_operand:SF 0 "register_operand" "")
         (float_truncate:SF
          (match_operand:DF 1 "nonimmediate_operand" "")))]
-  "(TARGET_USE_VECTOR_CONVERTS || TARGET_GENERIC) && !optimize_size
+  "TARGET_USE_VECTOR_FP_CONVERTS
+   && optimize_insn_for_speed_p ()
    && reload_completed && SSE_REG_P (operands[0])"
    [(set (match_dup 2)
         (vec_concat:V4SF
   "")
 
 (define_insn "*truncdfsf_fast_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,f,x")
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=fm,x")
         (float_truncate:SF
-          (match_operand:DF 1 "nonimmediate_operand" "f ,f,xm")))]
+          (match_operand:DF 1 "nonimmediate_operand" "f  ,xm")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387 && flag_unsafe_math_optimizations"
 {
   switch (which_alternative)
     {
     case 0:
-    case 1:
       return output_387_reg_move (insn, operands);
-    case 2:
-      return "cvtsd2ss\t{%1, %0|%0, %1}";
+    case 1:
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
     default:
       gcc_unreachable ();
     }
 }
-  [(set_attr "type" "fmov,fmov,ssecvt")
+  [(set_attr "type" "fmov,ssecvt")
+   (set_attr "prefix" "orig,maybe_vex")
    (set_attr "mode" "SF")])
 
 ;; Yes, this one doesn't depend on flag_unsafe_math_optimizations,
         (float_truncate:SF
           (match_operand:DF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2 && TARGET_SSE_MATH"
-  "cvtsd2ss\t{%1, %0|%0, %1}"
+  "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_fast_i387"
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?fx*r,Yt")
+  [(set (match_operand:SF 0 "nonimmediate_operand"   "=m,?fx*r,Y2")
        (float_truncate:SF
-         (match_operand:DF 1 "nonimmediate_operand" "f ,f    ,Ytm")))
+         (match_operand:DF 1 "nonimmediate_operand" "f ,f    ,Y2m")))
    (clobber (match_operand:SF 2 "memory_operand"     "=X,m    ,X"))]
   "TARGET_MIX_SSE_I387"
 {
     case 1:
       return "#";
     case 2:
-      return "cvtsd2ss\t{%1, %0|%0, %1}";
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
     default:
       gcc_unreachable ();
     }
 }
   [(set_attr "type" "fmov,multi,ssecvt")
    (set_attr "unit" "*,i387,*")
+   (set_attr "prefix" "orig,orig,maybe_vex")
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_i387"
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (<MODE>mode, SLOT_VIRTUAL);
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (<MODE>mode, slot);
+    }
 })
 
 (define_insn "*truncxfsf2_mixed"
    (set_attr "mode" "SF")])
 
 (define_insn "*truncxfdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?fYt*r")
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,?fY2*r")
        (float_truncate:DF
          (match_operand:XF 1 "register_operand" "f,f")))
    (clobber (match_operand:DF 2 "memory_operand" "=X,m"))]
      (use (match_dup 2))
      (clobber (match_scratch:<ssevecmode> 3 ""))
      (clobber (match_scratch:<ssevecmode> 4 ""))])]
-  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH && !optimize_size"
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
 {
   enum machine_mode mode = <MODE>mode;
   enum machine_mode vecmode = <ssevecmode>mode;
   REAL_VALUE_TYPE TWO31r;
   rtx two31;
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   real_ldexp (&TWO31r, &dconst1, 31);
   two31 = const_double_from_real_value (TWO31r, mode);
   two31 = ix86_build_const_vector (mode, true, two31);
    (use (match_operand:<ssevecmode> 4  "nonimmediate_operand" "m,x"))
    (clobber (match_scratch:<ssevecmode> 1 "=x,&x"))
    (clobber (match_scratch:<ssevecmode> 2 "=x,x"))]
-  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH && !optimize_size"
+  "!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
+   && optimize_function_for_speed_p (cfun)"
   "#"
   "&& reload_completed"
   [(const_int 0)]
        (fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+  "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
        (fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+  "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
 
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
-  [(match_scratch:DF 2 "Yt")
+  [(match_scratch:DF 2 "Y2")
    (set (match_operand:SSEMODEI24 0 "register_operand" "")
        (fix:SSEMODEI24 (match_operand:DF 1 "memory_operand" "")))]
-  "TARGET_AVOID_VECTOR_DECODE && !optimize_size"
+  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
   "")
   [(match_scratch:SF 2 "x")
    (set (match_operand:SSEMODEI24 0 "register_operand" "")
        (fix:SSEMODEI24 (match_operand:SF 1 "memory_operand" "")))]
-  "TARGET_AVOID_VECTOR_DECODE && !optimize_size"
+  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (fix:SSEMODEI24 (match_dup 2)))]
   "")
 (define_insn "fix_trunc<mode>_i387_fisttp_with_temp"
   [(set (match_operand:X87MODEI 0 "nonimmediate_operand" "=m,?r")
        (fix:X87MODEI (match_operand 1 "register_operand" "f,f")))
-   (clobber (match_operand:X87MODEI 2 "memory_operand" "=m,m"))
+   (clobber (match_operand:X87MODEI 2 "memory_operand" "=X,m"))
    (clobber (match_scratch:XF 3 "=&1f,&1f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && TARGET_FISTTP
        (fix:DI (match_operand 1 "register_operand" "f,f")))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
    (clobber (match_scratch:XF 5 "=&1f,&1f"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && !TARGET_FISTTP
        (fix:X87MODEI12 (match_operand 1 "register_operand" "f,f")))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[1]))
    && !TARGET_FISTTP
    && !SSE_FLOAT_MODE_P (GET_MODE (operands[1]))"
 ;; wants to be able to do this between registers.
 
 (define_expand "floathi<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "")
-       (float:MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-{
-  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-    {
-      emit_insn
-       (gen_floatsi<mode>2 (operands[0],
-                            convert_to_mode (SImode, operands[1], 0)));
-      DONE;
-    }
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "")
 
-(define_insn "*floathi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
-       (float:MODEF
-         (match_operand:HI 1 "nonimmediate_operand" "m,?r")))]
+;; Pre-reload splitter to add memory clobber to the pattern.
+(define_insn_and_split "*floathi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "register_operand" "")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+             (float:X87MODEF (match_dup 1)))
+   (clobber (match_dup 2))])]
+  "operands[2] = assign_386_stack_local (HImode, SLOT_TEMP);")
+
+(define_insn "*floathi<mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF (match_operand:HI 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:HI 2 "memory_operand" "=m,m"))]
   "TARGET_80387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)"
-  "@
-   fild%z1\t%1
-   #"
+  "#"
   [(set_attr "type" "fmov,multi")
    (set_attr "mode" "<MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatsi<mode>2"
-  [(set (match_operand:MODEF 0 "register_operand" "")
-       (float:MODEF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
-  "
-   /* When we use vector converts, we can't have input in memory.  */
-   if (GET_MODE (operands[0]) == DFmode
-       && TARGET_USE_VECTOR_CONVERTS && !optimize_size && TARGET_SSE_MATH
-       && SSE_FLOAT_MODE_P (DFmode))
-     operands[1] = force_reg (SImode, operands[1]);
-   else if (GET_MODE (operands[0]) == SFmode
-            && !optimize_size && TARGET_USE_VECTOR_CONVERTS && TARGET_SSE_MATH
-            && SSE_FLOAT_MODE_P (SFmode))
-     {
-       /* When !flag_trapping_math, we handle SImode->SFmode vector
-         conversions same way as SImode->DFmode.
-
-         For flat_trapping_math we can't safely use vector conversion without
-         clearing upper half, otherwise precision exception might occur.
-         However we can still generate the common sequence converting value
-         from general register to XMM register as:
-
-           mov         reg32, mem32
-           movd        mem32, xmm
-           cvtdq2pd xmm,xmm
-
-         because we know that movd clears the upper half.
-
-         Sadly in this case we can't rely on reload moving the value to XMM
-         register, since we need to know if upper half is OK, so we need
-         to do reloading by hand.  We force operand to memory unless target
-         supports inter unit moves.  */
-       if (!flag_trapping_math)
-         operands[1] = force_reg (SImode, operands[1]);
-       else if (!MEM_P (operands[1]))
-        {
-          rtx tmp = assign_386_stack_local (SImode, SLOT_VIRTUAL);
-          emit_move_insn (tmp, operands[1]);
-          operands[1] = tmp;
-        }
-     }
-   /* Offload operand of cvtsi2ss and cvtsi2sd into memory for
-      !TARGET_INTER_UNIT_CONVERSIONS
-      It is necessary for the patterns to not accept nonmemory operands
-      as we would optimize out later.  */
-   else if (!TARGET_INTER_UNIT_CONVERSIONS
-           && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (operands[0]))
-           && !optimize_size
-           && !MEM_P (operands[1]))
-     {
-       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
-       emit_move_insn (tmp, operands[1]);
-       operands[1] = tmp;
-     }
-  ")
-
-(define_insn "*floatsisf2_mixed_vector"
-  [(set (match_operand:SF 0 "register_operand" "=x,f,?f")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
-  "TARGET_MIX_SSE_I387 && !flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "@
-   cvtdq2ps\t{%1, %0|%0, %1}
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "sseicvt,fmov,multi")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*")
-   (set_attr "athlon_decode" "double,*,*")
-   (set_attr "amdfam10_decode" "double,*,*")
-   (set_attr "fp_int_src" "false,true,true")])
-
-(define_insn "*floatsisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_MIX_SSE_I387
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2ss\t{%1, %0|%0, %1}
-   cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,vector,double")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
-   (set_attr "fp_int_src" "true")])
-
-(define_insn "*floatsisf2_mixed_memory"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m,m")))]
-  "TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "@
-   fild%z1\t%1
-   cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "*,double")
-   (set_attr "amdfam10_decode" "*,double")
+(define_insn "*floathi<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (float:X87MODEF (match_operand:HI 1 "memory_operand" "m")))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)"
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<MODE>")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsisf2_sse_vector_nointernunit"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
-  "TARGET_SSE_MATH && flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && !TARGET_INTER_UNIT_MOVES"
-  "#"
-  [(set_attr "type" "multi")])
-
-(define_insn "*floatsisf2_sse_vector_internunit"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "rm,x")))]
-  "TARGET_SSE_MATH && flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && TARGET_INTER_UNIT_MOVES"
-  "#"
-  [(set_attr "type" "multi")])
-
 (define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && (TARGET_INTER_UNIT_MOVES || MEM_P (operands[1]))
-   && !SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
-  [(set (match_dup 0)
-       (float:V4SF (match_dup 2)))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
-  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "register_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+  "TARGET_80387
+   && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+       || TARGET_MIX_SSE_I387)
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
 
 (define_split
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:SI 1 "register_operand" "")))]
-  "flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && SSE_REG_P (operands[1]) && SSE_REG_P (operands[0])"
-  [(set (match_dup 2) (vec_duplicate:V4SI (match_dup 1)))
-   (set (match_dup 0)
-       (float:V4SF (match_dup 2)))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], SFmode, 0);
-  operands[0] = simplify_gen_subreg (V4SFmode, operands[0], SFmode, 0);
-})
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:HI 1 "memory_operand" "")))
+   (clobber (match_operand:HI 2 "memory_operand" ""))]
+   "TARGET_80387
+    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387)
+    && reload_completed"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
 
-(define_insn "*floatsisf2_sse_vector"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "register_operand" "x")))]
-  "TARGET_SSE_MATH && !flag_trapping_math
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size
-   && !TARGET_INTER_UNIT_MOVES"
-  "cvtdq2ps\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+(define_expand "float<SSEMODEI24:mode><X87MODEF:mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))]
+  "TARGET_80387
+   || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+       && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)"
+  "")
 
-(define_insn "*floatsisf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
-   (set_attr "fp_int_src" "true")])
+;; Pre-reload splitter to add memory clobber to the pattern.
+(define_insn_and_split "*float<SSEMODEI24:mode><X87MODEF:mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))]
+  "((TARGET_80387
+     && (!((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+          && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH)
+        || TARGET_MIX_SSE_I387))
+    || ((<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+       && SSE_FLOAT_MODE_P (<X87MODEF:MODE>mode) && TARGET_SSE_MATH
+       && ((<SSEMODEI24:MODE>mode == SImode
+            && TARGET_SSE2 && TARGET_USE_VECTOR_CONVERTS
+            && optimize_function_for_speed_p (cfun)
+            && flag_trapping_math)
+           || !(TARGET_INTER_UNIT_CONVERSIONS
+                || optimize_function_for_size_p (cfun)))))
+   && !(reload_completed || reload_in_progress)"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0) (float:X87MODEF (match_dup 1)))
+             (clobber (match_dup 2))])]
+{
+  operands[2] = assign_386_stack_local (<SSEMODEI24:MODE>mode, SLOT_TEMP);
 
-(define_insn "*floatsisf2_sse_memory"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:SI 1 "memory_operand" "m")))]
-  "TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2ss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+  /* Avoid store forwarding (partial memory) stall penalty
+     by passing DImode value through XMM registers.  */
+  if (<SSEMODEI24:MODE>mode == DImode && !TARGET_64BIT
+      && TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+      && optimize_function_for_speed_p (cfun))
+    {
+      emit_insn (gen_floatdi<X87MODEF:mode>2_i387_with_xmm (operands[0],
+                                                           operands[1],
+                                                           operands[2]));
+      DONE;
+    }
+})
 
-(define_insn "*floatsidf2_mixed_vector"
-  [(set (match_operand:DF 0 "register_operand" "=x,f,f")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "x,m,r")))]
-  "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "@
-   cvtdq2pd\t{%1, %0|%0, %1}
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "sseicvt,fmov,multi")
-   (set_attr "mode" "V2DF,DF,DF")
-   (set_attr "unit" "*,*,i387")
-   (set_attr "athlon_decode" "double,*,*")
-   (set_attr "amdfam10_decode" "double,*,*")
-   (set_attr "fp_int_src" "false,true,true")])
-
-(define_insn "*floatsidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x,!x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "m,r,r,m,x")))]
+(define_insn "*floatsi<mode>2_vector_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x,x")
+       (float:MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "m,?r,r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=X,m,m,X,m"))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtdq2pd\t{%1, %0|%0, %1}"
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
   [(set_attr "type" "fmov,multi,sseicvt,sseicvt,sseicvt")
-   (set_attr "mode" "DF,DF,DF,DF,V2DF")
+   (set_attr "mode" "<MODE>,<MODE>,<MODE>,<MODE>,<ssevecmode>")
    (set_attr "unit" "*,i387,*,*,*")
    (set_attr "athlon_decode" "*,*,double,direct,double")
    (set_attr "amdfam10_decode" "*,*,vector,double,double")
-   (set_attr "fp_int_src" "true,true,true,true,false")])
+   (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_mixed_memory"
-  [(set (match_operand:DF 0 "register_operand" "=f,x")
-       (float:DF (match_operand:SI 1 "memory_operand" "m,m")))]
+(define_insn "*floatsi<mode>2_vector_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "m,m")))]
   "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
   "@
    fild%z1\t%1
-   cvtsi2sd\t{%1, %0|%0, %1}"
+   #"
   [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "mode" "<MODE>,<ssevecmode>")
+   (set_attr "unit" "i387,*")
    (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatsidf2_sse_vector"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:SI 1 "register_operand" "x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && TARGET_USE_VECTOR_CONVERTS && !optimize_size"
-  "cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "V2DF")
-   (set_attr "athlon_decode" "double")
-   (set_attr "amdfam10_decode" "double")
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r,r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m,m,X"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387"
+  "#"
+  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "*,i387,*,*")
+   (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:SI 1 "memory_operand" "")))]
-  "TARGET_USE_VECTOR_CONVERTS && reload_completed
-   && SSE_REG_P (operands[0])"
-  [(set (match_dup 0)
-       (float:V2DF
-         (vec_select:V2SI
-           (match_dup 2)
-           (parallel [(const_int 0) (const_int 1)]))))]
-{
-  operands[2] = simplify_gen_subreg (V4SImode, operands[0], DFmode, 0);
-  operands[0] = simplify_gen_subreg (V2DFmode, operands[0], DFmode, 0);
-  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), operands[1]));
-})
-
-(define_insn "*floatsidf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x,!x")
-       (float:DF (match_operand:SI 1 "nonimmediate_operand" "r,m,x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "@
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtsi2sd\t{%1, %0|%0, %1}
-   cvtdq2pd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF,DF,V2DF")
-   (set_attr "athlon_decode" "double,direct,double")
-   (set_attr "amdfam10_decode" "vector,double,double")
-   (set_attr "fp_int_src" "true")])
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && TARGET_INTER_UNIT_CONVERSIONS
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
 
-(define_insn "*floatsidf2_memory"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:SI 1 "memory_operand" "x")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && ((!TARGET_USE_VECTOR_CONVERTS && TARGET_INTER_UNIT_CONVERSIONS)
-       || optimize_size)"
-  "cvtsi2sd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "direct")
-   (set_attr "amdfam10_decode" "double")
-   (set_attr "fp_int_src" "true")])
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
 
-(define_insn "*floatsi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x,x")
        (float:MODEF
-         (match_operand:SI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387
-   && (!TARGET_SSE_MATH || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
   "@
    fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "<MODE>")
-   (set_attr "unit" "*,i387")
-   (set_attr "fp_int_src" "true")])
-
-(define_expand "floatdisf2"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_64BIT && TARGET_SSE_MATH)"
-{
-  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
-      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (SFmode)
-      && !optimize_size
-      && !MEM_P (operands[1]))
-    {
-       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
-       emit_move_insn (tmp, operands[1]);
-       operands[1] = tmp;
-    }
-})
-
-(define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,?f,x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2ss{q}\t{%1, %0|%0, %1}
-   cvtsi2ss{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,vector,double")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "fmov,sseicvt,sseicvt")
+   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "unit" "i387,*,*")
+   (set_attr "athlon_decode" "*,double,direct")
+   (set_attr "amdfam10_decode" "*,vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-       (float:SF (match_operand:DI 1 "memory_operand" "m,m")))]
-  "TARGET_64BIT && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_mixed_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_MIX_SSE_I387
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
   "@
    fild%z1\t%1
-   cvtsi2ss{q}\t{%1, %0|%0, %1}"
+   %vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "*,double")
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "*,direct")
    (set_attr "amdfam10_decode" "*,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x,x")
-       (float:SF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE_MATH
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+(define_insn "*floatsi<mode>2_vector_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x")
+       (float:MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "r,m,!x")))
+   (clobber (match_operand:SI 2 "memory_operand" "=m,X,m"))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "vector,double")
-   (set_attr "amdfam10_decode" "vector,double")
+   (set_attr "mode" "<MODE>,<MODE>,<ssevecmode>")
+   (set_attr "athlon_decode" "double,direct,double")
+   (set_attr "amdfam10_decode" "vector,double,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdisf2_memory"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (float:SF (match_operand:DI 1 "memory_operand" "m")))]
-  "TARGET_64BIT && TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2ss{q}\t{%1, %0|%0, %1}"
+(define_insn "*floatsi<mode>2_vector_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "m")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)"
+  "#"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double")
+   (set_attr "mode" "<MODE>")
+   (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_expand "floatdidf2"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
 {
-  if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
+  rtx op1 = operands[1];
+
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
     {
-      ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
-      DONE;
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[1]));
     }
-  if (!TARGET_INTER_UNIT_CONVERSIONS && TARGET_64BIT
-      && TARGET_SSE_MATH && SSE_FLOAT_MODE_P (DFmode)
-      && !optimize_size
-      && !MEM_P (operands[1]))
+  /* We can ignore possible trapping value in the
+     high part of SSE register for non-trapping math. */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
     {
-       rtx tmp = assign_386_stack_local (GET_MODE (operands[1]), SLOT_VIRTUAL);
-       emit_move_insn (tmp, operands[1]);
-       operands[1] = tmp;
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_move_insn (operands[2], operands[1]);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[2]));
     }
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
 })
 
-(define_insn "*floatdidf2_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "m,r,r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "@
-   fild%z1\t%1
-   #
-   cvtsi2sd{q}\t{%1, %0|%0, %1}
-   cvtsi2sd{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,multi,sseicvt,sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "unit" "*,i387,*,*")
-   (set_attr "athlon_decode" "*,*,double,direct")
-   (set_attr "amdfam10_decode" "*,*,vector,double")
-   (set_attr "fp_int_src" "true")])
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:SI 2 "memory_operand" ""))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
 
-(define_insn "*floatdidf2_mixed_memory"
-  [(set (match_operand:DF 0 "register_operand" "=f,x")
-       (float:DF (match_operand:DI 1 "memory_operand" "m,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "@
-   fild%z1\t%1
-   cvtsi2sd{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "fmov,sseicvt")
-   (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "*,direct")
-   (set_attr "amdfam10_decode" "*,double")
+  emit_insn (gen_sse2_loadld (operands[4],
+                             CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "register_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  rtx op1 = operands[1];
+
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  if (GET_CODE (op1) == SUBREG)
+    op1 = SUBREG_REG (op1);
+
+  if (GENERAL_REG_P (op1) && TARGET_INTER_UNIT_MOVES)
+    {
+      operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+      emit_insn (gen_sse2_loadld (operands[4],
+                                 CONST0_RTX (V4SImode), operands[1]));
+    }
+  /* We can ignore possible trapping value in the
+     high part of SSE register for non-trapping math. */
+  else if (SSE_REG_P (op1) && !flag_trapping_math)
+    operands[4] = simplify_gen_subreg (V4SImode, operands[1], SImode, 0);
+  else
+    gcc_unreachable ();
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SI 1 "memory_operand" "")))]
+  "TARGET_SSE2 && TARGET_SSE_MATH
+   && TARGET_USE_VECTOR_CONVERTS && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(const_int 0)]
+{
+  operands[3] = simplify_gen_subreg (<ssevecmode>mode, operands[0],
+                                    <MODE>mode, 0);
+  operands[4] = simplify_gen_subreg (V4SImode, operands[0], <MODE>mode, 0);
+
+  emit_insn (gen_sse2_loadld (operands[4],
+                             CONST0_RTX (V4SImode), operands[1]));
+  emit_insn
+    (gen_sse2_cvtdq2p<ssemodefsuffix> (operands[3], operands[4]));
+  DONE;
+})
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_with_temp"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=m,X"))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH"
+  "#"
+  [(set_attr "type" "sseicvt")
+   (set_attr "mode" "<MODEF:MODE>")
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdidf2_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x,x")
-       (float:DF (match_operand:DI 1 "nonimmediate_operand" "r,m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
-   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "r,m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,direct")
    (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdidf2_sse_memory"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (float:DF (match_operand:DI 1 "memory_operand" "m")))]
-  "TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
-   && !TARGET_INTER_UNIT_CONVERSIONS && !optimize_size"
-  "cvtsi2sd{q}\t{%1, %0|%0, %1}"
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "nonimmediate_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && (TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+(define_insn "*float<SSEMODEI24:mode><MODEF:mode>2_sse_nointerunit"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (float:MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))"
+  "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sseicvt")
-   (set_attr "mode" "DF")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "direct")
    (set_attr "amdfam10_decode" "double")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "*floatdi<mode>2_i387"
-  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
-       (float:MODEF
-         (match_operand:DI 1 "nonimmediate_operand" "m,?r")))]
-  "TARGET_80387
-   && (!TARGET_SSE_MATH || !TARGET_64BIT
-       || !SSE_FLOAT_MODE_P (GET_MODE (operands[0])))"
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && !(TARGET_INTER_UNIT_CONVERSIONS || optimize_function_for_size_p (cfun))
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:MODEF (match_dup 2)))]
+  "")
+
+(define_split
+  [(set (match_operand:MODEF 0 "register_operand" "")
+       (float:MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
+   && reload_completed
+   && (SSE_REG_P (operands[0])
+       || (GET_CODE (operands[0]) == SUBREG
+          && SSE_REG_P (operands[0])))"
+  [(set (match_dup 0) (float:MODEF (match_dup 1)))]
+  "")
+
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387_with_temp"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "nonimmediate_operand" "m,?r")))
+  (clobber (match_operand:SSEMODEI24 2 "memory_operand" "=X,m"))]
+  "TARGET_80387"
   "@
    fild%z1\t%1
    #"
   [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "<MODE>")
+   (set_attr "mode" "<X87MODEF:MODE>")
    (set_attr "unit" "*,i387")
    (set_attr "fp_int_src" "true")])
 
-(define_insn "float<mode>xf2"
-  [(set (match_operand:XF 0 "register_operand" "=f,f")
-       (float:XF (match_operand:X87MODEI 1 "nonimmediate_operand" "m,?r")))]
+(define_insn "*float<SSEMODEI24:mode><X87MODEF:mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (float:X87MODEF
+         (match_operand:SSEMODEI24 1 "memory_operand" "m")))]
   "TARGET_80387"
-  "@
-   fild%z1\t%1
-   #"
-  [(set_attr "type" "fmov,multi")
-   (set_attr "mode" "XF")
-   (set_attr "unit" "*,i387")
+  "fild%z1\t%1"
+  [(set_attr "type" "fmov")
+   (set_attr "mode" "<X87MODEF:MODE>")
    (set_attr "fp_int_src" "true")])
 
-;; %%% Kill these when reload knows how to do it.
 (define_split
-  [(set (match_operand 0 "fp_register_operand" "")
-       (float (match_operand 1 "register_operand" "")))]
-  "reload_completed
-   && X87_FLOAT_MODE_P (GET_MODE (operands[0]))"
-  [(const_int 0)]
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "register_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
+  "")
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:SSEMODEI24 1 "memory_operand" "")))
+   (clobber (match_operand:SSEMODEI24 2 "memory_operand" ""))]
+  "TARGET_80387
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
+;; Avoid store forwarding (partial memory) stall penalty
+;; by passing DImode value through XMM registers.  */
+
+(define_insn "floatdi<X87MODEF:mode>2_i387_with_xmm"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (float:X87MODEF
+         (match_operand:DI 1 "nonimmediate_operand" "m,?r")))
+   (clobber (match_scratch:V4SI 3 "=X,x"))
+   (clobber (match_scratch:V4SI 4 "=X,x"))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<X87MODEF:MODE>")
+   (set_attr "unit" "i387")
+   (set_attr "fp_int_src" "true")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:DI 1 "register_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0) (float:X87MODEF (match_dup 2)))]
 {
-  operands[2] = ix86_force_to_memory (GET_MODE (operands[1]), operands[1]);
-  operands[2] = gen_rtx_FLOAT (GET_MODE (operands[0]), operands[2]);
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[2]));
-  ix86_free_from_memory (GET_MODE (operands[1]));
-  DONE;
+  /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+     Assemble the 64-bit DImode value in an xmm register.  */
+  emit_insn (gen_sse2_loadld (operands[3], CONST0_RTX (V4SImode),
+                             gen_rtx_SUBREG (SImode, operands[1], 0)));
+  emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
+                             gen_rtx_SUBREG (SImode, operands[1], 4)));
+  emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
+
+  operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
 })
 
-(define_expand "floatunssisf2"
-  [(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
-  "!TARGET_64BIT"
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (float:X87MODEF (match_operand:DI 1 "memory_operand" "")))
+   (clobber (match_scratch:V4SI 3 ""))
+   (clobber (match_scratch:V4SI 4 ""))
+   (clobber (match_operand:DI 2 "memory_operand" ""))]
+  "TARGET_80387 && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES
+   && !TARGET_64BIT && optimize_function_for_speed_p (cfun)
+   && reload_completed
+   && FP_REG_P (operands[0])"
+  [(set (match_dup 0) (float:X87MODEF (match_dup 1)))]
+  "")
+
+;; Avoid store forwarding (partial memory) stall penalty by extending
+;; SImode value to DImode through XMM register instead of pushing two
+;; SImode values to stack. Note that even !TARGET_INTER_UNIT_MOVES
+;; targets benefit from this optimization. Also note that fild
+;; loads from memory only.
+
+(define_insn "*floatunssi<mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,f")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "nonimmediate_operand" "x,m")))
+   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_scratch:SI 3 "=X,x"))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE"
+  "#"
+  [(set_attr "type" "multi")
+   (set_attr "mode" "<MODE>")])
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "register_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 1))
+   (set (match_dup 0)
+       (float:X87MODEF (match_dup 2)))]
+  "operands[1] = simplify_gen_subreg (DImode, operands[1], SImode, 0);")
+
+(define_split
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (unsigned_float:X87MODEF
+         (match_operand:SI 1 "memory_operand" "")))
+   (clobber (match_operand:DI 2 "memory_operand" ""))
+   (clobber (match_scratch:SI 3 ""))]
+  "!TARGET_64BIT
+   && TARGET_80387 && TARGET_SSE
+   && reload_completed"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 0)
+       (float:X87MODEF (match_dup 2)))]
 {
-  if (TARGET_SSE_MATH && TARGET_SSE2)
-    ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
-  else
-    x86_emit_floatuns (operands);
-  DONE;
+  emit_move_insn (operands[3], operands[1]);
+  operands[3] = simplify_gen_subreg (DImode, operands[3], SImode, 0);
 })
 
-(define_expand "floatunssidf2"
-  [(use (match_operand:DF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
-  "!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
-  "ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
+(define_expand "floatunssi<mode>2"
+  [(parallel
+     [(set (match_operand:X87MODEF 0 "register_operand" "")
+          (unsigned_float:X87MODEF
+            (match_operand:SI 1 "nonimmediate_operand" "")))
+      (clobber (match_dup 2))
+      (clobber (match_scratch:SI 3 ""))])]
+  "!TARGET_64BIT
+   && ((TARGET_80387 && TARGET_SSE)
+       || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+{
+  if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
+    {
+      ix86_expand_convert_uns_si<mode>_sse (operands[0], operands[1]);
+      DONE;
+    }
+  else
+    {
+      int slot = virtuals_instantiated ? SLOT_TEMP : SLOT_VIRTUAL;
+      operands[2] = assign_386_stack_local (DImode, slot);
+    }
+})
 
 (define_expand "floatunsdisf2"
   [(use (match_operand:SF 0 "register_operand" ""))
 (define_expand "floatunsdidf2"
   [(use (match_operand:DF 0 "register_operand" ""))
    (use (match_operand:DI 1 "nonimmediate_operand" ""))]
-  "TARGET_SSE_MATH && TARGET_SSE2
-   && (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
+  "(TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)
+   && TARGET_SSE2 && TARGET_SSE_MATH"
 {
   if (TARGET_64BIT)
     x86_emit_floatuns (operands);
 (define_expand "addti3"
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
        (plus:TI (match_operand:TI 1 "nonimmediate_operand" "")
-                (match_operand:TI 2 "x86_64_general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                (match_operand:TI 2 "x86_64_general_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_binary_operator (PLUS, TImode, operands); DONE;")
 
                                     (match_dup 4))
                            (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_ti (operands+0, 1, operands+0, operands+3);
-   split_ti (operands+1, 1, operands+1, operands+4);
-   split_ti (operands+2, 1, operands+2, operands+5);")
+  "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
 
 ;; %%% splits for addsidi3
 ;  [(set (match_operand:DI 0 "nonimmediate_operand" "")
 (define_expand "adddi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (plus:DI (match_operand:DI 1 "nonimmediate_operand" "")
-                (match_operand:DI 2 "x86_64_general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                (match_operand:DI 2 "x86_64_general_operand" "")))]
   ""
   "ix86_expand_binary_operator (PLUS, DImode, operands); DONE;")
 
                                     (match_dup 4))
                            (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+0, 1, operands+0, operands+3);
-   split_di (operands+1, 1, operands+1, operands+4);
-   split_di (operands+2, 1, operands+2, operands+5);")
+  "split_di (&operands[0], 3, &operands[0], &operands[3]);")
 
 (define_insn "adddi3_carry_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
   [(set_attr "type" "alu")
    (set_attr "mode" "DI")])
 
-(define_insn "*<addsub><mode>3_cc_overflow"
+(define_insn "*<plusminus_insn><mode>3_cc_overflow"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
            (plusminus:SWI
    (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
        (plusminus:SWI (match_dup 1) (match_dup 2)))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<addsub>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<plusminus_mnemonic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
   [(set_attr "type" "icmp")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<addsub>si3_zext_cc_overflow"
+(define_insn "*<plusminus_insn>si3_zext_cc_overflow"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
            (plusminus:SI (match_operand:SI 1 "nonimmediate_operand" "<comm>0")
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (plusminus:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<addsub>{l}\t{%2, %k0|%k0, %2}"
+  "<plusminus_mnemonic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
          (plus:QI (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
                            (match_operand:QI 1 "nonimmediate_operand" "%0,0"))
-                  (match_operand:QI 2 "general_operand" "qi,qm")))
+                  (match_operand:QI 2 "general_operand" "qn,qm")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, QImode, operands)"
   "adc{b}\t{%2, %0|%0, %2}"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
          (plus:HI (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
                            (match_operand:HI 1 "nonimmediate_operand" "%0,0"))
-                  (match_operand:HI 2 "general_operand" "ri,rm")))
+                  (match_operand:HI 2 "general_operand" "rn,rm")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, HImode, operands)"
   "adc{w}\t{%2, %0|%0, %2}"
 (define_insn "addqi3_cc"
   [(set (reg:CC FLAGS_REG)
        (unspec:CC [(match_operand:QI 1 "nonimmediate_operand" "%0,0")
-                   (match_operand:QI 2 "general_operand" "qi,qm")]
+                   (match_operand:QI 2 "general_operand" "qn,qm")]
                   UNSPEC_ADD_CARRY))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
        (plus:QI (match_dup 1) (match_dup 2)))]
    (set_attr "mode" "QI")])
 
 (define_expand "addsi3"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                           (match_operand:SI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+       (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                (match_operand:SI 2 "general_operand" "")))]
   ""
   "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")
 
              (match_operand 3 "immediate_operand" "i")))]
   "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
     || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])
    && GET_MODE (operands[0]) == GET_MODE (operands[2])
    && (GET_MODE (operands[0]) == GET_MODE (operands[3])
              (match_operand 3 "nonmemory_operand" "ri")))]
   "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
     || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])
    && (GET_MODE (operands[0]) == GET_MODE (operands[3])
        || GET_MODE (operands[3]) == VOIDmode)"
              (match_operand 4 "immediate_operand" "i")))]
   "(GET_MODE (operands[0]) == QImode || GET_MODE (operands[0]) == HImode
     || (TARGET_64BIT && GET_MODE (operands[0]) == SImode))
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && GET_MODE (operands[0]) == GET_MODE (operands[1])
    && GET_MODE (operands[0]) == GET_MODE (operands[3])"
   "#"
 (define_insn "*addsi_1"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,rm,r")
        (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0,r")
-                (match_operand:SI 2 "general_operand" "rmni,rni,lni")))
+                (match_operand:SI 2 "general_operand" "g,ri,li")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
   [(set (match_operand:DI 0 "register_operand" "=r,r")
        (zero_extend:DI
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
-                  (match_operand:SI 2 "general_operand" "rmni,lni"))))
+                  (match_operand:SI 2 "general_operand" "g,li"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
 {
   [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,0")
-                  (match_operand:SI 2 "general_operand" "rmni,rni"))
+                  (match_operand:SI 2 "general_operand" "g,ri"))
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=r,rm")
        (plus:SI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                  (match_operand:SI 2 "general_operand" "rmni"))
+                  (match_operand:SI 2 "general_operand" "g"))
          (const_int 0)))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
 
 (define_insn "*addsi_3"
   [(set (reg FLAGS_REG)
-       (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+       (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
                 (match_operand:SI 1 "nonimmediate_operand" "%0")))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCZmode)
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_3_zext"
   [(set (reg FLAGS_REG)
-       (compare (neg:SI (match_operand:SI 2 "general_operand" "rmni"))
+       (compare (neg:SI (match_operand:SI 2 "general_operand" "g"))
                 (match_operand:SI 1 "nonimmediate_operand" "%0")))
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
   [(set (reg FLAGS_REG)
        (compare
          (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
-                  (match_operand:SI 2 "general_operand" "rmni"))
+                  (match_operand:SI 2 "general_operand" "g"))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
    (set_attr "mode" "SI")])
 
 (define_expand "addhi3"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                  (plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                           (match_operand:HI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+       (plus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+                (match_operand:HI 2 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (PLUS, HImode, operands); DONE;")
 
 (define_insn "*addhi_1_lea"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
        (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,r")
-                (match_operand:HI 2 "general_operand" "ri,rm,lni")))
+                (match_operand:HI 2 "general_operand" "rn,rm,ln")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL
    && ix86_binary_operator_ok (PLUS, HImode, operands)"
 (define_insn "*addhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
        (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                (match_operand:HI 2 "general_operand" "ri,rm")))
+                (match_operand:HI 2 "general_operand" "rn,rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_PARTIAL_REG_STALL
    && ix86_binary_operator_ok (PLUS, HImode, operands)"
   [(set (reg FLAGS_REG)
        (compare
          (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                  (match_operand:HI 2 "general_operand" "rmni,rni"))
+                  (match_operand:HI 2 "general_operand" "rmn,rn"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (plus:HI (match_dup 1) (match_dup 2)))]
 
 (define_insn "*addhi_3"
   [(set (reg FLAGS_REG)
-       (compare (neg:HI (match_operand:HI 2 "general_operand" "rmni"))
+       (compare (neg:HI (match_operand:HI 2 "general_operand" "rmn"))
                 (match_operand:HI 1 "nonimmediate_operand" "%0")))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCZmode)
   [(set (reg FLAGS_REG)
        (compare
          (plus:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-                  (match_operand:HI 2 "general_operand" "rmni"))
+                  (match_operand:HI 2 "general_operand" "rmn"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCGOCmode)
    (set_attr "mode" "HI")])
 
 (define_expand "addqi3"
-  [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
-                  (plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                           (match_operand:QI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+       (plus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+                (match_operand:QI 2 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (PLUS, QImode, operands); DONE;")
 
        (plus:QI (match_dup 0)
                 (match_operand:QI 1 "general_operand" "qn,qnm")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
   [(set (reg FLAGS_REG)
        (compare
          (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
-                  (match_operand:QI 2 "general_operand" "qmni,qni"))
+                  (match_operand:QI 2 "general_operand" "qmn,qn"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
        (plus:QI (match_dup 1) (match_dup 2)))]
 
 (define_insn "*addqi_3"
   [(set (reg FLAGS_REG)
-       (compare (neg:QI (match_operand:QI 2 "general_operand" "qmni"))
+       (compare (neg:QI (match_operand:QI 2 "general_operand" "qmn"))
                 (match_operand:QI 1 "nonimmediate_operand" "%0")))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCZmode)
   [(set (reg FLAGS_REG)
        (compare
          (plus:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-                  (match_operand:QI 2 "general_operand" "qmni"))
+                  (match_operand:QI 2 "general_operand" "qmn"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCGOCmode)
 ;; %%% splits for subditi3
 
 (define_expand "subti3"
-  [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
-                  (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
-                            (match_operand:TI 2 "x86_64_general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (minus:TI (match_operand:TI 1 "nonimmediate_operand" "")
+                 (match_operand:TI 2 "x86_64_general_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_binary_operator (MINUS, TImode, operands); DONE;")
 
                             (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
                                      (match_dup 5))))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_ti (operands+0, 1, operands+0, operands+3);
-   split_ti (operands+1, 1, operands+1, operands+4);
-   split_ti (operands+2, 1, operands+2, operands+5);")
+  "split_ti (&operands[0], 3, &operands[0], &operands[3]);")
 
 ;; %%% splits for subsidi3
 
 (define_expand "subdi3"
-  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
-                  (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
-                            (match_operand:DI 2 "x86_64_general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+       (minus:DI (match_operand:DI 1 "nonimmediate_operand" "")
+                 (match_operand:DI 2 "x86_64_general_operand" "")))]
   ""
   "ix86_expand_binary_operator (MINUS, DImode, operands); DONE;")
 
                             (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
                                      (match_dup 5))))
              (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+0, 1, operands+0, operands+3);
-   split_di (operands+1, 1, operands+1, operands+4);
-   split_di (operands+2, 1, operands+2, operands+5);")
+  "split_di (&operands[0], 3, &operands[0], &operands[3]);")
 
 (define_insn "subdi3_carry_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,r")
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
          (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
            (plus:QI (match_operand:QI 3 "ix86_carry_flag_operator" "")
-              (match_operand:QI 2 "general_operand" "qi,qm"))))
+              (match_operand:QI 2 "general_operand" "qn,qm"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (MINUS, QImode, operands)"
   "sbb{b}\t{%2, %0|%0, %2}"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
          (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
            (plus:HI (match_operand:HI 3 "ix86_carry_flag_operator" "")
-              (match_operand:HI 2 "general_operand" "ri,rm"))))
+              (match_operand:HI 2 "general_operand" "rn,rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (MINUS, HImode, operands)"
   "sbb{w}\t{%2, %0|%0, %2}"
    (set_attr "mode" "SI")])
 
 (define_expand "subsi3"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                            (match_operand:SI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+       (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
+                 (match_operand:SI 2 "general_operand" "")))]
   ""
   "ix86_expand_binary_operator (MINUS, SImode, operands); DONE;")
 
    (set_attr "mode" "DI")])
 
 (define_expand "subhi3"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                  (minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                            (match_operand:HI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+       (minus:HI (match_operand:HI 1 "nonimmediate_operand" "")
+                 (match_operand:HI 2 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (MINUS, HImode, operands); DONE;")
 
 (define_insn "*subhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
        (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-                 (match_operand:HI 2 "general_operand" "ri,rm")))
+                 (match_operand:HI 2 "general_operand" "rn,rm")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (MINUS, HImode, operands)"
   "sub{w}\t{%2, %0|%0, %2}"
   [(set (reg FLAGS_REG)
        (compare
          (minus:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-                   (match_operand:HI 2 "general_operand" "ri,rm"))
+                   (match_operand:HI 2 "general_operand" "rn,rm"))
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
        (minus:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*subhi_3"
   [(set (reg FLAGS_REG)
        (compare (match_operand:HI 1 "nonimmediate_operand" "0,0")
-                (match_operand:HI 2 "general_operand" "ri,rm")))
+                (match_operand:HI 2 "general_operand" "rn,rm")))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm,r")
        (minus:HI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCmode)
    (set_attr "mode" "HI")])
 
 (define_expand "subqi3"
-  [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
-                  (minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                            (match_operand:QI 2 "general_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+       (minus:QI (match_operand:QI 1 "nonimmediate_operand" "")
+                 (match_operand:QI 2 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (MINUS, QImode, operands); DONE;")
 
 (define_insn "*subqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
        (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-                 (match_operand:QI 2 "general_operand" "qn,qmn")))
+                 (match_operand:QI 2 "general_operand" "qn,qm")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (MINUS, QImode, operands)"
   "sub{b}\t{%2, %0|%0, %2}"
 (define_insn "*subqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
        (minus:QI (match_dup 0)
-                 (match_operand:QI 1 "general_operand" "qn,qmn")))
+                 (match_operand:QI 1 "general_operand" "qn,qm")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "sub{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
   [(set (reg FLAGS_REG)
        (compare
          (minus:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-                   (match_operand:QI 2 "general_operand" "qi,qm"))
+                   (match_operand:QI 2 "general_operand" "qn,qm"))
          (const_int 0)))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q")
-       (minus:HI (match_dup 1) (match_dup 2)))]
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+       (minus:QI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (MINUS, QImode, operands)"
   "sub{b}\t{%2, %0|%0, %2}"
 (define_insn "*subqi_3"
   [(set (reg FLAGS_REG)
        (compare (match_operand:QI 1 "nonimmediate_operand" "0,0")
-                (match_operand:QI 2 "general_operand" "qi,qm")))
-   (set (match_operand:HI 0 "nonimmediate_operand" "=qm,q")
-       (minus:HI (match_dup 1) (match_dup 2)))]
+                (match_operand:QI 2 "general_operand" "qn,qm")))
+   (set (match_operand:QI 0 "nonimmediate_operand" "=qm,q")
+       (minus:QI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCmode)
    && ix86_binary_operator_ok (MINUS, QImode, operands)"
   "sub{b}\t{%2, %0|%0, %2}"
 (define_insn "*mulhi3_1"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
        (mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
-                (match_operand:HI 2 "general_operand" "K,i,mr")))
+                (match_operand:HI 2 "general_operand" "K,n,mr")))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "@
                (match_operand:SF 2 "nonimmediate_operand" "")))]
   "TARGET_80387 || TARGET_SSE_MATH"
 {
-  if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+  if (TARGET_SSE_MATH && TARGET_RECIP && optimize_insn_for_speed_p ()
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations)
     {
    (set (match_operand:DI 1 "register_operand" "=&d,&d")
        (mod:DI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && !optimize_size && !TARGET_USE_CLTD"
+  "TARGET_64BIT && optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
   "#"
   [(set_attr "type" "multi")])
 
    (set (match_operand:DI 1 "register_operand" "=&d")
        (mod:DI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (optimize_size || TARGET_USE_CLTD)"
+  "TARGET_64BIT && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)"
   "#"
   [(set_attr "type" "multi")])
 
              (clobber (reg:CC FLAGS_REG))])]
 {
   /* Avoid use of cltd in favor of a mov+shift.  */
-  if (!TARGET_USE_CLTD && !optimize_size)
+  if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
     {
       if (true_regnum (operands[1]))
         emit_move_insn (operands[0], operands[1]);
    (set (match_operand:SI 1 "register_operand" "=&d,&d")
        (mod:SI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "!optimize_size && !TARGET_USE_CLTD"
+  "optimize_function_for_speed_p (cfun) && !TARGET_USE_CLTD"
   "#"
   [(set_attr "type" "multi")])
 
    (set (match_operand:SI 1 "register_operand" "=&d")
        (mod:SI (match_dup 2) (match_dup 3)))
    (clobber (reg:CC FLAGS_REG))]
-  "optimize_size || TARGET_USE_CLTD"
+  "optimize_function_for_size_p (cfun) || TARGET_USE_CLTD"
   "#"
   [(set_attr "type" "multi")])
 
              (clobber (reg:CC FLAGS_REG))])]
 {
   /* Avoid use of cltd in favor of a mov+shift.  */
-  if (!TARGET_USE_CLTD && !optimize_size)
+  if (!TARGET_USE_CLTD && optimize_function_for_speed_p (cfun))
     {
       if (true_regnum (operands[1]))
         emit_move_insn (operands[0], operands[1]);
   [(set (reg FLAGS_REG)
        (compare
          (and:SI (match_operand:SI 0 "nonimmediate_operand" "%!*a,r,rm")
-                 (match_operand:SI 1 "general_operand" "in,in,rin"))
+                 (match_operand:SI 1 "general_operand" "i,i,ri"))
          (const_int 0)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 (define_expand "anddi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (and:DI (match_operand:DI 1 "nonimmediate_operand" "")
-               (match_operand:DI 2 "x86_64_szext_general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:DI 2 "x86_64_szext_general_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_binary_operator (AND, DImode, operands); DONE;")
 
 (define_expand "andsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (and:SI (match_operand:SI 1 "nonimmediate_operand" "")
-               (match_operand:SI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:SI 2 "general_operand" "")))]
   ""
   "ix86_expand_binary_operator (AND, SImode, operands); DONE;")
 
        (and (match_dup 0)
             (const_int -65536)))
    (clobber (reg:CC FLAGS_REG))]
-  "optimize_size || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
+  "optimize_function_for_size_p (cfun) || (TARGET_FAST_PREFIX && !TARGET_PARTIAL_REG_STALL)"
   [(set (strict_low_part (match_dup 1)) (const_int 0))]
   "operands[1] = gen_lowpart (HImode, operands[0]);")
 
        (and (match_dup 0)
             (const_int -256)))
    (clobber (reg:CC FLAGS_REG))]
-  "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
   [(set (strict_low_part (match_dup 1)) (const_int 0))]
   "operands[1] = gen_lowpart (QImode, operands[0]);")
 
        (and (match_dup 0)
             (const_int -65281)))
    (clobber (reg:CC FLAGS_REG))]
-  "(optimize_size || !TARGET_PARTIAL_REG_STALL) && reload_completed"
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_REG_STALL) && reload_completed"
   [(parallel [(set (zero_extract:SI (match_dup 0)
                                    (const_int 8)
                                    (const_int 8))
 (define_expand "andhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (and:HI (match_operand:HI 1 "nonimmediate_operand" "")
-               (match_operand:HI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:HI 2 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (AND, HImode, operands); DONE;")
 
 (define_insn "*andhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,r,r")
        (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0,qm")
-               (match_operand:HI 2 "general_operand" "ri,rm,L")))
+               (match_operand:HI 2 "general_operand" "rn,rm,L")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, HImode, operands)"
 {
 (define_insn "*andhi_2"
   [(set (reg FLAGS_REG)
        (compare (and:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "g,ri"))
+                        (match_operand:HI 2 "general_operand" "rmn,rn"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (and:HI (match_dup 1) (match_dup 2)))]
 (define_expand "andqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (and:QI (match_operand:QI 1 "nonimmediate_operand" "")
-               (match_operand:QI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:QI 2 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (AND, QImode, operands); DONE;")
 
 (define_insn "*andqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,q,r")
        (and:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-               (match_operand:QI 2 "general_operand" "qi,qmi,ri")))
+               (match_operand:QI 2 "general_operand" "qn,qmn,rn")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (AND, QImode, operands)"
   "@
 (define_insn "*andqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
        (and:QI (match_dup 0)
-               (match_operand:QI 1 "general_operand" "qi,qmi")))
+               (match_operand:QI 1 "general_operand" "qn,qmn")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "and{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
   [(set (reg FLAGS_REG)
        (compare (and:QI
                      (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-                     (match_operand:QI 2 "general_operand" "qim,qi,i"))
+                     (match_operand:QI 2 "general_operand" "qmn,qn,n"))
                 (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm,*r")
        (and:QI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare (and:QI
                   (match_operand:QI 1 "nonimmediate_operand" "%0,0")
-                  (match_operand:QI 2 "general_operand" "qim,qi"))
+                  (match_operand:QI 2 "general_operand" "qmn,qn"))
                 (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
        (and:QI (match_dup 1) (match_dup 2)))]
   [(set (reg FLAGS_REG)
        (compare (and:QI
                   (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
-                  (match_operand:QI 1 "nonimmediate_operand" "qmi,qi"))
+                  (match_operand:QI 1 "nonimmediate_operand" "qmn,qn"))
                 (const_int 0)))
    (set (strict_low_part (match_dup 0))
        (and:QI (match_dup 0) (match_dup 1)))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "and{b}\t{%1, %0|%0, %1}"
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(~INTVAL (operands[2]) & ~(255 << 8))
     && GET_MODE (operands[0]) != QImode"
   [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && ANY_QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(~INTVAL (operands[2]) & ~255)
     && !(INTVAL (operands[2]) & 128)
     && GET_MODE (operands[0]) != QImode"
 (define_expand "iordi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (ior:DI (match_operand:DI 1 "nonimmediate_operand" "")
-               (match_operand:DI 2 "x86_64_general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:DI 2 "x86_64_general_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_binary_operator (IOR, DImode, operands); DONE;")
 
 (define_expand "iorsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (ior:SI (match_operand:SI 1 "nonimmediate_operand" "")
-               (match_operand:SI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:SI 2 "general_operand" "")))]
   ""
   "ix86_expand_binary_operator (IOR, SImode, operands); DONE;")
 
 (define_expand "iorhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (ior:HI (match_operand:HI 1 "nonimmediate_operand" "")
-               (match_operand:HI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:HI 2 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (IOR, HImode, operands); DONE;")
 
 (define_insn "*iorhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
        (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:HI 2 "general_operand" "g,ri")))
+               (match_operand:HI 2 "general_operand" "rmn,rn")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (IOR, HImode, operands)"
   "or{w}\t{%2, %0|%0, %2}"
 (define_insn "*iorhi_2"
   [(set (reg FLAGS_REG)
        (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "g,ri"))
+                        (match_operand:HI 2 "general_operand" "rmn,rn"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (ior:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*iorhi_3"
   [(set (reg FLAGS_REG)
        (compare (ior:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-                        (match_operand:HI 2 "general_operand" "g"))
+                        (match_operand:HI 2 "general_operand" "rmn"))
                 (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
 (define_expand "iorqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (ior:QI (match_operand:QI 1 "nonimmediate_operand" "")
-               (match_operand:QI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:QI 2 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (IOR, QImode, operands); DONE;")
 
 (define_insn "*iorqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
        (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-               (match_operand:QI 2 "general_operand" "qmi,qi,ri")))
+               (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (IOR, QImode, operands)"
   "@
 (define_insn "*iorqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+q,m"))
        (ior:QI (match_dup 0)
-               (match_operand:QI 1 "general_operand" "qmi,qi")))
+               (match_operand:QI 1 "general_operand" "qmn,qn")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "or{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
 (define_insn "*iorqi_2"
   [(set (reg FLAGS_REG)
        (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:QI 2 "general_operand" "qim,qi"))
+                        (match_operand:QI 2 "general_operand" "qmn,qn"))
                 (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
        (ior:QI (match_dup 1) (match_dup 2)))]
 (define_insn "*iorqi_2_slp"
   [(set (reg FLAGS_REG)
        (compare (ior:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
-                        (match_operand:QI 1 "general_operand" "qim,qi"))
+                        (match_operand:QI 1 "general_operand" "qmn,qn"))
                 (const_int 0)))
    (set (strict_low_part (match_dup 0))
        (ior:QI (match_dup 0) (match_dup 1)))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "or{b}\t{%1, %0|%0, %1}"
 (define_insn "*iorqi_3"
   [(set (reg FLAGS_REG)
        (compare (ior:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-                        (match_operand:QI 2 "general_operand" "qim"))
+                        (match_operand:QI 2 "general_operand" "qmn"))
                 (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCNOmode)
   [(set_attr "type" "alu")
    (set_attr "mode" "QI")])
 
-(define_insn "iorqi_ext_0"
+(define_insn "*iorqi_ext_0"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
                         (const_int 8)
                         (const_int 8))
            (const_int 8))
          (match_operand 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "or{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "1")
            (match_operand:QI 2 "general_operand" "Qm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "or{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
            (match_operand 2 "ext_register_operand" "Q"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "or{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
                           (const_int 8)
                           (const_int 8))))
    (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "ior{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(INTVAL (operands[2]) & ~(255 << 8))
     && GET_MODE (operands[0]) != QImode"
   [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && ANY_QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(INTVAL (operands[2]) & ~255)
     && (INTVAL (operands[2]) & 128)
     && GET_MODE (operands[0]) != QImode"
 (define_expand "xordi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
        (xor:DI (match_operand:DI 1 "nonimmediate_operand" "")
-               (match_operand:DI 2 "x86_64_general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:DI 2 "x86_64_general_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_binary_operator (XOR, DImode, operands); DONE;")
 
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
    && ix86_binary_operator_ok (XOR, DImode, operands)"
-  "@
-   xor{q}\t{%2, %0|%0, %2}
-   xor{q}\t{%2, %0|%0, %2}"
+  "xor{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
-   (set_attr "mode" "DI,DI")])
+   (set_attr "mode" "DI")])
 
 (define_insn "*xordi_2_rex64"
   [(set (reg FLAGS_REG)
   "TARGET_64BIT
    && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (XOR, DImode, operands)"
-  "@
-   xor{q}\t{%2, %0|%0, %2}
-   xor{q}\t{%2, %0|%0, %2}"
+  "xor{q}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
-   (set_attr "mode" "DI,DI")])
+   (set_attr "mode" "DI")])
 
 (define_insn "*xordi_3_rex64"
   [(set (reg FLAGS_REG)
 (define_expand "xorsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (xor:SI (match_operand:SI 1 "nonimmediate_operand" "")
-               (match_operand:SI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:SI 2 "general_operand" "")))]
   ""
   "ix86_expand_binary_operator (XOR, SImode, operands); DONE;")
 
 (define_expand "xorhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (xor:HI (match_operand:HI 1 "nonimmediate_operand" "")
-               (match_operand:HI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:HI 2 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (XOR, HImode, operands); DONE;")
 
 (define_insn "*xorhi_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=r,m")
        (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-               (match_operand:HI 2 "general_operand" "g,ri")))
+               (match_operand:HI 2 "general_operand" "rmn,rn")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (XOR, HImode, operands)"
   "xor{w}\t{%2, %0|%0, %2}"
 (define_insn "*xorhi_2"
   [(set (reg FLAGS_REG)
        (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:HI 2 "general_operand" "g,ri"))
+                        (match_operand:HI 2 "general_operand" "rmn,rn"))
                 (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=r,rm")
        (xor:HI (match_dup 1) (match_dup 2)))]
 (define_insn "*xorhi_3"
   [(set (reg FLAGS_REG)
        (compare (xor:HI (match_operand:HI 1 "nonimmediate_operand" "%0")
-                        (match_operand:HI 2 "general_operand" "g"))
+                        (match_operand:HI 2 "general_operand" "rmn"))
                 (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
   "ix86_match_ccmode (insn, CCNOmode)
 (define_expand "xorqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (xor:QI (match_operand:QI 1 "nonimmediate_operand" "")
-               (match_operand:QI 2 "general_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+               (match_operand:QI 2 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (XOR, QImode, operands); DONE;")
 
 (define_insn "*xorqi_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=q,m,r")
        (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0,0")
-               (match_operand:QI 2 "general_operand" "qmi,qi,ri")))
+               (match_operand:QI 2 "general_operand" "qmn,qn,rn")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (XOR, QImode, operands)"
   "@
 (define_insn "*xorqi_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
        (xor:QI (match_dup 0)
-               (match_operand:QI 1 "general_operand" "qi,qmi")))
+               (match_operand:QI 1 "general_operand" "qn,qmn")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "xor{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
-(define_insn "xorqi_ext_0"
+(define_insn "*xorqi_ext_0"
   [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
                         (const_int 8)
                         (const_int 8))
            (const_int 8))
          (match_operand 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "xor{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "1")
            (match_operand:QI 2 "general_operand" "Qm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "xor{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
            (match_operand 2 "ext_register_operand" "Q"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (!TARGET_PARTIAL_REG_STALL || optimize_size)"
+   && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "xor{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
                           (const_int 8)
                           (const_int 8))))
    (clobber (reg:CC FLAGS_REG))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_size)"
+  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
   "xor{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
   [(set (reg FLAGS_REG)
        (compare
          (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0,0")
-                 (match_operand:QI 2 "general_operand" "qim,qi"))
+                 (match_operand:QI 2 "general_operand" "qmn,qn"))
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=q,qm")
        (xor:QI (match_dup 1) (match_dup 2)))]
 (define_insn "*xorqi_2_slp"
   [(set (reg FLAGS_REG)
        (compare (xor:QI (match_operand:QI 0 "nonimmediate_operand" "+q,qm")
-                        (match_operand:QI 1 "general_operand" "qim,qi"))
+                        (match_operand:QI 1 "general_operand" "qmn,qn"))
                 (const_int 0)))
    (set (strict_low_part (match_dup 0))
        (xor:QI (match_dup 0) (match_dup 1)))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "xor{b}\t{%1, %0|%0, %1}"
   [(set (reg FLAGS_REG)
        (compare
          (xor:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
-                 (match_operand:QI 2 "general_operand" "qim"))
+                 (match_operand:QI 2 "general_operand" "qmn"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
   "ix86_match_ccmode (insn, CCNOmode)
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(INTVAL (operands[2]) & ~(255 << 8))
     && GET_MODE (operands[0]) != QImode"
   [(parallel [(set (zero_extract:SI (match_dup 0) (const_int 8) (const_int 8))
    (clobber (reg:CC FLAGS_REG))]
    "reload_completed
     && ANY_QI_REG_P (operands[0])
-    && (!TARGET_PARTIAL_REG_STALL || optimize_size)
+    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
     && !(INTVAL (operands[2]) & ~255)
     && (INTVAL (operands[2]) & 128)
     && GET_MODE (operands[0]) != QImode"
 ;; Negation instructions
 
 (define_expand "negti2"
-  [(parallel [(set (match_operand:TI 0 "nonimmediate_operand" "")
-                  (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+       (neg:TI (match_operand:TI 1 "nonimmediate_operand" "")))]
   "TARGET_64BIT"
   "ix86_expand_unary_operator (NEG, TImode, operands); DONE;")
 
   "TARGET_64BIT && reload_completed"
   [(parallel
     [(set (reg:CCZ FLAGS_REG)
-         (compare:CCZ (neg:DI (match_dup 2)) (const_int 0)))
-     (set (match_dup 0) (neg:DI (match_dup 2)))])
+         (compare:CCZ (neg:DI (match_dup 1)) (const_int 0)))
+     (set (match_dup 0) (neg:DI (match_dup 1)))])
    (parallel
-    [(set (match_dup 1)
+    [(set (match_dup 2)
          (plus:DI (plus:DI (ltu:DI (reg:CC FLAGS_REG) (const_int 0))
                            (match_dup 3))
                   (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
    (parallel
-    [(set (match_dup 1)
-         (neg:DI (match_dup 1)))
+    [(set (match_dup 2)
+         (neg:DI (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
-  "split_ti (operands+1, 1, operands+2, operands+3);
-   split_ti (operands+0, 1, operands+0, operands+1);")
+  "split_ti (&operands[0], 2, &operands[0], &operands[2]);")
 
 (define_expand "negdi2"
-  [(parallel [(set (match_operand:DI 0 "nonimmediate_operand" "")
-                  (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "")
+       (neg:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
   ""
   "ix86_expand_unary_operator (NEG, DImode, operands); DONE;")
 
   "!TARGET_64BIT && reload_completed"
   [(parallel
     [(set (reg:CCZ FLAGS_REG)
-         (compare:CCZ (neg:SI (match_dup 2)) (const_int 0)))
-     (set (match_dup 0) (neg:SI (match_dup 2)))])
+         (compare:CCZ (neg:SI (match_dup 1)) (const_int 0)))
+     (set (match_dup 0) (neg:SI (match_dup 1)))])
    (parallel
-    [(set (match_dup 1)
+    [(set (match_dup 2)
          (plus:SI (plus:SI (ltu:SI (reg:CC FLAGS_REG) (const_int 0))
                            (match_dup 3))
                   (const_int 0)))
      (clobber (reg:CC FLAGS_REG))])
    (parallel
-    [(set (match_dup 1)
-         (neg:SI (match_dup 1)))
+    [(set (match_dup 2)
+         (neg:SI (match_dup 2)))
      (clobber (reg:CC FLAGS_REG))])]
-  "split_di (operands+1, 1, operands+2, operands+3);
-   split_di (operands+0, 1, operands+0, operands+1);")
+  "split_di (&operands[0], 2, &operands[0], &operands[2]);");
 
 (define_insn "*negdi2_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 
 
 (define_expand "negsi2"
-  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
-                  (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "")
+       (neg:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
   ""
   "ix86_expand_unary_operator (NEG, SImode, operands); DONE;")
 
    (set_attr "mode" "SI")])
 
 (define_expand "neghi2"
-  [(parallel [(set (match_operand:HI 0 "nonimmediate_operand" "")
-                  (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "")
+       (neg:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_unary_operator (NEG, HImode, operands); DONE;")
 
    (set_attr "mode" "HI")])
 
 (define_expand "negqi2"
-  [(parallel [(set (match_operand:QI 0 "nonimmediate_operand" "")
-                  (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "")
+       (neg:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_unary_operator (NEG, QImode, operands); DONE;")
 
 
 ;; Changing of sign for FP values is doable using integer unit too.
 
-(define_expand "negsf2"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "")
-       (neg:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_fp_absneg_operator (NEG, SFmode, operands); DONE;")
-
-(define_expand "abssf2"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "")
-       (abs:SF (match_operand:SF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_fp_absneg_operator (ABS, SFmode, operands); DONE;")
-
-(define_insn "*absnegsf2_mixed"
-  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x  ,x,f,rm")
-       (match_operator:SF 3 "absneg_operator"
-         [(match_operand:SF 1 "nonimmediate_operand" "0   ,x,0,0 ")]))
-   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm  ,0,X,X "))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE_MATH && TARGET_MIX_SSE_I387
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
-  "#")
-
-(define_insn "*absnegsf2_sse"
-  [(set (match_operand:SF 0 "nonimmediate_operand"    "=x,x,rm")
-       (match_operator:SF 3 "absneg_operator"
-         [(match_operand:SF 1 "nonimmediate_operand" "0 ,x,0")]))
-   (use (match_operand:V4SF 2 "nonimmediate_operand"  "xm,0,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE_MATH
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
-  "#")
-
-(define_insn "*absnegsf2_i387"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=f,rm")
-       (match_operator:SF 3 "absneg_operator"
-         [(match_operand:SF 1 "nonimmediate_operand" "0,0")]))
-   (use (match_operand 2 "" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && !TARGET_SSE_MATH
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), SFmode, operands)"
-  "#")
-
-(define_expand "negdf2"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-       (neg:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "ix86_expand_fp_absneg_operator (NEG, DFmode, operands); DONE;")
-
-(define_expand "absdf2"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-       (abs:DF (match_operand:DF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "ix86_expand_fp_absneg_operator (ABS, DFmode, operands); DONE;")
-
-(define_insn "*absnegdf2_mixed"
-  [(set (match_operand:DF 0 "nonimmediate_operand"    "=x,x,f,rm")
-       (match_operator:DF 3 "absneg_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0,0")]))
-   (use (match_operand:V2DF 2 "nonimmediate_operand"  "xm,0,X,X"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
-  "#")
+(define_expand "<code><mode>2"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "")))]
+  "TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
 
-(define_insn "*absnegdf2_sse"
-  [(set (match_operand:DF 0 "nonimmediate_operand"    "=x,x,rm")
-       (match_operator:DF 3 "absneg_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0 ,x,0 ")]))
-   (use (match_operand:V2DF 2 "nonimmediate_operand"  "xm,0,X "))
+(define_insn "*absneg<mode>2_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,f,!r")
+       (match_operator:MODEF 3 "absneg_operator"
+         [(match_operand:MODEF 1 "register_operand" "0,x,0,0")]))
+   (use (match_operand:<ssevecmode> 2 "nonimmediate_operand" "xm,0,X,X"))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
+  "TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (<MODE>mode)"
   "#")
 
-(define_insn "*absnegdf2_i387"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,rm")
-       (match_operator:DF 3 "absneg_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0,0")]))
-   (use (match_operand 2 "" ""))
+(define_insn "*absneg<mode>2_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x,!r")
+       (match_operator:MODEF 3 "absneg_operator"
+         [(match_operand:MODEF 1 "register_operand" "0 ,x,0")]))
+   (use (match_operand:<ssevecmode> 2 "register_operand" "xm,0,X"))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), DFmode, operands)"
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
   "#")
 
-(define_expand "negxf2"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-       (neg:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387"
-  "ix86_expand_fp_absneg_operator (NEG, XFmode, operands); DONE;")
-
-(define_expand "absxf2"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-       (abs:XF (match_operand:XF 1 "nonimmediate_operand" "")))]
-  "TARGET_80387"
-  "ix86_expand_fp_absneg_operator (ABS, XFmode, operands); DONE;")
-
-(define_insn "*absnegxf2_i387"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,?rm")
-       (match_operator:XF 3 "absneg_operator"
-         [(match_operand:XF 1 "nonimmediate_operand" "0,0")]))
+(define_insn "*absneg<mode>2_i387"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f,!r")
+       (match_operator:X87MODEF 3 "absneg_operator"
+         [(match_operand:X87MODEF 1 "register_operand" "0,0")]))
    (use (match_operand 2 "" ""))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_80387
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), XFmode, operands)"
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
   "#")
 
-(define_expand "negtf2"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-       (neg:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
-  "TARGET_64BIT"
-  "ix86_expand_fp_absneg_operator (NEG, TFmode, operands); DONE;")
-
-(define_expand "abstf2"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-       (abs:TF (match_operand:TF 1 "nonimmediate_operand" "")))]
-  "TARGET_64BIT"
-  "ix86_expand_fp_absneg_operator (ABS, TFmode, operands); DONE;")
+(define_expand "<code>tf2"
+  [(set (match_operand:TF 0 "register_operand" "")
+       (absneg:TF (match_operand:TF 1 "register_operand" "")))]
+  "TARGET_SSE2"
+  "ix86_expand_fp_absneg_operator (<CODE>, TFmode, operands); DONE;")
 
 (define_insn "*absnegtf2_sse"
-  [(set (match_operand:TF 0 "nonimmediate_operand"    "=x,x,m")
+  [(set (match_operand:TF 0 "register_operand" "=x,x")
        (match_operator:TF 3 "absneg_operator"
-         [(match_operand:TF 1 "nonimmediate_operand" "0, x,0")]))
-   (use (match_operand:TF 2 "nonimmediate_operand"    "xm,0,X"))
+         [(match_operand:TF 1 "register_operand" "0,x")]))
+   (use (match_operand:TF 2 "nonimmediate_operand" "xm,0"))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && ix86_unary_operator_ok (GET_CODE (operands[3]), TFmode, operands)"
+  "TARGET_SSE2"
   "#")
 
 ;; Splitters for fp abs and neg.
   operands[1] = tmp;
 })
 
-(define_split
-  [(set (match_operand 0 "memory_operand" "")
-       (match_operator 1 "absneg_operator" [(match_dup 0)]))
-   (use (match_operand 2 "" ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "reload_completed"
-  [(parallel [(set (match_dup 0) (match_dup 1))
-             (clobber (reg:CC FLAGS_REG))])]
-{
-  enum machine_mode mode = GET_MODE (operands[0]);
-  int size = mode == XFmode ? 10 : GET_MODE_SIZE (mode);
-  rtx tmp;
-
-  operands[0] = adjust_address (operands[0], QImode, size - 1);
-  if (GET_CODE (operands[1]) == ABS)
-    {
-      tmp = gen_int_mode (0x7f, QImode);
-      tmp = gen_rtx_AND (QImode, operands[0], tmp);
-    }
-  else
-    {
-      tmp = gen_int_mode (0x80, QImode);
-      tmp = gen_rtx_XOR (QImode, operands[0], tmp);
-    }
-  operands[1] = tmp;
-})
-
 ;; Conditionalize these after reload. If they match before reload, we
 ;; lose the clobber and ability to use integer instructions.
 
-(define_insn "*negsf2_1"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-       (neg:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
-  "fchs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "SF")])
-
-(define_insn "*negdf2_1"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (neg:DF (match_operand:DF 1 "register_operand" "0")))]
-  "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
-  "fchs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*negxf2_1"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (neg:XF (match_operand:XF 1 "register_operand" "0")))]
-  "TARGET_80387"
-  "fchs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
-
-(define_insn "*abssf2_1"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-       (abs:SF (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_80387 && (reload_completed || !TARGET_SSE_MATH)"
-  "fabs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "SF")])
-
-(define_insn "*absdf2_1"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (abs:DF (match_operand:DF 1 "register_operand" "0")))]
-  "TARGET_80387 && (reload_completed || !(TARGET_SSE2 && TARGET_SSE_MATH))"
-  "fabs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*absxf2_1"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (abs:XF (match_operand:XF 1 "register_operand" "0")))]
-  "TARGET_80387"
-  "fabs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*negextendsfdf2"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (neg:DF (float_extend:DF
-                 (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "fchs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "DF")])
-
-(define_insn "*negextenddfxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (neg:XF (float_extend:XF
-                 (match_operand:DF 1 "register_operand" "0"))))]
-  "TARGET_80387"
-  "fchs"
-  [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
-
-(define_insn "*negextendsfxf2"
-  [(set (match_operand:XF 0 "register_operand" "=f")
-       (neg:XF (float_extend:XF
-                 (match_operand:SF 1 "register_operand" "0"))))]
-  "TARGET_80387"
-  "fchs"
+(define_insn "*<code><mode>2_1"
+  [(set (match_operand:X87MODEF 0 "register_operand" "=f")
+       (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand" "0")))]
+  "TARGET_80387
+   && (reload_completed
+       || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
+  "f<absnegprefix>"
   [(set_attr "type" "fsgn")
-   (set_attr "mode" "XF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*absextendsfdf2"
+(define_insn "*<code>extendsfdf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
-       (abs:DF (float_extend:DF
-                 (match_operand:SF 1 "register_operand" "0"))))]
+       (absneg:DF (float_extend:DF
+                    (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "fabs"
+  "f<absnegprefix>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "DF")])
 
-(define_insn "*absextenddfxf2"
+(define_insn "*<code>extendsfxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
-       (abs:XF (float_extend:XF
-         (match_operand:DF 1 "register_operand" "0"))))]
+       (absneg:XF (float_extend:XF
+                    (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "fabs"
+  "f<absnegprefix>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
-(define_insn "*absextendsfxf2"
+(define_insn "*<code>extenddfxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
-       (abs:XF (float_extend:XF
-         (match_operand:SF 1 "register_operand" "0"))))]
+       (absneg:XF (float_extend:XF
+                     (match_operand:DF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "fabs"
+  "f<absnegprefix>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
    (match_operand:CSGNMODE 1 "nonmemory_operand" "")
    (match_operand:CSGNMODE 2 "register_operand" "")]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
 {
   ix86_expand_copysign (operands);
   DONE;
           (match_operand:<CSGNVMODE> 3 "nonimmediate_operand" "xm")]
          UNSPEC_COPYSIGN))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
   "#"
   "&& reload_completed"
   [(const_int 0)]
          UNSPEC_COPYSIGN))
    (clobber (match_scratch:<CSGNVMODE> 1 "=x,x,x,x,x"))]
   "(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-   || (TARGET_64BIT && (<MODE>mode == TFmode))"
+   || (TARGET_SSE2 && (<MODE>mode == TFmode))"
   "#")
 
 (define_split
          UNSPEC_COPYSIGN))
    (clobber (match_scratch:<CSGNVMODE> 1 ""))]
   "((SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
-    || (TARGET_64BIT && (<MODE>mode == TFmode)))
+    || (TARGET_SSE2 && (<MODE>mode == TFmode)))
    && reload_completed"
   [(const_int 0)]
 {
 ;; than 31.
 
 (define_expand "ashlti3"
-  [(parallel [(set (match_operand:TI 0 "register_operand" "")
-                  (ashift:TI (match_operand:TI 1 "register_operand" "")
-                             (match_operand:QI 2 "nonmemory_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "")
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_64BIT"
-{
-  if (! immediate_operand (operands[2], QImode))
-    {
-      emit_insn (gen_ashlti3_1 (operands[0], operands[1], operands[2]));
-      DONE;
-    }
-  ix86_expand_binary_operator (ASHIFT, TImode, operands);
-  DONE;
-})
+  "ix86_expand_binary_operator (ASHIFT, TImode, operands); DONE;")
 
-(define_insn "ashlti3_1"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-       (ashift:TI (match_operand:TI 1 "register_operand" "0")
-                  (match_operand:QI 2 "register_operand" "c")))
-   (clobber (match_scratch:DI 3 "=&r"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
+;; This pattern must be defined before *ashlti3_1 to prevent
+;; combine pass from converting sse2_ashlti3 to *ashlti3_1.
 
-;; This pattern must be defined before *ashlti3_2 to prevent
-;; combine pass from converting sse2_ashlti3 to *ashlti3_2.
+(define_insn "*avx_ashlti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (ashift:TI (match_operand:TI 1 "register_operand" "x")
+                  (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
 
 (define_insn "sse2_ashlti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*ashlti3_2"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-       (ashift:TI (match_operand:TI 1 "register_operand" "0")
-                  (match_operand:QI 2 "immediate_operand" "O")))
+(define_insn "*ashlti3_1"
+  [(set (match_operand:TI 0 "register_operand" "=&r,r")
+       (ashift:TI (match_operand:TI 1 "reg_or_pm1_operand" "n,0")
+                  (match_operand:QI 2 "nonmemory_operand" "Oc,Oc")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
   [(set_attr "type" "multi")])
 
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-       (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
-                  (match_operand:QI 2 "register_operand" "")))
-   (clobber (match_scratch:DI 3 ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+                             (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_64BIT"
   [(const_int 0)]
   "ix86_split_ashl (operands, operands[3], TImode); DONE;")
 
 (define_split
   [(set (match_operand:TI 0 "register_operand" "")
-       (ashift:TI (match_operand:TI 1 "register_operand" "")
-                  (match_operand:QI 2 "immediate_operand" "")))
+       (ashift:TI (match_operand:TI 1 "nonmemory_operand" "")
+                  (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                   ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_ashl (operands, NULL_RTX, TImode); DONE;")
 
 (define_insn "x86_64_shld"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
-                 (match_operand:QI 2 "nonmemory_operand" "J,c"))
-               (lshiftrt:DI (match_operand:DI 1 "register_operand" "r,r")
+                 (match_operand:QI 2 "nonmemory_operand" "Jc"))
+               (lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
                  (minus:QI (const_int 64) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "@
-   shld{q}\t{%2, %1, %0|%0, %1, %2}
-   shld{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{q}\t{%s2%1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
    (set_attr "athlon_decode" "vector")
    (set_attr "amdfam10_decode" "vector")])
 
-(define_expand "x86_64_shift_adj"
+(define_expand "x86_64_shift_adj_1"
   [(set (reg:CCZ FLAGS_REG)
        (compare:CCZ (and:QI (match_operand:QI 2 "register_operand" "")
                             (const_int 64))
   "TARGET_64BIT"
   "")
 
+(define_expand "x86_64_shift_adj_2"
+  [(use (match_operand:DI 0 "register_operand" ""))
+   (use (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  "TARGET_64BIT"
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                             gen_rtx_LABEL_REF (VOIDmode, label),
+                             pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  ix86_expand_clear (operands[1]);
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
 (define_expand "ashldi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
        (ashift:DI (match_operand:DI 1 "ashldi_input_operand" "")
       if (REG_P (operands[2]))
        return "sal{q}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{q}\t%0";
       else
        return "sal{q}\t{%2, %0|%0, %2}";
   [(set (reg FLAGS_REG)
        (compare
          (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "immediate_operand" "e"))
+                    (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashift:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
-   && (optimize_size
+   && (optimize_function_for_size_p (cfun)
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{q}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{q}\t%0";
       else
        return "sal{q}\t{%2, %0|%0, %2}";
   [(set (reg FLAGS_REG)
        (compare
          (ashift:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "immediate_operand" "e"))
+                    (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT
-   && (optimize_size
+   && (optimize_function_for_size_p (cfun)
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{q}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{q}\t%0";
       else
        return "sal{q}\t{%2, %0|%0, %2}";
   [(const_int 0)]
   "ix86_split_ashl (operands, NULL_RTX, DImode); DONE;")
 
-(define_insn "x86_shld_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
+(define_insn "x86_shld"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
-                 (match_operand:QI 2 "nonmemory_operand" "I,c"))
-               (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r")
+                 (match_operand:QI 2 "nonmemory_operand" "Ic"))
+               (lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
                  (minus:QI (const_int 32) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "@
-   shld{l}\t{%2, %1, %0|%0, %1, %2}
-   shld{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shld{l}\t{%s2%1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
 (define_expand "ashlsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (ASHIFT, SImode, operands); DONE;")
 
       if (REG_P (operands[2]))
        return "sal{l}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{l}\t%0";
       else
        return "sal{l}\t{%2, %0|%0, %2}";
       if (REG_P (operands[2]))
        return "sal{l}\t{%b2, %k0|%k0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{l}\t%k0";
       else
        return "sal{l}\t{%2, %k0|%k0, %2}";
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashift:SI (match_dup 1) (match_dup 2)))]
-   "(optimize_size
+   "(optimize_function_for_size_p (cfun)
      || !TARGET_PARTIAL_FLAG_REG_STALL
      || (operands[2] == const1_rtx
         && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{l}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{l}\t%0";
       else
        return "sal{l}\t{%2, %0|%0, %2}";
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_size
+  "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{l}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{l}\t%0";
       else
        return "sal{l}\t{%2, %0|%0, %2}";
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
-   && (optimize_size
+   && (optimize_function_for_size_p (cfun)
        || !TARGET_PARTIAL_FLAG_REG_STALL
        || (operands[2] == const1_rtx
           && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{l}\t{%b2, %k0|%k0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{l}\t%k0";
       else
        return "sal{l}\t{%2, %k0|%k0, %2}";
 (define_expand "ashlhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (ASHIFT, HImode, operands); DONE;")
 
       if (REG_P (operands[2]))
        return "sal{w}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{w}\t%0";
       else
        return "sal{w}\t{%2, %0|%0, %2}";
       if (REG_P (operands[2]))
        return "sal{w}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{w}\t%0";
       else
        return "sal{w}\t{%2, %0|%0, %2}";
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashift:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_size
+  "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{w}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{w}\t%0";
       else
        return "sal{w}\t{%2, %0|%0, %2}";
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_size
+  "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{w}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{w}\t%0";
       else
        return "sal{w}\t{%2, %0|%0, %2}";
 (define_expand "ashlqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (ASHIFT, QImode, operands); DONE;")
 
            return "sal{b}\t{%b2, %0|%0, %b2}";
        }
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t%0";
            return "sal{b}\t{%b2, %0|%0, %b2}";
        }
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        {
          if (get_attr_mode (insn) == MODE_SI)
            return "sal{l}\t%0";
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashift:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_size
+  "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{b}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{b}\t%0";
       else
        return "sal{b}\t{%2, %0|%0, %2}";
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "(optimize_size
+  "(optimize_function_for_size_p (cfun)
     || !TARGET_PARTIAL_FLAG_REG_STALL
     || (operands[2] == const1_rtx
        && (TARGET_SHIFT1
       if (REG_P (operands[2]))
        return "sal{b}\t{%b2, %0|%0, %b2}";
       else if (operands[2] == const1_rtx
-              && (TARGET_SHIFT1 || optimize_size))
+              && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
        return "sal{b}\t%0";
       else
        return "sal{b}\t{%2, %0|%0, %2}";
 ;; See comment above `ashldi3' about how this works.
 
 (define_expand "ashrti3"
-  [(parallel [(set (match_operand:TI 0 "register_operand" "")
-                  (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-                               (match_operand:QI 2 "nonmemory_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:TI 0 "register_operand" "")
+       (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_64BIT"
-{
-  if (! immediate_operand (operands[2], QImode))
-    {
-      emit_insn (gen_ashrti3_1 (operands[0], operands[1], operands[2]));
-      DONE;
-    }
-  ix86_expand_binary_operator (ASHIFTRT, TImode, operands);
-  DONE;
-})
+  "ix86_expand_binary_operator (ASHIFTRT, TImode, operands); DONE;")
 
-(define_insn "ashrti3_1"
+(define_insn "*ashrti3_1"
   [(set (match_operand:TI 0 "register_operand" "=r")
        (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                    (match_operand:QI 2 "register_operand" "c")))
-   (clobber (match_scratch:DI 3 "=&r"))
+                    (match_operand:QI 2 "nonmemory_operand" "Oc")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
   [(set_attr "type" "multi")])
 
-(define_insn "*ashrti3_2"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-       (ashiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                    (match_operand:QI 2 "immediate_operand" "O")))
-   (clobber (reg:CC FLAGS_REG))]
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
+                               (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
   "TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
-
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-       (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-                    (match_operand:QI 2 "register_operand" "")))
-   (clobber (match_scratch:DI 3 ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
   [(const_int 0)]
   "ix86_split_ashr (operands, operands[3], TImode); DONE;")
 
 (define_split
   [(set (match_operand:TI 0 "register_operand" "")
        (ashiftrt:TI (match_operand:TI 1 "register_operand" "")
-                    (match_operand:QI 2 "immediate_operand" "")))
+                    (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                   ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_ashr (operands, NULL_RTX, TImode); DONE;")
 
 (define_insn "x86_64_shrd"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m,r*m")
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashiftrt:DI (match_dup 0)
-                 (match_operand:QI 2 "nonmemory_operand" "J,c"))
-               (ashift:DI (match_operand:DI 1 "register_operand" "r,r")
+                 (match_operand:QI 2 "nonmemory_operand" "Jc"))
+               (ashift:DI (match_operand:DI 1 "register_operand" "r")
                  (minus:QI (const_int 64) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "@
-   shrd{q}\t{%2, %1, %0|%0, %1, %2}
-   shrd{q}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
   ""
   "ix86_expand_binary_operator (ASHIFTRT, DImode, operands); DONE;")
 
-(define_insn "*ashrdi3_63_rex64"
+(define_expand "x86_64_shift_adj_3"
+  [(use (match_operand:DI 0 "register_operand" ""))
+   (use (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                             gen_rtx_LABEL_REF (VOIDmode, label),
+                             pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_move_insn (operands[0], operands[1]);
+  emit_insn (gen_ashrdi3_63_rex64 (operands[1], operands[1], GEN_INT (63)));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_insn "ashrdi3_63_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=*d,rm")
        (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "*a,0")
                     (match_operand:DI 2 "const_int_operand" "i,i")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && INTVAL (operands[2]) == 63
-   && (TARGET_USE_CLTD || optimize_size)
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "@
    {cqto|cqo}
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
   [(set_attr "type" "ishift")
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t%0"
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_operand" "n"))
+                      (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_operand" "n"))
+                      (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, DImode, operands)"
   "sar{q}\t{%2, %0|%0, %2}"
   [(const_int 0)]
   "ix86_split_ashr (operands, NULL_RTX, DImode); DONE;")
 
-(define_insn "x86_shrd_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m,r*m")
+(define_insn "x86_shrd"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashiftrt:SI (match_dup 0)
-                 (match_operand:QI 2 "nonmemory_operand" "I,c"))
-               (ashift:SI (match_operand:SI 1 "register_operand" "r,r")
+                 (match_operand:QI 2 "nonmemory_operand" "Ic"))
+               (ashift:SI (match_operand:SI 1 "register_operand" "r")
                  (minus:QI (const_int 32) (match_dup 2)))))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "@
-   shrd{l}\t{%2, %1, %0|%0, %1, %2}
-   shrd{l}\t{%s2%1, %0|%0, %1, %2}"
+  "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "pent_pair" "np")
   DONE;
 })
 
-(define_insn "ashrsi3_31"
+(define_expand "ashrsi3_31"
+  [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
+                  (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
+                               (match_operand:SI 2 "const_int_operand" "i,i")))
+              (clobber (reg:CC FLAGS_REG))])]
+  "")
+
+(define_insn "*ashrsi3_31"
   [(set (match_operand:SI 0 "nonimmediate_operand" "=*d,rm")
        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "*a,0")
                     (match_operand:SI 2 "const_int_operand" "i,i")))
    (clobber (reg:CC FLAGS_REG))]
-  "INTVAL (operands[2]) == 31 && (TARGET_USE_CLTD || optimize_size)
+  "INTVAL (operands[2]) == 31
+   && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "@
    {cltd|cdq}
        (zero_extend:DI (ashiftrt:SI (match_operand:SI 1 "register_operand" "*a,0")
                                     (match_operand:SI 2 "const_int_operand" "i,i"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_CLTD || optimize_size)
+  "TARGET_64BIT && (TARGET_USE_CLTD || optimize_function_for_size_p (cfun))
    && INTVAL (operands[2]) == 31
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "@
 (define_expand "ashrsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (ASHIFTRT, SImode, operands); DONE;")
 
        (ashiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
   [(set_attr "type" "ishift")
                                     (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%k0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%0"
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t%k0"
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %0|%0, %2}"
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %0|%0, %2}"
    (set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI (ashiftrt:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, SImode, operands)"
   "sar{l}\t{%2, %k0|%k0, %2}"
 (define_expand "ashrhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (ASHIFTRT, HImode, operands); DONE;")
 
        (ashiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t%0"
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (ashiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t{%2, %0|%0, %2}"
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, HImode, operands)"
   "sar{w}\t{%2, %0|%0, %2}"
 (define_expand "ashrqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (ASHIFTRT, QImode, operands); DONE;")
 
        (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift")
        (ashiftrt:QI (match_dup 0)
                     (match_operand:QI 1 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
-   && (TARGET_SHIFT1 || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set_attr "type" "ishift1")
        (ashiftrt:QI (match_dup 0)
                     (match_operand:QI 1 "nonmemory_operand" "I,c")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    sar{b}\t{%1, %0|%0, %1}
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
   [(set (reg FLAGS_REG)
        (compare
          (ashiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const1_operand" "I"))
+                      (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t%0"
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (ashiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t{%2, %0|%0, %2}"
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (ASHIFTRT, QImode, operands)"
   "sar{b}\t{%2, %0|%0, %2}"
 ;; See comment above `ashldi3' about how this works.
 
 (define_expand "lshrti3"
-  [(parallel [(set (match_operand:TI 0 "register_operand" "")
-                  (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-                               (match_operand:QI 2 "nonmemory_operand" "")))
-             (clobber (reg:CC FLAGS_REG))])]
+  [(set (match_operand:TI 0 "register_operand" "")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_64BIT"
-{
-  if (! immediate_operand (operands[2], QImode))
-    {
-      emit_insn (gen_lshrti3_1 (operands[0], operands[1], operands[2]));
-      DONE;
-    }
-  ix86_expand_binary_operator (LSHIFTRT, TImode, operands);
-  DONE;
-})
+  "ix86_expand_binary_operator (LSHIFTRT, TImode, operands); DONE;")
 
-(define_insn "lshrti3_1"
-  [(set (match_operand:TI 0 "register_operand" "=r")
-       (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                    (match_operand:QI 2 "register_operand" "c")))
-   (clobber (match_scratch:DI 3 "=&r"))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "#"
-  [(set_attr "type" "multi")])
+;; This pattern must be defined before *lshrti3_1 to prevent
+;; combine pass from converting sse2_lshrti3 to *lshrti3_1.
 
-;; This pattern must be defined before *lshrti3_2 to prevent
-;; combine pass from converting sse2_lshrti3 to *lshrti3_2.
+(define_insn "*avx_lshrti3"
+  [(set (match_operand:TI 0 "register_operand" "=x")
+       (lshiftrt:TI (match_operand:TI 1 "register_operand" "x")
+                    (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
+  "TARGET_AVX"
+{
+  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
+  return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
+}
+  [(set_attr "type" "sseishft")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
 
 (define_insn "sse2_lshrti3"
   [(set (match_operand:TI 0 "register_operand" "=x")
    (set_attr "prefix_data16" "1")
    (set_attr "mode" "TI")])
 
-(define_insn "*lshrti3_2"
+(define_insn "*lshrti3_1"
   [(set (match_operand:TI 0 "register_operand" "=r")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "0")
-                    (match_operand:QI 2 "immediate_operand" "O")))
+                    (match_operand:QI 2 "nonmemory_operand" "Oc")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "#"
   [(set_attr "type" "multi")])
 
-(define_split
-  [(set (match_operand:TI 0 "register_operand" "")
-       (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-                    (match_operand:QI 2 "register_operand" "")))
-   (clobber (match_scratch:DI 3 ""))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
+(define_peephole2
+  [(match_scratch:DI 3 "r")
+   (parallel [(set (match_operand:TI 0 "register_operand" "")
+                  (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
+                               (match_operand:QI 2 "nonmemory_operand" "")))
+             (clobber (reg:CC FLAGS_REG))])
+   (match_dup 3)]
+  "TARGET_64BIT"
   [(const_int 0)]
   "ix86_split_lshr (operands, operands[3], TImode); DONE;")
 
 (define_split
   [(set (match_operand:TI 0 "register_operand" "")
        (lshiftrt:TI (match_operand:TI 1 "register_operand" "")
-                    (match_operand:QI 2 "immediate_operand" "")))
+                    (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && reload_completed"
+  "TARGET_64BIT && ((optimize > 0 && flag_peephole2)
+                   ? epilogue_completed : reload_completed)"
   [(const_int 0)]
   "ix86_split_lshr (operands, NULL_RTX, TImode); DONE;")
 
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
   [(set_attr "type" "ishift")
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t%0"
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_operand" "e"))
+                      (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (set (match_operand:DI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:DI (match_dup 1) (match_dup 2)))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t{%2, %0|%0, %2}"
   [(set (reg FLAGS_REG)
        (compare
          (lshiftrt:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                      (match_operand:QI 2 "const_int_operand" "e"))
+                      (match_operand:QI 2 "const_1_to_63_operand" "J"))
          (const_int 0)))
    (clobber (match_scratch:DI 0 "=r"))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{q}\t{%2, %0|%0, %2}"
 (define_expand "lshrsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (LSHIFTRT, SImode, operands); DONE;")
 
        (lshiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
   [(set_attr "type" "ishift")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%k0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%0"
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t%k0"
          (const_int 0)))
    (set (match_operand:SI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:SI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %0|%0, %2}"
                     (match_operand:QI 2 "const_1_to_31_operand" "I"))
         (const_int 0)))
    (clobber (match_scratch:SI 0 "=r"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %0|%0, %2}"
    (set (match_operand:DI 0 "register_operand" "=r")
        (lshiftrt:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT
-   && (optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+   && (optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{l}\t{%2, %k0|%k0, %2}"
 (define_expand "lshrhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (LSHIFTRT, HImode, operands); DONE;")
 
        (lshiftrt:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
   [(set_attr "type" "ishift")
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t%0"
          (const_int 0)))
    (set (match_operand:HI 0 "nonimmediate_operand" "=rm")
        (lshiftrt:HI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t{%2, %0|%0, %2}"
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:HI 0 "=r"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
   "shr{w}\t{%2, %0|%0, %2}"
 (define_expand "lshrqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (LSHIFTRT, QImode, operands); DONE;")
 
        (lshiftrt:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
   [(set_attr "type" "ishift")
        (lshiftrt:QI (match_dup 0)
                     (match_operand:QI 1 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
   "shr{b}\t%0"
   [(set_attr "type" "ishift1")
    (set (attr "length")
        (lshiftrt:QI (match_dup 0)
                     (match_operand:QI 1 "nonmemory_operand" "I,c")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    shr{b}\t{%1, %0|%0, %1}
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
                       (match_operand:QI 2 "const1_operand" ""))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t%0"
          (const_int 0)))
    (set (match_operand:QI 0 "nonimmediate_operand" "=qm")
        (lshiftrt:QI (match_dup 1) (match_dup 2)))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t{%2, %0|%0, %2}"
                       (match_operand:QI 2 "const_1_to_31_operand" "I"))
          (const_int 0)))
    (clobber (match_scratch:QI 0 "=q"))]
-  "(optimize_size || !TARGET_PARTIAL_FLAG_REG_STALL)
+  "(optimize_function_for_size_p (cfun) || !TARGET_PARTIAL_FLAG_REG_STALL)
    && ix86_match_ccmode (insn, CCGOCmode)
    && ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
   "shr{b}\t{%2, %0|%0, %2}"
 (define_expand "rotldi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
        (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
  ""
 {
   if (TARGET_64BIT)
                  (lshiftrt:SI (match_dup 3)
                               (minus:QI (const_int 32) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])]
- "split_di (operands, 1, operands + 4, operands + 5);")
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
 
 (define_insn "*rotlsi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATE, DImode, operands)"
   "rol{q}\t%0"
   [(set_attr "type" "rotate")
 (define_expand "rotlsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
 
        (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATE, SImode, operands)"
   "rol{l}\t%0"
   [(set_attr "type" "rotate")
                     (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATE, SImode, operands)"
   "rol{l}\t%k0"
   [(set_attr "type" "rotate")
 (define_expand "rotlhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
 
        (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATE, HImode, operands)"
   "rol{w}\t%0"
   [(set_attr "type" "rotate")
 (define_expand "rotlqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
 
        (rotate:QI (match_dup 0)
                   (match_operand:QI 1 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
   "rol{b}\t%0"
   [(set_attr "type" "rotate1")
    (set (attr "length")
        (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                   (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATE, QImode, operands)"
   "rol{b}\t%0"
   [(set_attr "type" "rotate")
        (rotate:QI (match_dup 0)
                   (match_operand:QI 1 "nonmemory_operand" "I,c")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    rol{b}\t{%1, %0|%0, %1}
 (define_expand "rotrdi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
        (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                  (match_operand:QI 2 "nonmemory_operand" "")))]
  ""
 {
   if (TARGET_64BIT)
                  (ashift:SI (match_dup 3)
                             (minus:QI (const_int 32) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])]
- "split_di (operands, 1, operands + 4, operands + 5);")
+ "split_di (&operands[0], 1, &operands[4], &operands[5]);")
 
 (define_insn "*rotrdi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
   "ror{q}\t%0"
   [(set_attr "type" "rotate")
 (define_expand "rotrsi3"
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
 
        (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
   "ror{l}\t%0"
   [(set_attr "type" "rotate")
                       (match_operand:QI 2 "const1_operand" ""))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_size)
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
   "ror{l}\t%k0"
   [(set_attr "type" "rotate")
 (define_expand "rotrhi3"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_HIMODE_MATH"
   "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
 
        (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
   "ror{w}\t%0"
   [(set_attr "type" "rotate")
 (define_expand "rotrqi3"
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
+                    (match_operand:QI 2 "nonmemory_operand" "")))]
   "TARGET_QIMODE_MATH"
   "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
 
        (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
                     (match_operand:QI 2 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_size)
+  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
    && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
   "ror{b}\t%0"
   [(set_attr "type" "rotate")
        (rotatert:QI (match_dup 0)
                     (match_operand:QI 1 "const1_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
-   && (TARGET_SHIFT1 || optimize_size)"
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
   "ror{b}\t%0"
   [(set_attr "type" "rotate1")
    (set (attr "length")
        (rotatert:QI (match_dup 0)
                     (match_operand:QI 1 "nonmemory_operand" "I,c")))
    (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
+  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
    ror{b}\t{%1, %0|%0, %1}
        (const_int 1))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
-  "bts{q} %1,%0"
+  "bts{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")])
 
 (define_insn "*btrq"
        (const_int 0))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
-  "btr{q} %1,%0"
+  "btr{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")])
 
 (define_insn "*btcq"
        (not:DI (zero_extract:DI (match_dup 0) (const_int 1) (match_dup 1))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && (TARGET_USE_BT || reload_completed)"
-  "btc{q} %1,%0"
+  "btc{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")])
 
 ;; Allow Nocona to avoid these instructions if a register is available.
 
   if (HOST_BITS_PER_WIDE_INT >= 64)
     lo = (HOST_WIDE_INT)1 << i, hi = 0;
-  else if (i < HOST_BITS_PER_WIDE_INT)
-    lo = (HOST_WIDE_INT)1 << i, hi = 0;
-  else
-    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
-
-  op1 = immed_double_const (lo, hi, DImode);
-  if (i >= 31)
-    {
-      emit_move_insn (operands[2], op1);
-      op1 = operands[2];
-    }
-
-  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
-  DONE;
-})
-\f
-;; Store-flag instructions.
-
-;; For all sCOND expanders, also expand the compare or test insn that
-;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
-
-;; %%% Do the expansion to SImode.  If PII, do things the xor+setcc way
-;; to avoid partial register stalls.  Otherwise do things the setcc+movzx
-;; way, which can later delete the movzx if only QImode is needed.
-
-(define_expand "seq"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (eq:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (EQ, operands[0])) DONE; else FAIL;")
-
-(define_expand "sne"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (ne:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (NE, operands[0])) DONE; else FAIL;")
-
-(define_expand "sgt"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (gt:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (GT, operands[0])) DONE; else FAIL;")
-
-(define_expand "sgtu"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (gtu:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (GTU, operands[0])) DONE; else FAIL;")
-
-(define_expand "slt"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (lt:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (LT, operands[0])) DONE; else FAIL;")
-
-(define_expand "sltu"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (ltu:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (LTU, operands[0])) DONE; else FAIL;")
-
-(define_expand "sge"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (ge:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (GE, operands[0])) DONE; else FAIL;")
-
-(define_expand "sgeu"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (geu:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (GEU, operands[0])) DONE; else FAIL;")
-
-(define_expand "sle"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (le:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (LE, operands[0])) DONE; else FAIL;")
-
-(define_expand "sleu"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (leu:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  ""
-  "if (ix86_expand_setcc (LEU, operands[0])) DONE; else FAIL;")
+  else if (i < HOST_BITS_PER_WIDE_INT)
+    lo = (HOST_WIDE_INT)1 << i, hi = 0;
+  else
+    lo = 0, hi = (HOST_WIDE_INT)1 << (i - HOST_BITS_PER_WIDE_INT);
 
-(define_expand "sunordered"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (unordered:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNORDERED, operands[0])) DONE; else FAIL;")
+  op1 = immed_double_const (lo, hi, DImode);
+  if (i >= 31)
+    {
+      emit_move_insn (operands[2], op1);
+      op1 = operands[2];
+    }
 
-(define_expand "sordered"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (ordered:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387"
-  "if (ix86_expand_setcc (ORDERED, operands[0])) DONE; else FAIL;")
+  emit_insn (gen_xordi3 (operands[0], operands[0], op1));
+  DONE;
+})
 
-(define_expand "suneq"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (uneq:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNEQ, operands[0])) DONE; else FAIL;")
+(define_insn "*btdi_rex64"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:DI
+           (match_operand:DI 0 "register_operand" "r")
+           (const_int 1)
+           (match_operand:DI 1 "nonmemory_operand" "rN"))
+         (const_int 0)))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "bt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
 
-(define_expand "sunge"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (unge:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNGE, operands[0])) DONE; else FAIL;")
+(define_insn "*btsi"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SI
+           (match_operand:SI 0 "register_operand" "r")
+           (const_int 1)
+           (match_operand:SI 1 "nonmemory_operand" "rN"))
+         (const_int 0)))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "bt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")])
+\f
+;; Store-flag instructions.
 
-(define_expand "sungt"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (ungt:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNGT, operands[0])) DONE; else FAIL;")
+;; For all sCOND expanders, also expand the compare or test insn that
+;; generates cc0.  Generate an equality comparison if `seq' or `sne'.
 
-(define_expand "sunle"
-  [(set (match_operand:QI 0 "register_operand" "")
-        (unle:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNLE, operands[0])) DONE; else FAIL;")
+;; %%% Do the expansion to SImode.  If PII, do things the xor+setcc way
+;; to avoid partial register stalls.  Otherwise do things the setcc+movzx
+;; way, which can later delete the movzx if only QImode is needed.
 
-(define_expand "sunlt"
+(define_expand "s<code>"
   [(set (match_operand:QI 0 "register_operand" "")
-        (unlt:QI (reg:CC FLAGS_REG) (const_int 0)))]
-  "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (UNLT, operands[0])) DONE; else FAIL;")
+        (int_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
+  ""
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
 
-(define_expand "sltgt"
+(define_expand "s<code>"
   [(set (match_operand:QI 0 "register_operand" "")
-        (ltgt:QI (reg:CC FLAGS_REG) (const_int 0)))]
+        (fp_cond:QI (reg:CC FLAGS_REG) (const_int 0)))]
   "TARGET_80387 || TARGET_SSE"
-  "if (ix86_expand_setcc (LTGT, operands[0])) DONE; else FAIL;")
+  "if (ix86_expand_setcc (<CODE>, operands[0])) DONE; else FAIL;")
 
 (define_insn "*setcc_1"
   [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
 ;; 0xffffffff is NaN, but not in normalized form, so we can't represent
 ;; it directly.
 
-(define_insn "*sse_setccsf"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (match_operator:SF 1 "sse_comparison_operator"
-         [(match_operand:SF 2 "register_operand" "0")
-          (match_operand:SF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE && !TARGET_SSE5"
-  "cmp%D1ss\t{%3, %0|%0, %3}"
+(define_insn "*avx_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 1 "avx_comparison_float_operator"
+         [(match_operand:MODEF 2 "register_operand" "x")
+          (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "TARGET_AVX"
+  "vcmp%D1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "ssecmp")
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*sse_setccdf"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (match_operator:DF 1 "sse_comparison_operator"
-         [(match_operand:DF 2 "register_operand" "0")
-          (match_operand:DF 3 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && !TARGET_SSE5"
-  "cmp%D1sd\t{%3, %0|%0, %3}"
+(define_insn "*sse_setcc<mode>"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 1 "sse_comparison_operator"
+         [(match_operand:MODEF 2 "register_operand" "0")
+          (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5"
+  "cmp%D1s<ssemodefsuffix>\t{%3, %0|%0, %3}"
   [(set_attr "type" "ssecmp")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*sse5_setcc<mode>"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
          [(match_operand:MODEF 2 "register_operand" "x")
           (match_operand:MODEF 3 "nonimmediate_operand" "xm")]))]
   "TARGET_SSE5"
-  "com%Y1ss\t{%3, %2, %0|%0, %2, %3}"
+  "com%Y1s<ssemodefsuffix>\t{%3, %2, %0|%0, %2, %3}"
   [(set_attr "type" "sse4arg")
    (set_attr "mode" "<MODE>")])
 
 ;; For all bCOND expanders, also expand the compare or test insn that
 ;; generates reg FLAGS_REG.  Generate an equality comparison if `beq' or `bne'.
 
-(define_expand "beq"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (EQ, operands[0]); DONE;")
-
-(define_expand "bne"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (NE, operands[0]); DONE;")
-
-(define_expand "bgt"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (GT, operands[0]); DONE;")
-
-(define_expand "bgtu"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (GTU, operands[0]); DONE;")
-
-(define_expand "blt"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (LT, operands[0]); DONE;")
-
-(define_expand "bltu"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (LTU, operands[0]); DONE;")
-
-(define_expand "bge"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (GE, operands[0]); DONE;")
-
-(define_expand "bgeu"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (GEU, operands[0]); DONE;")
-
-(define_expand "ble"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  ""
-  "ix86_expand_branch (LE, operands[0]); DONE;")
-
-(define_expand "bleu"
+(define_expand "b<code>"
   [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
+       (if_then_else (int_cond:CC (reg:CC FLAGS_REG)
+                                  (const_int 0))
+                     (label_ref (match_operand 0 ""))
                      (pc)))]
   ""
-  "ix86_expand_branch (LEU, operands[0]); DONE;")
-
-(define_expand "bunordered"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNORDERED, operands[0]); DONE;")
-
-(define_expand "bordered"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (ORDERED, operands[0]); DONE;")
-
-(define_expand "buneq"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNEQ, operands[0]); DONE;")
-
-(define_expand "bunge"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNGE, operands[0]); DONE;")
-
-(define_expand "bungt"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNGT, operands[0]); DONE;")
-
-(define_expand "bunle"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNLE, operands[0]); DONE;")
-
-(define_expand "bunlt"
-  [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
-                     (pc)))]
-  "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (UNLT, operands[0]); DONE;")
+  "ix86_expand_branch (<CODE>, operands[0]); DONE;")
 
-(define_expand "bltgt"
+(define_expand "b<code>"
   [(set (pc)
-       (if_then_else (match_dup 1)
-                     (label_ref (match_operand 0 "" ""))
+       (if_then_else (fp_cond:CC (reg:CC FLAGS_REG)
+                                 (const_int 0))
+                     (label_ref (match_operand 0 ""))
                      (pc)))]
   "TARGET_80387 || TARGET_SSE_MATH"
-  "ix86_expand_branch (LTGT, operands[0]); DONE;")
+  "ix86_expand_branch (<CODE>, operands[0]); DONE;")
 
 (define_insn "*jcc_1"
   [(set (pc)
     FAIL;
 })
 
+;; zero_extend in SImode is correct, since this is what combine pass
+;; generates from shift insn with QImode operand.  Actually, the mode of
+;; operand 2 (bit offset operand) doesn't matter since bt insn takes
+;; appropriate modulo of the bit offset value.
+
+(define_insn_and_split "*jcc_btdi_rex64"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:DI
+                          (match_operand:DI 1 "register_operand" "r")
+                          (const_int 1)
+                          (zero_extend:SI
+                            (match_operand:QI 2 "register_operand" "r")))
+                        (const_int 0)])
+                     (label_ref (match_operand 3 "" ""))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:DI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 3))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btdi_mask_rex64"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:DI
+                          (match_operand:DI 1 "register_operand" "r")
+                          (const_int 1)
+                          (and:SI
+                            (match_operand:SI 2 "register_operand" "r")
+                            (match_operand:SI 3 "const_int_operand" "n")))])
+                     (label_ref (match_operand 4 "" ""))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x3f) == 0x3f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:DI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 4))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+(define_insn_and_split "*jcc_btsi"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:SI
+                          (match_operand:SI 1 "register_operand" "r")
+                          (const_int 1)
+                          (zero_extend:SI
+                            (match_operand:QI 2 "register_operand" "r")))
+                        (const_int 0)])
+                     (label_ref (match_operand 3 "" ""))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 3))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(zero_extract:SI
+                          (match_operand:SI 1 "register_operand" "r")
+                          (const_int 1)
+                          (and:SI
+                            (match_operand:SI 2 "register_operand" "r")
+                            (match_operand:SI 3 "const_int_operand" "n")))])
+                     (label_ref (match_operand 4 "" ""))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 4))
+                     (pc)))]
+  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
+(define_insn_and_split "*jcc_btsi_1"
+  [(set (pc)
+       (if_then_else (match_operator 0 "bt_comparison_operator"
+                       [(and:SI
+                          (lshiftrt:SI
+                            (match_operand:SI 1 "register_operand" "r")
+                            (match_operand:QI 2 "register_operand" "r"))
+                          (const_int 1))
+                        (const_int 0)])
+                     (label_ref (match_operand 3 "" ""))
+                     (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 3))
+                     (pc)))]
+{
+  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
+
+  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
+})
+
+;; avoid useless masking of bit offset operand
+(define_insn_and_split "*jcc_btsi_mask_1"
+  [(set (pc)
+       (if_then_else
+         (match_operator 0 "bt_comparison_operator"
+           [(and:SI
+              (lshiftrt:SI
+                (match_operand:SI 1 "register_operand" "r")
+                (subreg:QI
+                  (and:SI
+                    (match_operand:SI 2 "register_operand" "r")
+                    (match_operand:SI 3 "const_int_operand" "n")) 0))
+              (const_int 1))
+            (const_int 0)])
+         (label_ref (match_operand 4 "" ""))
+         (pc)))
+   (clobber (reg:CC FLAGS_REG))]
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
+  "#"
+  "&& 1"
+  [(set (reg:CCC FLAGS_REG)
+       (compare:CCC
+         (zero_extract:SI
+           (match_dup 1)
+           (const_int 1)
+           (match_dup 2))
+         (const_int 0)))
+   (set (pc)
+       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
+                     (label_ref (match_dup 4))
+                     (pc)))]
+  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
+
 ;; Define combination compare-and-branch fp compare instructions to use
 ;; during early optimization.  Splitting the operation apart early makes
 ;; for bad code when we want to reverse the operation.
   [(set (pc)
        (if_then_else (match_operator 0 "comparison_operator"
                        [(match_operand 1 "register_operand" "f")
-                        (match_operand 2 "const0_operand" "X")])
+                        (match_operand 2 "const0_operand" "")])
          (label_ref (match_operand 3 "" ""))
          (pc)))
    (clobber (reg:CCFP FPSR_REG))
    (clobber (reg:CCFP FLAGS_REG))
    (clobber (match_scratch:HI 5 "=a,a"))]
   "X87_FLOAT_MODE_P (GET_MODE (operands[3]))
-   && TARGET_USE_<MODE>MODE_FIOP
+   && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))
    && GET_MODE (operands[1]) == GET_MODE (operands[3])
    && !ix86_use_fcomi_compare (swap_condition (GET_CODE (operands[0])))
    && ix86_fp_compare_mode (swap_condition (GET_CODE (operands[0]))) == CCFPmode
   "")
 
 (define_insn "*indirect_jump"
-  [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))]
-  "!TARGET_64BIT"
-  "jmp\t%A0"
-  [(set_attr "type" "ibr")
-   (set_attr "length_immediate" "0")])
-
-(define_insn "*indirect_jump_rtx64"
-  [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))]
-  "TARGET_64BIT"
+  [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))]
+  ""
   "jmp\t%A0"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")])
 })
 
 (define_insn "*tablejump_1"
-  [(set (pc) (match_operand:SI 0 "nonimmediate_operand" "rm"))
-   (use (label_ref (match_operand 1 "" "")))]
-  "!TARGET_64BIT"
-  "jmp\t%A0"
-  [(set_attr "type" "ibr")
-   (set_attr "length_immediate" "0")])
-
-(define_insn "*tablejump_1_rtx64"
-  [(set (pc) (match_operand:DI 0 "nonimmediate_operand" "rm"))
+  [(set (pc) (match_operand:P 0 "nonimmediate_operand" "rm"))
    (use (label_ref (match_operand 1 "" "")))]
-  "TARGET_64BIT"
+  ""
   "jmp\t%A0"
   [(set_attr "type" "ibr")
    (set_attr "length_immediate" "0")])
 }
   [(set_attr "type" "call")])
 
+(define_insn "*call_1_rex64_ms_sysv"
+  [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rsm"))
+        (match_operand 1 "" ""))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[0], Pmode))
+    return "call\t%P0";
+  return "call\t%A0";
+}
+  [(set_attr "type" "call")])
+
 (define_insn "*call_1_rex64_large"
   [(call (mem:QI (match_operand:DI 0 "call_insn_operand" "rm"))
         (match_operand 1 "" ""))]
   [(call (mem:QI (reg:DI R11_REG))
         (match_operand 0 "" ""))]
   "SIBLING_CALL_P (insn) && TARGET_64BIT"
-  "jmp\t*%%r11"
+  "jmp\t{*%%}r11"
   [(set_attr "type" "call")])
 
 
 
   ix86_expand_call ((TARGET_FLOAT_RETURNS_IN_80387
                     ? gen_rtx_REG (XCmode, FIRST_FLOAT_REG) : NULL),
-                   operands[0], const0_rtx, GEN_INT (SSE_REGPARM_MAX - 1),
+                   operands[0], const0_rtx,
+                   GEN_INT ((DEFAULT_ABI == SYSV_ABI ? X86_64_SSE_REGPARM_MAX
+                                                     : X64_SSE_REGPARM_MAX)
+                            - 1),
                    NULL, 0);
 
   for (i = 0; i < XVECLEN (operands[2], 0); i++)
   [(return)]
   "ix86_can_use_return_insn_p ()"
 {
-  if (current_function_pops_args)
+  if (crtl->args.pops_args)
     {
-      rtx popc = GEN_INT (current_function_pops_args);
+      rtx popc = GEN_INT (crtl->args.pops_args);
       emit_jump_insn (gen_return_pop_internal (popc));
       DONE;
     }
   [(return)
    (unspec [(const_int 0)] UNSPEC_REP)]
   "reload_completed"
-  "rep{\;| }ret"
+  "rep\;ret"
   [(set_attr "length" "1")
    (set_attr "length_immediate" "0")
    (set_attr "prefix_rep" "1")
   [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(const_int 0)] UNSPEC_SET_GOT))]
   "TARGET_64BIT"
-  "lea{q}\t_GLOBAL_OFFSET_TABLE_(%%rip), %0"
+  "lea{q}\t{_GLOBAL_OFFSET_TABLE_(%%rip), %0|%0, _GLOBAL_OFFSET_TABLE_[rip]}"
   [(set_attr "type" "lea")
    (set_attr "length" "6")])
 
   [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_RIP))]
   "TARGET_64BIT"
-  "lea{q}\t%l1(%%rip), %0"
+  "lea{q}\t{%l1(%%rip), %0|%0, %l1[rip]}"
   [(set_attr "type" "lea")
    (set_attr "length" "6")])
 
   [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(match_operand:DI 1 "" "")] UNSPEC_SET_GOT_OFFSET))]
   "TARGET_64BIT"
-  "movabs{q}\t$_GLOBAL_OFFSET_TABLE_-%l1, %0"
+  "movabs{q}\t{$_GLOBAL_OFFSET_TABLE_-%l1, %0|%0, OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-%l1}"
   [(set_attr "type" "imov")
    (set_attr "length" "11")])
 
   DONE;
 })
 
-(define_insn_and_split "eh_return_si"
-  [(set (pc)
-        (unspec [(match_operand:SI 0 "register_operand" "c")]
-                UNSPEC_EH_RETURN))]
-  "!TARGET_64BIT"
-  "#"
-  "reload_completed"
-  [(const_int 0)]
-  "ix86_expand_epilogue (2); DONE;")
-
-(define_insn_and_split "eh_return_di"
+(define_insn_and_split "eh_return_<mode>"
   [(set (pc)
-        (unspec [(match_operand:DI 0 "register_operand" "c")]
+        (unspec [(match_operand:P 0 "register_operand" "c")]
                 UNSPEC_EH_RETURN))]
-  "TARGET_64BIT"
+  ""
   "#"
   "reload_completed"
   [(const_int 0)]
 (define_expand "ffs_cmove"
   [(set (match_dup 2) (const_int -1))
    (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (match_operand:SI 1 "register_operand" "")
+                  (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "")
                                (const_int 0)))
-             (set (match_operand:SI 0 "nonimmediate_operand" "")
+             (set (match_operand:SI 0 "register_operand" "")
                   (ctz:SI (match_dup 1)))])
    (set (match_dup 0) (if_then_else:SI
                        (eq (reg:CCZ FLAGS_REG) (const_int 0))
   "operands[2] = gen_reg_rtx (SImode);")
 
 (define_insn_and_split "*ffs_no_cmove"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r")
        (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
    (clobber (match_scratch:SI 2 "=&q"))
    (clobber (reg:CC FLAGS_REG))]
 (define_expand "ffsdi2"
   [(set (match_dup 2) (const_int -1))
    (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (match_operand:DI 1 "register_operand" "")
+                  (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "")
                                (const_int 0)))
-             (set (match_operand:DI 0 "nonimmediate_operand" "")
+             (set (match_operand:DI 0 "register_operand" "")
                   (ctz:DI (match_dup 1)))])
    (set (match_dup 0) (if_then_else:DI
                        (eq (reg:CCZ FLAGS_REG) (const_int 0))
 
 (define_insn "clzsi2_abm"
   [(set (match_operand:SI 0 "register_operand" "=r")
-        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_ABM"
   "lzcnt{l}\t{%1, %0|%0, %1}"
   [(set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")])
 
-(define_insn "popcountsi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "")))
+(define_insn "popcount<mode>2"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+       (popcount:SWI248
+         (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_POPCNT"
-  "popcnt{l}\t{%1, %0|%0, %1}"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*popcountsi2_cmp"
+(define_insn "*popcount<mode>2_cmp"
   [(set (reg FLAGS_REG)
        (compare
-         (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+         (popcount:SWI248
+           (match_operand:SWI248 1 "nonimmediate_operand" "rm"))
          (const_int 0)))
-   (set (match_operand:SI 0 "register_operand" "=r")
-       (popcount:SI (match_dup 1)))]
+   (set (match_operand:SWI248 0 "register_operand" "=r")
+       (popcount:SWI248 (match_dup 1)))]
   "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-  "popcnt{l}\t{%1, %0|%0, %1}"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*popcountsi2_cmp_zext"
   [(set (reg FLAGS_REG)
    (set (match_operand:DI 0 "register_operand" "=r")
         (zero_extend:DI(popcount:SI (match_dup 1))))]
   "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-  "popcnt{l}\t{%1, %0|%0, %1}"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %0|%0, %1}";
+#else
+  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
    (set_attr "mode" "SI")])
   [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
        (bswap:HI (match_dup 0)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_XCHGB || optimize_size"
+  "TARGET_USE_XCHGB || optimize_function_for_size_p (cfun)"
   "@
     xchg{b}\t{%h0, %b0|%b0, %h0}
     rol{w}\t{$8, %0|%0, 8}"
 
 (define_insn "clzdi2_abm"
   [(set (match_operand:DI 0 "register_operand" "=r")
-       (clz:DI (match_operand:DI 1 "nonimmediate_operand" "")))
+       (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && TARGET_ABM"
   "lzcnt{q}\t{%1, %0|%0, %1}"
   [(set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")])
 
-(define_insn "popcountdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_POPCNT"
-  "popcnt{q}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "DI")])
-
-(define_insn "*popcountdi2_cmp"
-  [(set (reg FLAGS_REG)
-       (compare
-         (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
-         (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-       (popcount:DI (match_dup 1)))]
-  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-  "popcnt{q}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "DI")])
-
 (define_expand "clzhi2"
   [(parallel
      [(set (match_operand:HI 0 "register_operand" "")
 
 (define_insn "clzhi2_abm"
   [(set (match_operand:HI 0 "register_operand" "=r")
-       (clz:HI (match_operand:HI 1 "nonimmediate_operand" "")))
+       (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_ABM"
   "lzcnt{w}\t{%1, %0|%0, %1}"
   [(set_attr "prefix_0f" "1")
    (set_attr "mode" "HI")])
 
-(define_insn "popcounthi2"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-       (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_POPCNT"
-  "popcnt{w}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "HI")])
-
-(define_insn "*popcounthi2_cmp"
-  [(set (reg FLAGS_REG)
-        (compare
-          (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
-          (const_int 0)))
-   (set (match_operand:HI 0 "register_operand" "=r")
-        (popcount:HI (match_dup 1)))]
-  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
-  "popcnt{w}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "HI")])
-
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand" "")
-       (parity:DI (match_operand:DI 1 "nonimmediate_operand" "")))]
+       (parity:DI (match_operand:DI 1 "register_operand" "")))]
   "! TARGET_POPCNT"
 {
   rtx scratch = gen_reg_rtx (QImode);
 
 (define_insn_and_split "paritydi2_cmp"
   [(set (reg:CC FLAGS_REG)
-       (parity:CC (match_operand:DI 3 "nonimmediate_operand" "0,m")))
-   (clobber (match_scratch:DI 0 "=r,X"))
-   (clobber (match_scratch:SI 1 "=r,r"))
-   (clobber (match_scratch:HI 2 "=Q,Q"))]
+       (parity:CC (match_operand:DI 3 "register_operand" "0")))
+   (clobber (match_scratch:DI 0 "=r"))
+   (clobber (match_scratch:SI 1 "=&r"))
+   (clobber (match_scratch:HI 2 "=Q"))]
   "! TARGET_POPCNT"
   "#"
   "&& reload_completed"
 {
   operands[4] = gen_lowpart (SImode, operands[3]);
 
-  if (MEM_P (operands[3]))
-    emit_move_insn (operands[1], gen_highpart (SImode, operands[3]));
-  else if (! TARGET_64BIT)
-    operands[1] = gen_highpart (SImode, operands[3]);
-  else
+  if (TARGET_64BIT)
     {
       emit_move_insn (operands[1], gen_lowpart (SImode, operands[3]));
       emit_insn (gen_lshrdi3 (operands[3], operands[3], GEN_INT (32)));
     }
+  else
+    operands[1] = gen_highpart (SImode, operands[3]);
 })
 
 (define_expand "paritysi2"
   [(set (match_operand:SI 0 "register_operand" "")
-       (parity:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
+       (parity:SI (match_operand:SI 1 "register_operand" "")))]
   "! TARGET_POPCNT"
 {
   rtx scratch = gen_reg_rtx (QImode);
 
 (define_insn_and_split "paritysi2_cmp"
   [(set (reg:CC FLAGS_REG)
-       (parity:CC (match_operand:SI 2 "nonimmediate_operand" "0,m")))
-   (clobber (match_scratch:SI 0 "=r,X"))
-   (clobber (match_scratch:HI 1 "=Q,Q"))]
+       (parity:CC (match_operand:SI 2 "register_operand" "0")))
+   (clobber (match_scratch:SI 0 "=r"))
+   (clobber (match_scratch:HI 1 "=&Q"))]
   "! TARGET_POPCNT"
   "#"
   "&& reload_completed"
 {
   operands[3] = gen_lowpart (HImode, operands[2]);
 
-  if (MEM_P (operands[2]))
-    emit_move_insn (operands[1], gen_highpart (HImode, operands[2]));
-  else
-    {
-      emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
-      emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
-    }
+  emit_move_insn (operands[1], gen_lowpart (HImode, operands[2]));
+  emit_insn (gen_lshrsi3 (operands[2], operands[2], GEN_INT (16)));
 })
 
 (define_insn "*parityhi2_cmp"
    (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
              UNSPEC_TLS_GD)]
   "TARGET_64BIT"
-  ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|%%rdi, %a1@TLSGD[%%rip]}\;.word\t0x6666\;rex64\;call\t%P2"
+  ".byte\t0x66\;lea{q}\t{%a1@TLSGD(%%rip), %%rdi|rdi, %a1@TLSGD[rip]}\;.word\t0x6666\;rex64\;call\t%P2"
   [(set_attr "type" "multi")
    (set_attr "length" "16")])
 
                 (match_operand:DI 2 "" "")))
    (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)]
   "TARGET_64BIT"
-  "lea{q}\t{%&@TLSLD(%%rip), %%rdi|%%rdi, %&@TLSLD[%%rip]}\;call\t%P1"
+  "lea{q}\t{%&@TLSLD(%%rip), %%rdi|rdi, %&@TLSLD[rip]}\;call\t%P1"
   [(set_attr "type" "multi")
    (set_attr "length" "12")])
 
   [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(const_int 0)] UNSPEC_TP))]
   "!TARGET_64BIT"
-  "mov{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}"
+  "mov{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
   [(set_attr "type" "imov")
    (set_attr "modrm" "0")
    (set_attr "length" "7")
                 (match_operand:SI 1 "register_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  "add{l}\t{%%gs:0, %0|%0, DWORD PTR %%gs:0}"
+  "add{l}\t{%%gs:0, %0|%0, DWORD PTR gs:0}"
   [(set_attr "type" "alu")
    (set_attr "modrm" "0")
    (set_attr "length" "7")
   [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI [(const_int 0)] UNSPEC_TP))]
   "TARGET_64BIT"
-  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}"
+  "mov{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
   [(set_attr "type" "imov")
    (set_attr "modrm" "0")
    (set_attr "length" "7")
                 (match_operand:DI 1 "register_operand" "0")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "add{q}\t{%%fs:0, %0|%0, QWORD PTR %%fs:0}"
+  "add{q}\t{%%fs:0, %0|%0, QWORD PTR fs:0}"
   [(set_attr "type" "alu")
    (set_attr "modrm" "0")
    (set_attr "length" "7")
        (unspec:DI [(match_operand:DI 1 "tls_symbolic_operand" "")]
                   UNSPEC_TLSDESC))]
   "TARGET_64BIT && TARGET_GNU2_TLS"
-  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[%%rip]}"
+  "lea{q}\t{%a1@TLSDESC(%%rip), %0|%0, %a1@TLSDESC[rip]}"
   [(set_attr "type" "lea")
    (set_attr "mode" "DI")
    (set_attr "length" "7")
 ;; Gcc is slightly more smart about handling normal two address instructions
 ;; so use special patterns for add and mull.
 
-(define_insn "*fop_sf_comm_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,x")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "nonimmediate_operand" "%0,0")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm,xm")]))]
-  "TARGET_MIX_SSE_I387
+(define_insn "*fop_<mode>_comm_mixed_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "%0,x")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
    && COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
        (if_then_else (eq_attr "alternative" "1")
-          (if_then_else (match_operand:SF 3 "mult_operator" "")
+          (if_then_else (match_operand:MODEF 3 "mult_operator" "")
              (const_string "ssemul")
              (const_string "sseadd"))
-          (if_then_else (match_operand:SF 3 "mult_operator" "")
+          (if_then_else (match_operand:MODEF 3 "mult_operator" "")
              (const_string "fmul")
              (const_string "fop"))))
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "orig,maybe_vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_sf_comm_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "nonimmediate_operand" "%0")
-                        (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE_MATH
+(define_insn "*fop_<mode>_comm_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "%0,0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm,xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+       (if_then_else (eq_attr "alternative" "1")
+          (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+             (const_string "ssemul")
+             (const_string "sseadd"))
+          (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+             (const_string "fmul")
+             (const_string "fop"))))
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "%x")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
    && COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (if_then_else (match_operand:SF 3 "mult_operator" "")
+        (if_then_else (match_operand:MODEF 3 "mult_operator" "")
           (const_string "ssemul")
           (const_string "sseadd")))
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*fop_<mode>_comm_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && COMMUTATIVE_ARITH_P (operands[3])
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "* return output_387_binary_op (insn, operands);"
+  [(set (attr "type")
+        (if_then_else (match_operand:MODEF 3 "mult_operator" "")
+          (const_string "ssemul")
+          (const_string "sseadd")))
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_sf_comm_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "nonimmediate_operand" "%0")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm")]))]
+(define_insn "*fop_<mode>_comm_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "%0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm")]))]
   "TARGET_80387
    && COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-       (if_then_else (match_operand:SF 3 "mult_operator" "")
+       (if_then_else (match_operand:MODEF 3 "mult_operator" "")
           (const_string "fmul")
           (const_string "fop")))
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_sf_1_mixed"
-  [(set (match_operand:SF 0 "register_operand" "=f,f,x")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "nonimmediate_operand" "0,fm,0")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm,0,xm")]))]
-  "TARGET_MIX_SSE_I387
+(define_insn "*fop_<mode>_1_mixed_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,x")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
    && !COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
         (cond [(and (eq_attr "alternative" "2")
-                   (match_operand:SF 3 "mult_operator" ""))
+                   (match_operand:MODEF 3 "mult_operator" ""))
                  (const_string "ssemul")
               (and (eq_attr "alternative" "2")
-                   (match_operand:SF 3 "div_operator" ""))
+                   (match_operand:MODEF 3 "div_operator" ""))
                  (const_string "ssediv")
               (eq_attr "alternative" "2")
                  (const_string "sseadd")
-              (match_operand:SF 3 "mult_operator" "")
+              (match_operand:MODEF 3 "mult_operator" "")
                  (const_string "fmul")
-               (match_operand:SF 3 "div_operator" "")
+               (match_operand:MODEF 3 "div_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "mode" "SF")])
-
-(define_insn "*rcpsf2_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
-                  UNSPEC_RCP))]
-  "TARGET_SSE_MATH"
-  "rcpss\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sse")
-   (set_attr "mode" "SF")])
-
-(define_insn "*fop_sf_1_sse"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "register_operand" "0")
-                        (match_operand:SF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE_MATH
-   && !COMMUTATIVE_ARITH_P (operands[3])"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:SF 3 "mult_operator" "")
-                 (const_string "ssemul")
-              (match_operand:SF 3 "div_operator" "")
-                 (const_string "ssediv")
-              ]
-              (const_string "sseadd")))
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "orig,orig,maybe_vex")
+   (set_attr "mode" "<MODE>")])
 
-;; This pattern is not fully shadowed by the pattern above.
-(define_insn "*fop_sf_1_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (match_operator:SF 3 "binary_fp_operator"
-                       [(match_operand:SF 1 "nonimmediate_operand" "0,fm")
-                        (match_operand:SF 2 "nonimmediate_operand" "fm,0")]))]
-  "TARGET_80387 && !TARGET_SSE_MATH
+(define_insn "*fop_<mode>_1_mixed"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f,x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm,0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm,0,xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_MIX_SSE_I387
    && !COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:SF 3 "mult_operator" "")
-                 (const_string "fmul")
-               (match_operand:SF 3 "div_operator" "")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "mode" "SF")])
-
-;; ??? Add SSE splitters for these!
-(define_insn "*fop_sf_2<mode>_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (match_operator:SF 3 "binary_fp_operator"
-         [(float:SF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
-          (match_operand:SF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
-  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:SF 3 "mult_operator" "")
-                 (const_string "fmul")
-               (match_operand:SF 3 "div_operator" "")
-                 (const_string "fdiv")
-              ]
-              (const_string "fop")))
-   (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
-
-(define_insn "*fop_sf_3<mode>_i387"
-  [(set (match_operand:SF 0 "register_operand" "=f,f")
-       (match_operator:SF 3 "binary_fp_operator"
-         [(match_operand:SF 1 "register_operand" "0,0")
-          (float:SF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP && !TARGET_SSE_MATH"
-  "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (cond [(match_operand:SF 3 "mult_operator" "")
+        (cond [(and (eq_attr "alternative" "2")
+                   (match_operand:MODEF 3 "mult_operator" ""))
+                 (const_string "ssemul")
+              (and (eq_attr "alternative" "2")
+                   (match_operand:MODEF 3 "div_operator" ""))
+                 (const_string "ssediv")
+              (eq_attr "alternative" "2")
+                 (const_string "sseadd")
+              (match_operand:MODEF 3 "mult_operator" "")
                  (const_string "fmul")
-               (match_operand:SF 3 "div_operator" "")
+               (match_operand:MODEF 3 "div_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "fp_int_src" "true")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_df_comm_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,x")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "%0,0")
-          (match_operand:DF 2 "nonimmediate_operand" "fm,xm")]))]
-  "TARGET_SSE2 && TARGET_MIX_SSE_I387
-   && COMMUTATIVE_ARITH_P (operands[3])
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-       (if_then_else (eq_attr "alternative" "1")
-          (if_then_else (match_operand:DF 3 "mult_operator" "")
-             (const_string "ssemul")
-             (const_string "sseadd"))
-          (if_then_else (match_operand:DF 3 "mult_operator" "")
-             (const_string "fmul")
-             (const_string "fop"))))
-   (set_attr "mode" "DF")])
-
-(define_insn "*fop_df_comm_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "%0")
-          (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
-   && COMMUTATIVE_ARITH_P (operands[3])
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-        (if_then_else (match_operand:DF 3 "mult_operator" "")
-          (const_string "ssemul")
-          (const_string "sseadd")))
-   (set_attr "mode" "DF")])
-
-(define_insn "*fop_df_comm_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f")
-       (match_operator:DF 3 "binary_fp_operator"
-                       [(match_operand:DF 1 "nonimmediate_operand" "%0")
-                        (match_operand:DF 2 "nonimmediate_operand" "fm")]))]
-  "TARGET_80387
-   && COMMUTATIVE_ARITH_P (operands[3])
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "* return output_387_binary_op (insn, operands);"
-  [(set (attr "type")
-       (if_then_else (match_operand:DF 3 "mult_operator" "")
-          (const_string "fmul")
-          (const_string "fop")))
-   (set_attr "mode" "DF")])
+(define_insn "*rcpsf2_sse"
+  [(set (match_operand:SF 0 "register_operand" "=x")
+       (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
+                  UNSPEC_RCP))]
+  "TARGET_SSE_MATH"
+  "%vrcpss\t{%1, %d0|%d0, %1}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "SF")])
 
-(define_insn "*fop_df_1_mixed"
-  [(set (match_operand:DF 0 "register_operand" "=f,f,x")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(match_operand:DF 1 "nonimmediate_operand" "0,fm,0")
-          (match_operand:DF 2 "nonimmediate_operand" "fm,0,xm")]))]
-  "TARGET_SSE2 && TARGET_SSE_MATH && TARGET_MIX_SSE_I387
-   && !COMMUTATIVE_ARITH_P (operands[3])
-   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+(define_insn "*fop_<mode>_1_avx"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "register_operand" "x")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && !COMMUTATIVE_ARITH_P (operands[3])"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(and (eq_attr "alternative" "2")
-                   (match_operand:DF 3 "mult_operator" ""))
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
                  (const_string "ssemul")
-              (and (eq_attr "alternative" "2")
-                   (match_operand:DF 3 "div_operator" ""))
+              (match_operand:MODEF 3 "div_operator" "")
                  (const_string "ssediv")
-              (eq_attr "alternative" "2")
-                 (const_string "sseadd")
-              (match_operand:DF 3 "mult_operator" "")
-                 (const_string "fmul")
-               (match_operand:DF 3 "div_operator" "")
-                 (const_string "fdiv")
               ]
-              (const_string "fop")))
-   (set_attr "mode" "DF")])
+              (const_string "sseadd")))
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_df_1_sse"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (match_operator:DF 3 "binary_fp_operator"
-         [(match_operand:DF 1 "register_operand" "0")
-          (match_operand:DF 2 "nonimmediate_operand" "xm")]))]
-  "TARGET_SSE2 && TARGET_SSE_MATH
+(define_insn "*fop_<mode>_1_sse"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "register_operand" "0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
    && !COMMUTATIVE_ARITH_P (operands[3])"
   "* return output_387_binary_op (insn, operands);"
-  [(set_attr "mode" "DF")
-   (set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator" "")
+  [(set (attr "type")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
                  (const_string "ssemul")
-              (match_operand:DF 3 "div_operator" "")
+              (match_operand:MODEF 3 "div_operator" "")
                  (const_string "ssediv")
               ]
-              (const_string "sseadd")))])
+              (const_string "sseadd")))
+   (set_attr "mode" "<MODE>")])
 
 ;; This pattern is not fully shadowed by the pattern above.
-(define_insn "*fop_df_1_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-                       [(match_operand:DF 1 "nonimmediate_operand" "0,fm")
-                        (match_operand:DF 2 "nonimmediate_operand" "fm,0")]))]
-  "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
+(define_insn "*fop_<mode>_1_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "nonimmediate_operand" "0,fm")
+          (match_operand:MODEF 2 "nonimmediate_operand" "fm,0")]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
    && !COMMUTATIVE_ARITH_P (operands[3])
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "* return output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator" "")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
                  (const_string "fmul")
-               (match_operand:DF 3 "div_operator" "")
+               (match_operand:MODEF 3 "div_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; ??? Add SSE splitters for these!
-(define_insn "*fop_df_2<mode>_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-          [(float:DF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
-           (match_operand:DF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
-   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+(define_insn "*fop_<MODEF:mode>_2_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(float:MODEF
+            (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+          (match_operand:MODEF 2 "register_operand" "0,0")]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator" "")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
                  (const_string "fmul")
-               (match_operand:DF 3 "div_operator" "")
+               (match_operand:MODEF 3 "div_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
    (set_attr "fp_int_src" "true")
-   (set_attr "mode" "<MODE>")])
+   (set_attr "mode" "<X87MODEI12:MODE>")])
 
-(define_insn "*fop_df_3<mode>_i387"
-  [(set (match_operand:DF 0 "register_operand" "=f,f")
-       (match_operator:DF 3 "binary_fp_operator"
-          [(match_operand:DF 1 "register_operand" "0,0")
-           (float:DF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP
-   && !(TARGET_SSE2 && TARGET_SSE_MATH)"
+(define_insn "*fop_<MODEF:mode>_3_i387"
+  [(set (match_operand:MODEF 0 "register_operand" "=f,f")
+       (match_operator:MODEF 3 "binary_fp_operator"
+         [(match_operand:MODEF 1 "register_operand" "0,0")
+          (float:MODEF
+            (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && !(SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH)
+   && (TARGET_USE_<X87MODEI12:MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type")
-        (cond [(match_operand:DF 3 "mult_operator" "")
+        (cond [(match_operand:MODEF 3 "mult_operator" "")
                  (const_string "fmul")
-               (match_operand:DF 3 "div_operator" "")
+               (match_operand:MODEF 3 "div_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
 (define_insn "*fop_df_4_i387"
   [(set (match_operand:DF 0 "register_operand" "=f,f")
        (match_operator:DF 3 "binary_fp_operator"
-          [(float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
+          [(float_extend:DF
+            (match_operand:SF 1 "nonimmediate_operand" "fm,0"))
            (match_operand:DF 2 "register_operand" "0,f")]))]
   "TARGET_80387 && !(TARGET_SSE2 && TARGET_SSE_MATH)
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
               (const_string "fop")))
    (set_attr "mode" "XF")])
 
-(define_insn "*fop_xf_2<mode>_i387"
+(define_insn "*fop_xf_2_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
-          [(float:XF (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
-           (match_operand:XF 2 "register_operand" "0,0")]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
+         [(float:XF
+            (match_operand:X87MODEI12 1 "nonimmediate_operand" "m,?r"))
+          (match_operand:XF 2 "register_operand" "0,0")]))]
+  "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type")
         (cond [(match_operand:XF 3 "mult_operator" "")
    (set_attr "fp_int_src" "true")
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*fop_xf_3<mode>_i387"
+(define_insn "*fop_xf_3_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
        (match_operator:XF 3 "binary_fp_operator"
          [(match_operand:XF 1 "register_operand" "0,0")
-          (float:XF (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
-  "TARGET_80387 && TARGET_USE_<MODE>MODE_FIOP"
+          (float:XF
+            (match_operand:X87MODEI12 2 "nonimmediate_operand" "m,?r"))]))]
+  "TARGET_80387 && (TARGET_USE_<MODE>MODE_FIOP || optimize_function_for_size_p (cfun))"
   "* return which_alternative ? \"#\" : output_387_binary_op (insn, operands);"
   [(set (attr "type")
         (cond [(match_operand:XF 3 "mult_operator" "")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*fop_xf_5_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*fop_xf_6_i387"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
                  (const_string "fdiv")
               ]
               (const_string "fop")))
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
 (define_split
   [(set (match_operand 0 "register_operand" "")
        (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
                   UNSPEC_RSQRT))]
   "TARGET_SSE_MATH"
-  "rsqrtss\t{%1, %0|%0, %1}"
+  "%vrsqrtss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
 (define_expand "rsqrtsf2"
   [(set (match_operand:SF 0 "register_operand" "")
        (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "")]
                   UNSPEC_RSQRT))]
-  "TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
-   && flag_finite_math_only && !flag_trapping_math
-   && flag_unsafe_math_optimizations"
+  "TARGET_SSE_MATH"
 {
   ix86_emit_swsqrtsf (operands[0], operands[1], SFmode, 1);
   DONE;
        (sqrt:MODEF
          (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "sqrts<ssemodefsuffix>\t{%1, %0|%0, %1}"
+  "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "*")
    (set_attr "amdfam10_decode" "*")])
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
 {
   if (<MODE>mode == SFmode
-      && TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
+      && TARGET_SSE_MATH && TARGET_RECIP && !optimize_function_for_size_p (cfun)
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations)
     {
 
 (define_expand "fmodxf3"
   [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))
-   (use (match_operand:XF 2 "register_operand" ""))]
+   (use (match_operand:XF 1 "general_operand" ""))
+   (use (match_operand:XF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
-  rtx op2;
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
 
-  if (rtx_equal_p (operands[1], operands[2]))
-    {
-      op2 = gen_reg_rtx (XFmode);
-      emit_move_insn (op2, operands[2]);
-    }
-  else
-    op2 = operands[2];
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
 
   emit_label (label);
-  emit_insn (gen_fpremxf4_i387 (operands[1], op2, operands[1], op2));
+  emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
   LABEL_NUSES (label) = 1;
 
-  emit_move_insn (operands[0], operands[1]);
+  emit_move_insn (operands[0], op1);
   DONE;
 })
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
 
   emit_label (label);
   emit_insn (gen_fpremxf4_i387 (op1, op2, op1, op2));
 
 (define_expand "remainderxf3"
   [(use (match_operand:XF 0 "register_operand" ""))
-   (use (match_operand:XF 1 "register_operand" ""))
-   (use (match_operand:XF 2 "register_operand" ""))]
+   (use (match_operand:XF 1 "general_operand" ""))
+   (use (match_operand:XF 2 "general_operand" ""))]
   "TARGET_USE_FANCY_MATH_387"
 {
   rtx label = gen_label_rtx ();
 
-  rtx op2;
+  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op2 = gen_reg_rtx (XFmode);
 
-  if (rtx_equal_p (operands[1], operands[2]))
-    {
-      op2 = gen_reg_rtx (XFmode);
-      emit_move_insn (op2, operands[2]);
-    }
-  else
-    op2 = operands[2];
+  emit_move_insn (op2, operands[2]);
+  emit_move_insn (op1, operands[1]);
 
   emit_label (label);
-  emit_insn (gen_fprem1xf4_i387 (operands[1], op2, operands[1], op2));
+  emit_insn (gen_fprem1xf4_i387 (op1, op2, op1, op2));
   ix86_emit_fp_unordered_jump (label);
   LABEL_NUSES (label) = 1;
 
-  emit_move_insn (operands[0], operands[1]);
+  emit_move_insn (operands[0], op1);
   DONE;
 })
 
   rtx op1 = gen_reg_rtx (XFmode);
   rtx op2 = gen_reg_rtx (XFmode);
 
-  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
+  emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
 
   emit_label (label);
 
                              UNSPEC_FPATAN))
              (clobber (match_scratch:XF 6 ""))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   int i;
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   for (i = 2; i < 6; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_asinxf2 (op0, op1));
   emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
                              UNSPEC_FPATAN))
              (clobber (match_scratch:XF 6 ""))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   int i;
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   for (i = 2; i < 6; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   rtx op0 = gen_reg_rtx (XFmode);
   rtx op1 = gen_reg_rtx (XFmode);
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_acosxf2 (op0, op1));
   emit_insn (gen_truncxf<mode>2_i387_noop (operands[0], op0));
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   ix86_emit_i387_log1p (operands[0], operands[1]);
   DONE;
 })
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
+  rtx op0;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
 
   operands[1] = gen_rtx_FLOAT_EXTEND (XFmode, operands[1]);
 
   [(use (match_operand:SI 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_fxtractxf3_i387 (op0, op1, operands[1]));
   emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
   "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_fxtract_extend<mode>xf3_i387 (op0, op1, operands[1]));
   emit_insn (gen_fix_truncxfsi2 (operands[0], op1));
                   (unspec:XF [(match_dup 8) (match_dup 4)]
                              UNSPEC_FSCALE_EXP))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   int i;
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   for (i = 3; i < 10; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op2 = gen_reg_rtx (XFmode);
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
   emit_move_insn (op2, standard_80387_constant_rtx (5)); /* fldl2e */
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_expxf2 (op0, op1));
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op2 = gen_reg_rtx (XFmode);
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
   emit_move_insn (op2, standard_80387_constant_rtx (6)); /* fldl2t */
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_exp10xf2 (op0, op1));
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op2 = gen_reg_rtx (XFmode);
+  rtx op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op2 = gen_reg_rtx (XFmode);
   emit_move_insn (op2, CONST1_RTX (XFmode));  /* fld1 */
 
   emit_insn (gen_expNcorexf3 (operands[0], operands[1], op2));
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_exp2xf2 (op0, op1));
    (set (match_operand:XF 0 "register_operand" "")
        (plus:XF (match_dup 12) (match_dup 7)))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
   int i;
 
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   for (i = 2; i < 13; i++)
     operands[i] = gen_reg_rtx (XFmode);
 
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_expm1xf2 (op0, op1));
                   (unspec:XF [(match_dup 1) (match_dup 3)]
                              UNSPEC_FSCALE_EXP))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   operands[3] = gen_reg_rtx (XFmode);
   operands[4] = gen_reg_rtx (XFmode);
 })
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
+  rtx op0, op1;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_ldexpxf3 (op0, op1, operands[2]));
                   (unspec:XF [(match_dup 1) (match_dup 2)]
                              UNSPEC_FSCALE_EXP))])]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
   operands[3] = gen_reg_rtx (XFmode);
 })
 
  "TARGET_USE_FANCY_MATH_387
    && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
-  rtx op0 = gen_reg_rtx (XFmode);
-  rtx op1 = gen_reg_rtx (XFmode);
-  rtx op2 = gen_reg_rtx (XFmode);
+  rtx op0, op1, op2;
+
+  if (optimize_insn_for_size_p ())
+    FAIL;
+
+  op0 = gen_reg_rtx (XFmode);
+  op1 = gen_reg_rtx (XFmode);
+  op2 = gen_reg_rtx (XFmode);
 
   emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
   emit_insn (gen_extend<mode>xf2 (op2, operands[2]));
                       (match_operand:SI 2 "const_0_to_15_operand" "n")]
                      UNSPEC_ROUND))]
   "TARGET_ROUND"
-  "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "rintxf2"
        || TARGET_MIX_SSE_I387)
     && flag_unsafe_math_optimizations)
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && !flag_trapping_math
-       && (TARGET_ROUND || !optimize_size))"
+       && !flag_trapping_math)"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-      && !flag_trapping_math
-      && (TARGET_ROUND || !optimize_size))
+      && !flag_trapping_math)
     {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+       FAIL;
       if (TARGET_ROUND)
        emit_insn (gen_sse4_1_round<mode>2
                   (operands[0], operands[1], GEN_INT (0x04)));
   [(match_operand:MODEF 0 "register_operand" "")
    (match_operand:MODEF 1 "nonimmediate_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-   && !flag_trapping_math && !flag_rounding_math
-   && !optimize_size"
+   && !flag_trapping_math && !flag_rounding_math"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   if (TARGET_64BIT || (<MODE>mode != DFmode))
     ix86_expand_round (operand0, operand1);
   else
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,?r")
        (unspec:DI [(match_operand:XF 1 "register_operand" "f,f")]
                   UNSPEC_FIST))
-   (clobber (match_operand:DI 2 "memory_operand" "=m,m"))
+   (clobber (match_operand:DI 2 "memory_operand" "=X,m"))
    (clobber (match_scratch:XF 3 "=&1f,&1f"))]
   "TARGET_USE_FANCY_MATH_387"
   "#"
    (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && ((<SSEMODEI24:MODE>mode != DImode) || TARGET_64BIT)
-   && !flag_trapping_math && !flag_rounding_math
-   && !optimize_size"
+   && !flag_trapping_math && !flag_rounding_math"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   ix86_expand_lround (operand0, operand1);
   DONE;
 })
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   emit_insn (gen_frndintxf2_floor (operands[0], operands[1]));
   DONE;
 })
   "(TARGET_USE_FANCY_MATH_387
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-    && flag_unsafe_math_optimizations && !optimize_size)
+    && flag_unsafe_math_optimizations)
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && !flag_trapping_math
-       && (TARGET_ROUND || !optimize_size))"
+       && !flag_trapping_math)"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math
-      && (TARGET_ROUND || !optimize_size))
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
     {
+      if (!TARGET_ROUND && optimize_insn_for_size_p ())
+       FAIL;
       if (TARGET_ROUND)
        emit_insn (gen_sse4_1_round<mode>2
                   (operands[0], operands[1], GEN_INT (0x01)));
     }
   else
     {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+       FAIL;
 
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
       emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_floor (op0, op1));
 
         UNSPEC_FIST_FLOOR))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
    (clobber (match_scratch:XF 5 "=&1f,&1f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
         UNSPEC_FIST_FLOOR))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "#"
   [(match_operand:DI 0 "nonimmediate_operand" "")
    (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH && TARGET_64BIT
-   && !flag_trapping_math
-   && !optimize_size"
+   && !flag_trapping_math"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   ix86_expand_lfloorceil (operand0, operand1, true);
   DONE;
 })
   [(match_operand:SI 0 "nonimmediate_operand" "")
    (match_operand:MODEF 1 "register_operand" "")]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-   && !flag_trapping_math
-   && (!optimize_size || !TARGET_64BIT)"
+   && !flag_trapping_math"
 {
+  if (optimize_insn_for_size_p () && TARGET_64BIT)
+    FAIL;
   ix86_expand_lfloorceil (operand0, operand1, true);
   DONE;
 })
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   emit_insn (gen_frndintxf2_ceil (operands[0], operands[1]));
   DONE;
 })
   "(TARGET_USE_FANCY_MATH_387
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-    && flag_unsafe_math_optimizations && !optimize_size)
+    && flag_unsafe_math_optimizations)
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && !flag_trapping_math
-       && (TARGET_ROUND || !optimize_size))"
+       && !flag_trapping_math)"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math
-      && (TARGET_ROUND || !optimize_size))
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
     {
       if (TARGET_ROUND)
        emit_insn (gen_sse4_1_round<mode>2
                   (operands[0], operands[1], GEN_INT (0x02)));
+      else if (optimize_insn_for_size_p ())
+       FAIL;
       else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_floorceil (operand0, operand1, false);
       else
     }
   else
     {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+       FAIL;
 
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
       emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_ceil (op0, op1));
 
         UNSPEC_FIST_CEIL))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:DI 4 "memory_operand" "=m,m"))
+   (clobber (match_operand:DI 4 "memory_operand" "=X,m"))
    (clobber (match_scratch:XF 5 "=&1f,&1f"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
         UNSPEC_FIST_CEIL))
    (use (match_operand:HI 2 "memory_operand" "m,m"))
    (use (match_operand:HI 3 "memory_operand" "m,m"))
-   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=m,m"))]
+   (clobber (match_operand:X87MODEI12 4 "memory_operand" "=X,m"))]
   "TARGET_USE_FANCY_MATH_387
    && flag_unsafe_math_optimizations"
   "#"
   [(use (match_operand:XF 0 "register_operand" ""))
    (use (match_operand:XF 1 "register_operand" ""))]
   "TARGET_USE_FANCY_MATH_387
-   && flag_unsafe_math_optimizations && !optimize_size"
+   && flag_unsafe_math_optimizations"
 {
+  if (optimize_insn_for_size_p ())
+    FAIL;
   emit_insn (gen_frndintxf2_trunc (operands[0], operands[1]));
   DONE;
 })
   "(TARGET_USE_FANCY_MATH_387
     && (!(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)
        || TARGET_MIX_SSE_I387)
-    && flag_unsafe_math_optimizations && !optimize_size)
+    && flag_unsafe_math_optimizations)
    || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
-       && !flag_trapping_math
-       && (TARGET_ROUND || !optimize_size))"
+       && !flag_trapping_math)"
 {
   if (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
       && !flag_trapping_math
-      && (TARGET_ROUND || !optimize_size))
+      && (TARGET_ROUND || optimize_insn_for_speed_p ()))
     {
       if (TARGET_ROUND)
        emit_insn (gen_sse4_1_round<mode>2
                   (operands[0], operands[1], GEN_INT (0x03)));
+      else if (optimize_insn_for_size_p ())
+       FAIL;
       else if (TARGET_64BIT || (<MODE>mode != DFmode))
        ix86_expand_trunc (operand0, operand1);
       else
     }
   else
     {
-      rtx op0 = gen_reg_rtx (XFmode);
-      rtx op1 = gen_reg_rtx (XFmode);
+      rtx op0, op1;
+
+      if (optimize_insn_for_size_p ())
+       FAIL;
 
+      op0 = gen_reg_rtx (XFmode);
+      op1 = gen_reg_rtx (XFmode);
       emit_insn (gen_extend<mode>xf2 (op1, operands[1]));
       emit_insn (gen_frndintxf2_trunc (op0, op1));
 
 \f
 ;; Block operation instructions
 
+(define_insn "cld"
+  [(unspec_volatile [(const_int 0)] UNSPECV_CLD)]
+  ""
+  "cld"
+  [(set_attr "length" "1")
+   (set_attr "length_immediate" "0")
+   (set_attr "modrm" "0")])
+
 (define_expand "movmemsi"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
   operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
 
   /* Can't use this if the user has appropriated esi or edi.  */
-  if ((TARGET_SINGLE_STRINGOP || optimize_size)
-      && !(global_regs[SI_REG] || global_regs[DI_REG]))
+  if ((TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
+      && !(fixed_regs[SI_REG] || fixed_regs[DI_REG]))
     {
       emit_insn (gen_strmov_singleop (operands[0], operands[1],
                                      operands[2], operands[3],
                   (match_operand 4 "" ""))
              (set (match_operand 2 "register_operand" "")
                   (match_operand 5 "" ""))])]
-  "TARGET_SINGLE_STRINGOP || optimize_size"
-  "")
+  ""
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strmovdi_rex_1"
   [(set (mem:DI (match_operand:DI 2 "register_operand" "0"))
    (set (match_operand:DI 1 "register_operand" "=S")
        (plus:DI (match_dup 3)
                 (const_int 8)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "movsq"
   [(set_attr "type" "str")
    (set_attr "mode" "DI")
    (set (match_operand:SI 1 "register_operand" "=S")
        (plus:SI (match_dup 3)
                 (const_int 4)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
-  "{movsl|movsd}"
+  "!TARGET_64BIT"
+  "movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "mode" "SI")
    (set_attr "memory" "both")])
    (set (match_operand:DI 1 "register_operand" "=S")
        (plus:DI (match_dup 3)
                 (const_int 4)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
-  "{movsl|movsd}"
+  "TARGET_64BIT"
+  "movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "mode" "SI")
    (set_attr "memory" "both")])
    (set (match_operand:SI 1 "register_operand" "=S")
        (plus:SI (match_dup 3)
                 (const_int 2)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "!TARGET_64BIT"
   "movsw"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
    (set (match_operand:DI 1 "register_operand" "=S")
        (plus:DI (match_dup 3)
                 (const_int 2)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "movsw"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
    (set (match_operand:SI 1 "register_operand" "=S")
        (plus:SI (match_dup 3)
                 (const_int 1)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "!TARGET_64BIT"
   "movsb"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
    (set (match_operand:DI 1 "register_operand" "=S")
        (plus:DI (match_dup 3)
                 (const_int 1)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "movsb"
   [(set_attr "type" "str")
    (set_attr "memory" "both")
                   (match_operand 3 "memory_operand" ""))
              (use (match_dup 4))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_movdi_rex64"
   [(set (match_operand:DI 2 "register_operand" "=c") (const_int 0))
   operands[3] = gen_rtx_PLUS (Pmode, operands[0],
                              GEN_INT (GET_MODE_SIZE (GET_MODE
                                                      (operands[2]))));
-  if (TARGET_SINGLE_STRINGOP || optimize_size)
+  if (TARGET_SINGLE_STRINGOP || optimize_insn_for_size_p ())
     {
       emit_insn (gen_strset_singleop (operands[0], operands[1], operands[2],
                                      operands[3]));
                   (match_operand 2 "register_operand" ""))
              (set (match_operand 0 "register_operand" "")
                   (match_operand 3 "" ""))])]
-  "TARGET_SINGLE_STRINGOP || optimize_size"
-  "")
+  ""
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strsetdi_rex_1"
   [(set (mem:DI (match_operand:DI 1 "register_operand" "0"))
    (set (match_operand:DI 0 "register_operand" "=D")
        (plus:DI (match_dup 1)
                 (const_int 8)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "stosq"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set (match_operand:SI 0 "register_operand" "=D")
        (plus:SI (match_dup 1)
                 (const_int 4)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
-  "{stosl|stosd}"
+  "!TARGET_64BIT"
+  "stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
    (set (match_operand:DI 0 "register_operand" "=D")
        (plus:DI (match_dup 1)
                 (const_int 4)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
-  "{stosl|stosd}"
+  "TARGET_64BIT"
+  "stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set_attr "mode" "SI")])
    (set (match_operand:SI 0 "register_operand" "=D")
        (plus:SI (match_dup 1)
                 (const_int 2)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "!TARGET_64BIT"
   "stosw"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set (match_operand:DI 0 "register_operand" "=D")
        (plus:DI (match_dup 1)
                 (const_int 2)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "stosw"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set (match_operand:SI 0 "register_operand" "=D")
        (plus:SI (match_dup 1)
                 (const_int 1)))]
-  "!TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "!TARGET_64BIT"
   "stosb"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
    (set (match_operand:DI 0 "register_operand" "=D")
        (plus:DI (match_dup 1)
                 (const_int 1)))]
-  "TARGET_64BIT && (TARGET_SINGLE_STRINGOP || optimize_size)"
+  "TARGET_64BIT"
   "stosb"
   [(set_attr "type" "str")
    (set_attr "memory" "store")
              (use (match_operand 3 "register_operand" ""))
              (use (match_dup 1))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*rep_stosdi_rex64"
   [(set (match_operand:DI 1 "register_operand" "=c") (const_int 0))
                    (match_operand:BLK 2 "general_operand" "")))
    (use (match_operand 3 "general_operand" ""))
    (use (match_operand 4 "immediate_operand" ""))]
-  "! optimize_size || TARGET_INLINE_ALL_STRINGOPS"
+  ""
 {
   rtx addr1, addr2, out, outlow, count, countreg, align;
 
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    FAIL;
+
   /* Can't use this if the user has appropriated esi or edi.  */
-  if (global_regs[SI_REG] || global_regs[DI_REG])
+  if (fixed_regs[SI_REG] || fixed_regs[DI_REG])
     FAIL;
 
   out = operands[0];
              (clobber (match_operand 1 "register_operand" ""))
              (clobber (match_dup 2))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*cmpstrnqi_nz_1"
   [(set (reg:CC FLAGS_REG)
              (clobber (match_operand 1 "register_operand" ""))
              (clobber (match_dup 2))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*cmpstrnqi_1"
   [(set (reg:CC FLAGS_REG)
              (clobber (match_operand 1 "register_operand" ""))
              (clobber (reg:CC FLAGS_REG))])]
   ""
-  "")
+  "ix86_current_function_needs_cld = 1;")
 
 (define_insn "*strlenqi_1"
   [(set (match_operand:SI 0 "register_operand" "=&c")
                         (match_operand:DI 2 "general_operand" "")
                         (match_operand:DI 3 "general_operand" "")))]
   "TARGET_64BIT"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "x86_movdicc_0_m1_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "0")])
 
+(define_insn "*x86_movdicc_0_m1_se"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (sign_extract:DI (match_operand 1 "ix86_carry_flag_operator" "")
+                        (const_int 1)
+                        (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{q}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "DI")
+   (set_attr "length_immediate" "0")])
+
 (define_insn "*movdicc_c_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
        (if_then_else:DI (match_operator 1 "ix86_comparison_operator"
                         (match_operand:SI 2 "general_operand" "")
                         (match_operand:SI 3 "general_operand" "")))]
   ""
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 ;; Data flow gets confused by our desire for `sbbl reg,reg', and clearing
 ;; the register first winds up with `sbbl $0,reg', which is also weird.
    (set_attr "mode" "SI")
    (set_attr "length_immediate" "0")])
 
+(define_insn "*x86_movsicc_0_m1_se"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+       (sign_extract:SI (match_operand 1 "ix86_carry_flag_operator" "")
+                        (const_int 1)
+                        (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "sbb{l}\t%0, %0"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "memory" "none")
+   (set_attr "imm_disp" "false")
+   (set_attr "mode" "SI")
+   (set_attr "length_immediate" "0")])
+
 (define_insn "*movsicc_noc"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
        (if_then_else:SI (match_operator 1 "ix86_comparison_operator"
                         (match_operand:HI 2 "general_operand" "")
                         (match_operand:HI 3 "general_operand" "")))]
   "TARGET_HIMODE_MATH"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "*movhicc_noc"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
                         (match_operand:QI 2 "general_operand" "")
                         (match_operand:QI 3 "general_operand" "")))]
   "TARGET_QIMODE_MATH"
-  "if (!ix86_expand_int_movcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_movcc (operands)) DONE; else FAIL;")
 
 (define_insn_and_split "*movqicc_noc"
   [(set (match_operand:QI 0 "register_operand" "=r,r")
   [(set_attr "type" "icmov")
    (set_attr "mode" "SI")])
 
-(define_expand "movsfcc"
-  [(set (match_operand:SF 0 "register_operand" "")
-       (if_then_else:SF (match_operand 1 "comparison_operator" "")
-                        (match_operand:SF 2 "register_operand" "")
-                        (match_operand:SF 3 "register_operand" "")))]
-  "(TARGET_80387 && TARGET_CMOVE) || TARGET_SSE_MATH"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+(define_expand "mov<mode>cc"
+  [(set (match_operand:X87MODEF 0 "register_operand" "")
+       (if_then_else:X87MODEF
+         (match_operand 1 "comparison_operator" "")
+         (match_operand:X87MODEF 2 "register_operand" "")
+         (match_operand:X87MODEF 3 "register_operand" "")))]
+  "(TARGET_80387 && TARGET_CMOVE)
+   || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH)"
+  "if (ix86_expand_fp_movcc (operands)) DONE; else FAIL;")
 
 (define_insn "*movsfcc_1_387"
   [(set (match_operand:SF 0 "register_operand" "=f,f,r,r")
   [(set_attr "type" "fcmov,fcmov,icmov,icmov")
    (set_attr "mode" "SF,SF,SI,SI")])
 
-(define_expand "movdfcc"
-  [(set (match_operand:DF 0 "register_operand" "")
-       (if_then_else:DF (match_operand 1 "comparison_operator" "")
-                        (match_operand:DF 2 "register_operand" "")
-                        (match_operand:DF 3 "register_operand" "")))]
-  "(TARGET_80387 && TARGET_CMOVE) || (TARGET_SSE2 && TARGET_SSE_MATH)"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
-
 (define_insn "*movdfcc_1"
   [(set (match_operand:DF 0 "register_operand" "=f,f,&r,&r")
        (if_then_else:DF (match_operator 1 "fcmov_comparison_operator"
   [(set (match_dup 2)
        (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
                      (match_dup 5)
-                     (match_dup 7)))
+                     (match_dup 6)))
    (set (match_dup 3)
        (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
-                     (match_dup 6)
+                     (match_dup 7)
                      (match_dup 8)))]
-  "split_di (operands+2, 1, operands+5, operands+6);
-   split_di (operands+3, 1, operands+7, operands+8);
-   split_di (operands, 1, operands+2, operands+3);")
-
-(define_expand "movxfcc"
-  [(set (match_operand:XF 0 "register_operand" "")
-       (if_then_else:XF (match_operand 1 "comparison_operator" "")
-                        (match_operand:XF 2 "register_operand" "")
-                        (match_operand:XF 3 "register_operand" "")))]
-  "TARGET_80387 && TARGET_CMOVE"
-  "if (! ix86_expand_fp_movcc (operands)) FAIL; DONE;")
+  "split_di (&operands[2], 2, &operands[5], &operands[7]);
+   split_di (&operands[0], 1, &operands[2], &operands[3]);")
 
 (define_insn "*movxfcc_1"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
   [(set_attr "type" "fcmov")
    (set_attr "mode" "XF")])
 
+;; All moves in SSE5 pcmov instructions are 128 bits and hence we restrict
+;; the scalar versions to have only XMM registers as operands.
+
 ;; SSE5 conditional move
 (define_insn "*sse5_pcmov_<mode>"
-  [(set (match_operand:MODEF 0 "register_operand" "=x,x,x,x")
+  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
        (if_then_else:MODEF
-         (match_operand:MODEF 1 "nonimmediate_operand" "xm,x,0,0")
-         (match_operand:MODEF 2 "nonimmediate_operand" "0,0,x,xm")
-         (match_operand:MODEF 3 "vector_move_operand" "x,xm,xm,x")))]
-  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1)"
+         (match_operand:MODEF 1 "register_operand" "x,0")
+         (match_operand:MODEF 2 "register_operand" "0,x")
+         (match_operand:MODEF 3 "register_operand" "x,x")))]
+  "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
   "pcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}"
   [(set_attr "type" "sse4arg")])
 
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
 ;; are undefined in this condition, we're certain this is correct.
 
-(define_insn "sminsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (smin:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
-                (match_operand:SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE_MATH"
-  "minss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "smaxsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (smax:SF (match_operand:SF 1 "nonimmediate_operand" "%0")
-                (match_operand:SF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE_MATH"
-  "maxss\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
-
-(define_insn "smindf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (smin:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
-                (match_operand:DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "minsd\t{%2, %0|%0, %2}"
+(define_insn "*avx_<code><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (smaxmin:MODEF
+         (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+         (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "smaxdf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (smax:DF (match_operand:DF 1 "nonimmediate_operand" "%0")
-                (match_operand:DF 2 "nonimmediate_operand" "xm")))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "maxsd\t{%2, %0|%0, %2}"
+(define_insn "<code><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (smaxmin:MODEF
+         (match_operand:MODEF 1 "nonimmediate_operand" "%0")
+         (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "<maxminfprefix>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; These versions of the min/max patterns implement exactly the operations
 ;;   min = (op1 < op2 ? op1 : op2)
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
-(define_insn "*ieee_sminsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (unspec:SF [(match_operand:SF 1 "register_operand" "0")
-                   (match_operand:SF 2 "nonimmediate_operand" "xm")]
-                  UNSPEC_IEEE_MIN))]
-  "TARGET_SSE_MATH"
-  "minss\t{%2, %0|%0, %2}"
+(define_insn "*avx_ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "register_operand" "x")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+        UNSPEC_IEEE_MIN))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*ieee_smaxsf3"
-  [(set (match_operand:SF 0 "register_operand" "=x")
-       (unspec:SF [(match_operand:SF 1 "register_operand" "0")
-                   (match_operand:SF 2 "nonimmediate_operand" "xm")]
-                  UNSPEC_IEEE_MAX))]
-  "TARGET_SSE_MATH"
-  "maxss\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "register_operand" "0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+        UNSPEC_IEEE_MIN))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "mins<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "SF")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*ieee_smindf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (unspec:DF [(match_operand:DF 1 "register_operand" "0")
-                   (match_operand:DF 2 "nonimmediate_operand" "xm")]
-                  UNSPEC_IEEE_MIN))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "minsd\t{%2, %0|%0, %2}"
+(define_insn "*avx_ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "register_operand" "0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+        UNSPEC_IEEE_MAX))]
+  "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*ieee_smaxdf3"
-  [(set (match_operand:DF 0 "register_operand" "=x")
-       (unspec:DF [(match_operand:DF 1 "register_operand" "0")
-                   (match_operand:DF 2 "nonimmediate_operand" "xm")]
-                  UNSPEC_IEEE_MAX))]
-  "TARGET_SSE2 && TARGET_SSE_MATH"
-  "maxsd\t{%2, %0|%0, %2}"
+(define_insn "*ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+       (unspec:MODEF
+         [(match_operand:MODEF 1 "register_operand" "0")
+          (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+        UNSPEC_IEEE_MAX))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "maxs<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
-   (set_attr "mode" "DF")])
+   (set_attr "mode" "<MODE>")])
 
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
                                 operands[1], operands[0]);")
 
 ;; Conditional addition patterns
-(define_expand "addqicc"
-  [(match_operand:QI 0 "register_operand" "")
-   (match_operand 1 "comparison_operator" "")
-   (match_operand:QI 2 "register_operand" "")
-   (match_operand:QI 3 "const_int_operand" "")]
-  ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
-
-(define_expand "addhicc"
-  [(match_operand:HI 0 "register_operand" "")
-   (match_operand 1 "comparison_operator" "")
-   (match_operand:HI 2 "register_operand" "")
-   (match_operand:HI 3 "const_int_operand" "")]
-  ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
-
-(define_expand "addsicc"
-  [(match_operand:SI 0 "register_operand" "")
+(define_expand "add<mode>cc"
+  [(match_operand:SWI 0 "register_operand" "")
    (match_operand 1 "comparison_operator" "")
-   (match_operand:SI 2 "register_operand" "")
-   (match_operand:SI 3 "const_int_operand" "")]
+   (match_operand:SWI 2 "register_operand" "")
+   (match_operand:SWI 3 "const_int_operand" "")]
   ""
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
-
-(define_expand "adddicc"
-  [(match_operand:DI 0 "register_operand" "")
-   (match_operand 1 "comparison_operator" "")
-   (match_operand:DI 2 "register_operand" "")
-   (match_operand:DI 3 "const_int_operand" "")]
-  "TARGET_64BIT"
-  "if (!ix86_expand_int_addcc (operands)) FAIL; DONE;")
+  "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
 
 \f
 ;; Misc patterns (?)
    (set_attr "mode" "DI")])
 
 (define_insn "allocate_stack_worker_32"
-  [(set (match_operand:SI 0 "register_operand" "+a")
-       (unspec_volatile:SI [(match_dup 0)] UNSPECV_STACK_PROBE))
-   (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 0)))
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "0")]
+                           UNSPECV_STACK_PROBE))
+   (set (reg:SI SP_REG) (minus:SI (reg:SI SP_REG) (match_dup 1)))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT && TARGET_STACK_PROBE"
-  "call\t__alloca"
+  "call\t___chkstk"
   [(set_attr "type" "multi")
    (set_attr "length" "5")])
 
 (define_insn "allocate_stack_worker_64"
   [(set (match_operand:DI 0 "register_operand" "=a")
-       (unspec_volatile:DI [(match_dup 0)] UNSPECV_STACK_PROBE))
-   (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 0)))
+       (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "0")]
+                           UNSPECV_STACK_PROBE))
+   (set (reg:DI SP_REG) (minus:DI (reg:DI SP_REG) (match_dup 1)))
    (clobber (reg:DI R10_REG))
    (clobber (reg:DI R11_REG))
    (clobber (reg:CC FLAGS_REG))]
     {
       x = copy_to_mode_reg (Pmode, operands[1]);
       if (TARGET_64BIT)
-       x = gen_allocate_stack_worker_64 (x);
+       x = gen_allocate_stack_worker_64 (x, x);
       else
-       x = gen_allocate_stack_worker_32 (x);
+       x = gen_allocate_stack_worker_32 (x, x);
       emit_insn (x);
     }
 
   [(label_ref (match_operand 0 "" ""))]
   "!TARGET_64BIT && flag_pic"
 {
+#if TARGET_MACHO
   if (TARGET_MACHO)
     {
       rtx xops[3];
       rtx label_rtx = gen_label_rtx ();
       emit_insn (gen_set_got_labelled (pic_offset_table_rtx, label_rtx));
       xops[0] = xops[1] = picreg;
-      xops[2] = gen_rtx_CONST (SImode,
-                 gen_rtx_MINUS (SImode,
-                   gen_rtx_LABEL_REF (SImode, label_rtx),
-                   gen_rtx_SYMBOL_REF (SImode, GOT_SYMBOL_NAME)));
+      xops[2] = machopic_gen_offset (gen_rtx_LABEL_REF (SImode, label_rtx));
       ix86_expand_binary_operator (MINUS, SImode, xops);
     }
   else
+#endif
     emit_insn (gen_set_got (pic_offset_table_rtx));
   DONE;
 })
    (clobber (reg:CC FLAGS_REG))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && ((GET_MODE (operands[0]) == HImode
-       && ((!optimize_size && !TARGET_FAST_PREFIX)
+       && ((optimize_function_for_speed_p (cfun) && !TARGET_FAST_PREFIX)
             /* ??? next two lines just !satisfies_constraint_K (...) */
            || !CONST_INT_P (operands[2])
            || satisfies_constraint_K (operands[2])))
        || (GET_MODE (operands[0]) == QImode
-          && (TARGET_PROMOTE_QImode || optimize_size)))"
+          && (TARGET_PROMOTE_QImode || optimize_function_for_size_p (cfun))))"
   [(parallel [(set (match_dup 0)
                   (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
              (clobber (reg:CC FLAGS_REG))])]
    (set (match_operand 1 "register_operand" "")
        (and (match_dup 3) (match_dup 4)))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
-   && ! optimize_size
+   && optimize_insn_for_speed_p ()
    && ((GET_MODE (operands[1]) == HImode && ! TARGET_FAST_PREFIX)
        || (GET_MODE (operands[1]) == QImode && TARGET_PROMOTE_QImode))
    /* Ensure that the operand will remain sign-extended immediate.  */
           (const_int 0)]))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && ! TARGET_FAST_PREFIX
-   && ! optimize_size
+   && optimize_insn_for_speed_p ()
    /* Ensure that the operand will remain sign-extended immediate.  */
    && ix86_match_ccmode (insn, INTVAL (operands[3]) >= 0 ? CCNOmode : CCZmode)"
   [(set (match_dup 0)
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && (GET_MODE (operands[0]) == HImode
        || (GET_MODE (operands[0]) == QImode
-          && (TARGET_PROMOTE_QImode || optimize_size)))"
+          && (TARGET_PROMOTE_QImode
+              || optimize_insn_for_size_p ())))"
   [(parallel [(set (match_dup 0)
                   (neg:SI (match_dup 1)))
              (clobber (reg:CC FLAGS_REG))])]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
    && (GET_MODE (operands[0]) == HImode
        || (GET_MODE (operands[0]) == QImode
-          && (TARGET_PROMOTE_QImode || optimize_size)))"
+          && (TARGET_PROMOTE_QImode
+              || optimize_insn_for_size_p ())))"
   [(set (match_dup 0)
        (not:SI (match_dup 1)))]
   "operands[0] = gen_lowpart (SImode, operands[0]);
   "! TARGET_PARTIAL_REG_STALL && TARGET_CMOVE
    && (GET_MODE (operands[0]) == HImode
        || (GET_MODE (operands[0]) == QImode
-          && (TARGET_PROMOTE_QImode || optimize_size)))"
+          && (TARGET_PROMOTE_QImode
+              || optimize_insn_for_size_p ())))"
   [(set (match_dup 0)
        (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))]
   "operands[0] = gen_lowpart (SImode, operands[0]);
   [(set (match_operand:SI 0 "push_operand" "")
        (match_operand:SI 1 "memory_operand" ""))
    (match_scratch:SI 2 "r")]
-  "!optimize_size && !TARGET_PUSH_MEMORY
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   [(set (match_operand:DI 0 "push_operand" "")
        (match_operand:DI 1 "memory_operand" ""))
    (match_scratch:DI 2 "r")]
-  "!optimize_size && !TARGET_PUSH_MEMORY
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   [(set (match_operand:SF 0 "push_operand" "")
        (match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "!optimize_size && !TARGET_PUSH_MEMORY
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   [(set (match_operand:HI 0 "push_operand" "")
        (match_operand:HI 1 "memory_operand" ""))
    (match_scratch:HI 2 "r")]
-  "!optimize_size && !TARGET_PUSH_MEMORY
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   [(set (match_operand:QI 0 "push_operand" "")
        (match_operand:QI 1 "memory_operand" ""))
    (match_scratch:QI 2 "q")]
-  "!optimize_size && !TARGET_PUSH_MEMORY
+  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   [(match_scratch:SI 1 "r")
    (set (match_operand:SI 0 "memory_operand" "")
         (const_int 0))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && ! TARGET_USE_MOV0
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 1) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   [(match_scratch:HI 1 "r")
    (set (match_operand:HI 0 "memory_operand" "")
         (const_int 0))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && ! TARGET_USE_MOV0
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 2) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   [(match_scratch:QI 1 "q")
    (set (match_operand:QI 0 "memory_operand" "")
         (const_int 0))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && ! TARGET_USE_MOV0
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 2) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])
   [(match_scratch:SI 2 "r")
    (set (match_operand:SI 0 "memory_operand" "")
         (match_operand:SI 1 "immediate_operand" ""))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn"
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(match_scratch:HI 2 "r")
    (set (match_operand:HI 0 "memory_operand" "")
         (match_operand:HI 1 "immediate_operand" ""))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn"
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
   [(match_scratch:QI 2 "q")
    (set (match_operand:QI 0 "memory_operand" "")
         (match_operand:QI 1 "immediate_operand" ""))]
-  "! optimize_size
+  "optimize_insn_for_speed_p ()
    && TARGET_SPLIT_LONG_MOVES
-   && get_attr_length (insn) >= ix86_cost->large_insn"
+   && get_attr_length (insn) >= ix86_cur_cost ()->large_insn"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
          [(match_operand:SI 2 "memory_operand" "")
           (const_int 0)]))
    (match_scratch:SI 3 "r")]
-  " ! optimize_size && ix86_match_ccmode (insn, CCNOmode)"
+  "optimize_insn_for_speed_p () && ix86_match_ccmode (insn, CCNOmode)"
   [(set (match_dup 3) (match_dup 2))
    (set (match_dup 0) (match_op_dup 1 [(match_dup 3) (const_int 0)]))]
   "")
 (define_peephole2
   [(set (match_operand:SI 0 "nonimmediate_operand" "")
        (not:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
-  "!optimize_size
+  "optimize_insn_for_speed_p ()
    && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], SImode)))
 (define_peephole2
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
        (not:HI (match_operand:HI 1 "nonimmediate_operand" "")))]
-  "!optimize_size
+  "optimize_insn_for_speed_p ()
    && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], HImode)))
 (define_peephole2
   [(set (match_operand:QI 0 "nonimmediate_operand" "")
        (not:QI (match_operand:QI 1 "nonimmediate_operand" "")))]
-  "!optimize_size
+  "optimize_insn_for_speed_p ()
    && ((TARGET_NOT_UNPAIRABLE
         && (!MEM_P (operands[0])
             || !memory_displacement_operand (operands[0], QImode)))
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "! optimize_size && ! TARGET_READ_MODIFY"
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "! optimize_size && ! TARGET_READ_MODIFY"
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
   [(set (match_dup 2) (match_dup 0))
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 2) (match_dup 1)]))
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "! optimize_size && ! TARGET_READ_MODIFY_WRITE"
+  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE"
   [(set (match_dup 2) (match_dup 0))
    (parallel [(set (match_dup 2)
                    (match_op_dup 3 [(match_dup 1) (match_dup 2)]))
   [(set (match_operand 0 "register_operand" "")
        (match_operand 1 "const0_operand" ""))]
   "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
-   && (! TARGET_USE_MOV0 || optimize_size)
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    && GENERAL_REG_P (operands[0])
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int 0))
        (const_int 0))]
   "(GET_MODE (operands[0]) == QImode
     || GET_MODE (operands[0]) == HImode)
-   && (! TARGET_USE_MOV0 || optimize_size)
+   && (! TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (strict_low_part (match_dup 0)) (const_int 0))
              (clobber (reg:CC FLAGS_REG))])])
   "(GET_MODE (operands[0]) == HImode
     || GET_MODE (operands[0]) == SImode
     || (GET_MODE (operands[0]) == DImode && TARGET_64BIT))
-   && (optimize_size || TARGET_MOVE_M1_VIA_OR)
+   && (optimize_insn_for_size_p () || TARGET_MOVE_M1_VIA_OR)
    && peep2_regno_dead_p (0, FLAGS_REG)"
   [(parallel [(set (match_dup 0) (const_int -1))
              (clobber (reg:CC FLAGS_REG))])]
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_SUB_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
   [(clobber (match_dup 0))
    (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
              (clobber (mem:BLK (scratch)))])])
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_SUB_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
   [(clobber (match_dup 0))
    (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
    (parallel [(set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
   [(match_scratch:SI 0 "r")
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -4)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size || !TARGET_SUB_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
   [(clobber (match_dup 0))
    (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
 
   [(match_scratch:SI 0 "r")
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int -8)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size || !TARGET_SUB_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
   [(clobber (match_dup 0))
    (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))
    (set (mem:SI (pre_dec:SI (reg:SI SP_REG))) (match_dup 0))])
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_ADD_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
   [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
              (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
              (clobber (mem:BLK (scratch)))])]
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_ADD_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
   [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
              (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
              (clobber (mem:BLK (scratch)))])
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size"
+  "optimize_insn_for_size_p ()"
   [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
              (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))
              (clobber (mem:BLK (scratch)))])
   [(match_scratch:SI 0 "r")
    (parallel [(set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size"
+  "optimize_insn_for_size_p ()"
   [(parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
              (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (const_int 4)))])
    (parallel [(set (match_dup 0) (mem:SI (reg:SI SP_REG)))
   "")
 \f
 ;; Convert compares with 1 to shorter inc/dec operations when CF is not
-;; required and register dies.  Similarly for 128 to plus -128.
+;; required and register dies.  Similarly for 128 to -128.
 (define_peephole2
   [(set (match_operand 0 "flags_reg_operand" "")
        (match_operator 1 "compare_operator"
          [(match_operand 2 "register_operand" "")
           (match_operand 3 "const_int_operand" "")]))]
-  "(INTVAL (operands[3]) == -1
-    || INTVAL (operands[3]) == 1
-    || INTVAL (operands[3]) == 128)
+  "(((!TARGET_FUSE_CMP_AND_BRANCH || optimize_size)
+     && incdec_operand (operands[3], GET_MODE (operands[3])))
+    || (!TARGET_FUSE_CMP_AND_BRANCH
+       && INTVAL (operands[3]) == 128))
    && ix86_match_ccmode (insn, CCGCmode)
    && peep2_reg_dead_p (1, operands[2])"
   [(parallel [(set (match_dup 0)
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_SUB_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
   [(clobber (match_dup 0))
    (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
              (clobber (mem:BLK (scratch)))])])
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_SUB_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
   [(clobber (match_dup 0))
    (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
    (parallel [(set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
   [(match_scratch:DI 0 "r")
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -8)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size || !TARGET_SUB_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_4"
   [(clobber (match_dup 0))
    (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
 
   [(match_scratch:DI 0 "r")
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int -16)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size || !TARGET_SUB_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_SUB_ESP_8"
   [(clobber (match_dup 0))
    (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))
    (set (mem:DI (pre_dec:DI (reg:DI SP_REG))) (match_dup 0))])
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_ADD_ESP_4"
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_4"
   [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
              (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
              (clobber (mem:BLK (scratch)))])]
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size || !TARGET_ADD_ESP_8"
+  "optimize_insn_for_size_p () || !TARGET_ADD_ESP_8"
   [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
              (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
              (clobber (mem:BLK (scratch)))])
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
              (clobber (reg:CC FLAGS_REG))
              (clobber (mem:BLK (scratch)))])]
-  "optimize_size"
+  "optimize_insn_for_size_p ()"
   [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
              (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))
              (clobber (mem:BLK (scratch)))])
   [(match_scratch:DI 0 "r")
    (parallel [(set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 16)))
              (clobber (reg:CC FLAGS_REG))])]
-  "optimize_size"
+  "optimize_insn_for_size_p ()"
   [(parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
              (set (reg:DI SP_REG) (plus:DI (reg:DI SP_REG) (const_int 8)))])
    (parallel [(set (match_dup 0) (mem:DI (reg:DI SP_REG)))
           (mult:SI (match_operand:SI 1 "nonimmediate_operand" "")
                    (match_operand:SI 2 "const_int_operand" "")))
      (clobber (reg:CC FLAGS_REG))])]
-  "!optimize_size
+  "optimize_insn_for_speed_p ()
    && (INTVAL (operands[2]) == 3
        || INTVAL (operands[2]) == 5
        || INTVAL (operands[2]) == 9)"
                    (match_operand:DI 2 "const_int_operand" "")))
      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_64BIT
-   && !optimize_size
+   && optimize_insn_for_speed_p ()
    && (INTVAL (operands[2]) == 3
        || INTVAL (operands[2]) == 5
        || INTVAL (operands[2]) == 9)"
                   (mult:DI (match_operand:DI 1 "memory_operand" "")
                            (match_operand:DI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 3) (match_dup 2)))
                   (mult:SI (match_operand:SI 1 "memory_operand" "")
                            (match_operand:SI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 3) (match_dup 2)))
                     (mult:SI (match_operand:SI 1 "memory_operand" "")
                              (match_operand:SI 2 "immediate_operand" ""))))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_SLOW_IMUL_IMM32_MEM && !optimize_size
+  "TARGET_SLOW_IMUL_IMM32_MEM && optimize_insn_for_speed_p ()
    && !satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 1))
    (parallel [(set (match_dup 0) (zero_extend:DI (mult:SI (match_dup 3) (match_dup 2))))
                            (match_operand:DI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:DI 3 "r")]
-  "TARGET_SLOW_IMUL_IMM8 && !optimize_size
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
    && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
                            (match_operand:SI 2 "const_int_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:SI 3 "r")]
-  "TARGET_SLOW_IMUL_IMM8 && !optimize_size
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()
    && satisfies_constraint_K (operands[2])"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
                            (match_operand:HI 2 "immediate_operand" "")))
              (clobber (reg:CC FLAGS_REG))])
    (match_scratch:HI 3 "r")]
-  "TARGET_SLOW_IMUL_IMM8 && !optimize_size"
+  "TARGET_SLOW_IMUL_IMM8 && optimize_insn_for_speed_p ()"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
              (clobber (reg:CC FLAGS_REG))])]
 }
   [(set_attr "type" "callv")])
 
+(define_insn "*call_value_0_rex64_ms_sysv"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "constant_call_address_operand" ""))
+             (match_operand:DI 2 "const_int_operand" "")))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (SIBLING_CALL_P (insn))
+    return "jmp\t%P1";
+  else
+    return "call\t%P1";
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:SI 1 "call_insn_operand" "rsm"))
 }
   [(set_attr "type" "callv")])
 
+(define_insn "*call_value_1_rex64_ms_sysv"
+  [(set (match_operand 0 "" "")
+       (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rsm"))
+             (match_operand:DI 2 "" "")))
+   (unspec [(const_int 0)] UNSPEC_MS_TO_SYSV_CALL)
+   (clobber (reg:DI SI_REG))
+   (clobber (reg:DI DI_REG))]
+  "!SIBLING_CALL_P (insn) && TARGET_64BIT"
+{
+  if (constant_call_address_operand (operands[1], Pmode))
+    return "call\t%P1";
+  return "call\t%A1";
+}
+  [(set_attr "type" "callv")])
+
 (define_insn "*call_value_1_rex64_large"
   [(set (match_operand 0 "" "")
        (call (mem:QI (match_operand:DI 1 "call_insn_operand" "rm"))
        (call (mem:QI (reg:DI R11_REG))
              (match_operand:DI 1 "" "")))]
   "SIBLING_CALL_P (insn) && TARGET_64BIT"
-  "jmp\t*%%r11"
+  "jmp\t{*%%}r11"
   [(set_attr "type" "callv")])
 \f
 ;; We used to use "int $5", in honor of #BR which maps to interrupt vector 5.
    (use (match_operand:DI 2 "const_int_operand" "i"))
    (use (label_ref:DI (match_operand 3 "" "X")))]
   "TARGET_64BIT
-   && INTVAL (operands[4]) + SSE_REGPARM_MAX * 16 - 16 < 128
+   && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
    && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
-  "*
 {
   int i;
   operands[0] = gen_rtx_MEM (Pmode,
                             gen_rtx_PLUS (Pmode, operands[0], operands[4]));
-  output_asm_insn (\"jmp\\t%A1\", operands);
-  for (i = SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
+  /* VEX instruction with a REX prefix will #UD.  */
+  if (TARGET_AVX && GET_CODE (XEXP (operands[0], 0)) != PLUS)
+    gcc_unreachable ();
+
+  output_asm_insn ("jmp\t%A1", operands);
+  for (i = X86_64_SSE_REGPARM_MAX - 1; i >= INTVAL (operands[2]); i--)
     {
       operands[4] = adjust_address (operands[0], DImode, i*16);
       operands[5] = gen_rtx_REG (TImode, SSE_REGNO (i));
       PUT_MODE (operands[4], TImode);
       if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
-        output_asm_insn (\"rex\", operands);
-      output_asm_insn (\"movaps\\t{%5, %4|%4, %5}\", operands);
+        output_asm_insn ("rex", operands);
+      output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
     }
-  (*targetm.asm_out.internal_label) (asm_out_file, \"L\",
-                            CODE_LABEL_NUMBER (operands[3]));
-  return \"\";
+  (*targetm.asm_out.internal_label) (asm_out_file, "L",
+                                    CODE_LABEL_NUMBER (operands[3]));
+  return "";
 }
-  "
   [(set_attr "type" "other")
    (set_attr "length_immediate" "0")
    (set_attr "length_address" "0")
-   (set_attr "length" "135")
+   (set (attr "length")
+     (if_then_else
+       (eq (symbol_ref "TARGET_AVX") (const_int 0))
+       (const_string "34")
+       (const_string "42")))
    (set_attr "memory" "store")
    (set_attr "modrm" "0")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DI")])
 
 (define_expand "prefetch"
    (set (match_scratch:SI 2 "=&r") (const_int 0))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR %%gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
+  "mov{l}\t{%%gs:%P1, %2|%2, DWORD PTR gs:%P1}\;mov{l}\t{%2, %0|%0, %2}\;xor{l}\t%2, %2"
   [(set_attr "type" "multi")])
 
 (define_insn "stack_tls_protect_set_di"
         system call would not have to trash the userspace segment register,
         which would be expensive */
      if (ix86_cmodel != CM_KERNEL)
-        return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR %%fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+        return "mov{q}\t{%%fs:%P1, %2|%2, QWORD PTR fs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
      else
-        return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR %%gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
+        return "mov{q}\t{%%gs:%P1, %2|%2, QWORD PTR gs:%P1}\;mov{q}\t{%2, %0|%0, %2}\;xor{l}\t%k2, %k2";
   }
   [(set_attr "type" "multi")])
 
                    UNSPEC_SP_TLS_TEST))
    (clobber (match_scratch:SI 3 "=r"))]
   ""
-  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR %%gs:%P2}"
+  "mov{l}\t{%1, %3|%3, %1}\;xor{l}\t{%%gs:%P2, %3|%3, DWORD PTR gs:%P2}"
   [(set_attr "type" "multi")])
 
 (define_insn "stack_tls_protect_test_di"
         system call would not have to trash the userspace segment register,
         which would be expensive */
      if (ix86_cmodel != CM_KERNEL)
-        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR %%fs:%P2}";
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%fs:%P2, %3|%3, QWORD PTR fs:%P2}";
      else
-        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR %%gs:%P2}";
+        return "mov{q}\t{%1, %3|%3, %1}\;xor{q}\t{%%gs:%P2, %3|%3, QWORD PTR gs:%P2}";
   }
   [(set_attr "type" "multi")])