OSDN Git Service

* config/i386/i386.md (*ashr<mode>3_cconly): Fix wrong mode of
[pf3gnuchains/gcc-fork.git] / gcc / config / i386 / i386.md
index 8ff8643..a6bc762 100644 (file)
@@ -85,6 +85,7 @@
    (UNSPEC_SET_RIP             16)
    (UNSPEC_SET_GOT_OFFSET      17)
    (UNSPEC_MEMORY_BLOCKAGE     18)
+   (UNSPEC_SSE_PROLOGUE_SAVE_LOW 19)
 
    ; TLS support
    (UNSPEC_TP                  20)
 \f
 ;; Processor type.
 (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,atom,
-                   generic64,amdfam10"
+                   generic64,amdfam10,bdver1"
   (const (symbol_ref "ix86_schedule")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
 (define_code_iterator maxmin [smax smin umax umin])
 
 ;; Base name for integer and FP insn mnemonic
-(define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
-                                (umax "maxu") (umin "minu")])
-(define_code_attr maxminfprefix [(smax "max") (smin "min")])
+(define_code_attr maxmin_int [(smax "maxs") (smin "mins")
+                             (umax "maxu") (umin "minu")])
+(define_code_attr maxmin_float [(smax "max") (smin "min")])
 
 ;; Mapping of logic operators
 (define_code_iterator any_logic [and ior xor])
 (define_code_iterator any_or [ior xor])
 
 ;; Base name for insn mnemonic.
-(define_code_attr logicprefix [(and "and") (ior "or") (xor "xor")])
+(define_code_attr logic [(and "and") (ior "or") (xor "xor")])
+
+;; Mapping of shift-right operators
+(define_code_iterator any_shiftrt [lshiftrt ashiftrt])
+
+;; Base name for define_insn
+(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")])
+
+;; Mapping of rotate operators
+(define_code_iterator any_rotate [rotate rotatert])
+
+;; Base name for define_insn
+(define_code_attr rotate_insn [(rotate "rotl") (rotatert "rotr")])
+
+;; Base name for insn mnemonic.
+(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
 
 ;; Mapping of abs neg operators
 (define_code_iterator absneg [abs neg])
 
 ;; Base name for x87 insn mnemonic.
-(define_code_attr absnegprefix [(abs "abs") (neg "chs")])
+(define_code_attr absneg_mnemonic [(abs "abs") (neg "chs")])
 
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
                            (HI "TARGET_HIMODE_MATH")
                            SI (DI "TARGET_64BIT")])
 
+;; Math-dependant single word integer modes without DImode.
+(define_mode_iterator SWIM124 [(QI "TARGET_QIMODE_MATH")
+                              (HI "TARGET_HIMODE_MATH")
+                              SI])
+
 ;; Math-dependant single word integer modes without QImode.
 (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH")
                               SI (DI "TARGET_64BIT")])
    && !x86_64_immediate_operand (operands[1], DImode) && 1"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (&operands[1], 1, &operands[2], &operands[3]);
-   operands[1] = gen_lowpart (DImode, operands[2]);
-   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
-                                                   GEN_INT (4)));
-  ")
+{
+  split_di (&operands[1], 1, &operands[2], &operands[3]);
+
+  operands[1] = gen_lowpart (DImode, operands[2]);
+  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+                                                  GEN_INT (4)));
+})
 
 (define_split
   [(set (match_operand:DI 0 "push_operand" "")
    && !x86_64_immediate_operand (operands[1], DImode)"
   [(set (match_dup 0) (match_dup 1))
    (set (match_dup 2) (match_dup 3))]
-  "split_di (&operands[1], 1, &operands[2], &operands[3]);
-   operands[1] = gen_lowpart (DImode, operands[2]);
-   operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
-                                                   GEN_INT (4)));
-  ")
+{
+  split_di (&operands[1], 1, &operands[2], &operands[3]);
+
+  operands[1] = gen_lowpart (DImode, operands[2]);
+  operands[2] = gen_rtx_MEM (SImode, gen_rtx_PLUS (DImode, stack_pointer_rtx,
+                                                  GEN_INT (4)));
+})
 
 (define_insn "*pushdi2_prologue_rex64"
   [(set (match_operand:DI 0 "push_operand" "=<")
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";
        case MODE_V2DF:
-         return "%vxorpd\t%0, %d0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vxorps\t%0, %d0";
+         else
+           return "%vxorpd\t%0, %d0";
        case MODE_TI:
-         return "%vpxor\t%0, %d0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vxorps\t%0, %d0";
+         else
+           return "%vpxor\t%0, %d0";
        default:
          gcc_unreachable ();
        }
        case MODE_V4SF:
          return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
-         return "%vmovapd\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+         else
+           return "%vmovapd\t{%1, %0|%0, %1}";
        case MODE_TI:
-         return "%vmovdqa\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+         else
+           return "%vmovdqa\t{%1, %0|%0, %1}";
        case MODE_DI:
          return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_DF:
        case MODE_V4SF:
          return "%vxorps\t%0, %d0";
        case MODE_V2DF:
-         return "%vxorpd\t%0, %d0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vxorps\t%0, %d0";
+         else
+           return "%vxorpd\t%0, %d0";
        case MODE_TI:
-         return "%vpxor\t%0, %d0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vxorps\t%0, %d0";
+         else
+           return "%vpxor\t%0, %d0";
        default:
          gcc_unreachable ();
        }
        case MODE_V4SF:
          return "%vmovaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
-         return "%vmovapd\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+         else
+           return "%vmovapd\t{%1, %0|%0, %1}";
        case MODE_TI:
-         return "%vmovdqa\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "%vmovaps\t{%1, %0|%0, %1}";
+         else
+           return "%vmovdqa\t{%1, %0|%0, %1}";
        case MODE_DI:
          return "%vmovq\t{%1, %0|%0, %1}";
        case MODE_DF:
        case MODE_V4SF:
          return "xorps\t%0, %0";
        case MODE_V2DF:
-         return "xorpd\t%0, %0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "xorps\t%0, %0";
+         else
+           return "xorpd\t%0, %0";
        case MODE_TI:
-         return "pxor\t%0, %0";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "xorps\t%0, %0";
+         else
+           return "pxor\t%0, %0";
        default:
          gcc_unreachable ();
        }
        case MODE_V4SF:
          return "movaps\t{%1, %0|%0, %1}";
        case MODE_V2DF:
-         return "movapd\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "movaps\t{%1, %0|%0, %1}";
+         else
+           return "movapd\t{%1, %0|%0, %1}";
        case MODE_TI:
-         return "movdqa\t{%1, %0|%0, %1}";
+         if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
+           return "movaps\t{%1, %0|%0, %1}";
+         else
+           return "movdqa\t{%1, %0|%0, %1}";
        case MODE_DI:
          return "movq\t{%1, %0|%0, %1}";
        case MODE_DF:
 
   /* Generate a cltd if possible and doing so it profitable.  */
   if ((optimize_function_for_size_p (cfun) || TARGET_USE_CLTD)
-      && true_regnum (operands[3]) == AX_REG)
+      && true_regnum (operands[3]) == AX_REG
+      && true_regnum (operands[4]) == DX_REG)
     {
       emit_insn (gen_ashrsi3_cvt (operands[4], operands[3], GEN_INT (31)));
       DONE;
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  "&& reload_completed"
+  "reload_completed"
   [(parallel [(set (match_dup 1)
                   (ashiftrt:SWIM248 (match_dup 4) (match_dup 5)))
              (clobber (reg:CC FLAGS_REG))])
              (use (match_dup 1))
              (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
 
   if (<MODE>mode != HImode
       && (optimize_function_for_size_p (cfun) || TARGET_USE_CLTD))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "#"
-  "&& reload_completed"
+  "reload_completed"
   [(set (match_dup 1) (const_int 0))
    (parallel [(set (match_dup 0)
                   (udiv:SWIM248 (match_dup 2) (match_dup 3)))
         (match_operand:SWI248 2 "<general_operand>" "<g>,r<i>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, QImode, operands)"
   "@
-   <logicprefix>{b}\t{%2, %0|%0, %2}
-   <logicprefix>{b}\t{%2, %0|%0, %2}
-   <logicprefix>{l}\t{%k2, %k0|%k0, %k2}"
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{b}\t{%2, %0|%0, %2}
+   <logic>{l}\t{%k2, %k0|%k0, %k2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "QI,QI,SI")])
 
                    (match_operand:SI 2 "general_operand" "g"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
         (match_operand:DI 2 "x86_64_zext_immediate_operand" "Z")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
    (clobber (reg:CC FLAGS_REG))]
   "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "<logicprefix>{b}\t{%1, %0|%0, %1}"
+  "<logic>{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
        (any_or:SWI (match_dup 1) (match_dup 2)))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
        (zero_extend:DI (any_or:SI (match_dup 1) (match_dup 2))))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
        (any_or:DI (zero_extend:DI (match_dup 1)) (match_dup 2)))]
   "TARGET_64BIT && ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, SImode, operands)"
-  "<logicprefix>{l}\t{%2, %k0|%k0, %2}"
+  "<logic>{l}\t{%2, %k0|%k0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "SI")])
 
   "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
    && ix86_match_ccmode (insn, CCNOmode)
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "<logicprefix>{b}\t{%1, %0|%0, %1}"
+  "<logic>{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "mode" "QI")])
 
    (clobber (match_scratch:SWI 0 "=<r>"))]
   "ix86_match_ccmode (insn, CCNOmode)
    && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "<logicprefix>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
          (match_operand 2 "const_int_operand" "n")))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "1")
    (set_attr "modrm" "1")
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT
    && (!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))"
-  "<logicprefix>{b}\t{%2, %h0|%h0, %2}"
+  "<logic>{b}\t{%2, %h0|%h0, %2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
                           (const_int 8))))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
-  "<logicprefix>{b}\t{%h2, %h0|%h0, %h2}"
+  "<logic>{b}\t{%h2, %h0|%h0, %h2}"
   [(set_attr "type" "alu")
    (set_attr "length_immediate" "0")
    (set_attr "mode" "QI")])
   "TARGET_80387
    && (reload_completed
        || !(SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "<MODE>")])
 
        (absneg:DF (float_extend:DF
                     (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387 && (!TARGET_SSE_MATH || TARGET_MIX_SSE_I387)"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "DF")])
 
        (absneg:XF (float_extend:XF
                     (match_operand:SF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
 (define_insn "*<code>extenddfxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
        (absneg:XF (float_extend:XF
-                     (match_operand:DF 1 "register_operand" "0"))))]
+                    (match_operand:DF 1 "register_operand" "0"))))]
   "TARGET_80387"
-  "f<absnegprefix>"
+  "f<absneg_mnemonic>"
   [(set_attr "type" "fsgn")
    (set_attr "mode" "XF")])
 
                   (zero_extend:DI (xor:SI (match_dup 3) (const_int -1))))])]
   "")
 \f
-;; Arithmetic shift instructions
+;; Shift instructions
 
 ;; DImode shifts are implemented using the i386 "shift double" opcode,
 ;; which is written as "sh[lr]d[lw] imm,reg,reg/mem".  If the shift count
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_LEA:
+      return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       gcc_assert (rtx_equal_p (operands[0], operands[1]));
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_LEA:
+      return "#";
+
     case TYPE_ALU:
       gcc_assert (operands[2] == const1_rtx);
       return "add{l}\t%k0, %k0";
       gcc_assert (operands[2] == const1_rtx);
       return "add{w}\t%0, %0";
 
+    case TYPE_ALU:
+      gcc_assert (operands[2] == const1_rtx);
+      return "add{w}\t%0, %0";
+
     default:
       if (REG_P (operands[2]))
        return "sal{w}\t{%b2, %0|%0, %b2}";
 \f
 ;; Rotate instructions
 
-(define_expand "rotldi3"
+(define_expand "<rotate_insn>ti3"
+  [(set (match_operand:TI 0 "register_operand" "")
+       (any_rotate:TI (match_operand:TI 1 "register_operand" "")
+                      (match_operand:QI 2 "nonmemory_operand" "")))]
+  "TARGET_64BIT"
+{
+  if (const_1_to_63_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>ti3_doubleword
+               (operands[0], operands[1], operands[2]));
+  else
+    FAIL;
+
+  DONE;
+})
+
+(define_expand "<rotate_insn>di3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
-       (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))]
+       (any_rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
+                      (match_operand:QI 2 "nonmemory_operand" "")))]
  ""
 {
   if (TARGET_64BIT)
-    {
-      ix86_expand_binary_operator (ROTATE, DImode, operands);
-      DONE;
-    }
-  if (!const_1_to_31_operand (operands[2], VOIDmode))
+    ix86_expand_binary_operator (<CODE>, DImode, operands);
+  else if (const_1_to_31_operand (operands[2], VOIDmode))
+    emit_insn (gen_ix86_<rotate_insn>di3_doubleword
+               (operands[0], operands[1], operands[2]));
+  else
     FAIL;
-  emit_insn (gen_ix86_rotldi3 (operands[0], operands[1], operands[2]));
+
   DONE;
 })
 
-;; Implement rotation using two double-precision shift instructions
-;; and a scratch register.
-(define_insn_and_split "ix86_rotldi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
-       (rotate:DI (match_operand:DI 1 "register_operand" "0")
-                  (match_operand:QI 2 "const_1_to_31_operand" "I")))
+(define_expand "<rotate_insn><mode>3"
+  [(set (match_operand:SWIM124 0 "nonimmediate_operand" "")
+       (any_rotate:SWIM124 (match_operand:SWIM124 1 "nonimmediate_operand" "")
+                           (match_operand:QI 2 "nonmemory_operand" "")))]
+  ""
+  "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
+
+;; Implement rotation using two double-precision
+;; shift instructions and a scratch register.
+
+(define_insn_and_split "ix86_rotl<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotate:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+                    (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
   (clobber (reg:CC FLAGS_REG))
-  (clobber (match_scratch:SI 3 "=&r"))]
- "!TARGET_64BIT"
+  (clobber (match_scratch:DWIH 3 "=&r"))]
  ""
- "&& reload_completed"
+ "#"
+ "reload_completed"
  [(set (match_dup 3) (match_dup 4))
   (parallel
    [(set (match_dup 4)
-         (ior:SI (ashift:SI (match_dup 4) (match_dup 2))
-                 (lshiftrt:SI (match_dup 5)
-                              (minus:QI (const_int 32) (match_dup 2)))))
+        (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
+                  (lshiftrt:DWIH (match_dup 5)
+                                 (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 5)
-         (ior:SI (ashift:SI (match_dup 5) (match_dup 2))
-                 (lshiftrt:SI (match_dup 3)
-                              (minus:QI (const_int 32) (match_dup 2)))))
+        (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
+                  (lshiftrt:DWIH (match_dup 3)
+                                 (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 1, &operands[4], &operands[5]);")
-
-(define_insn "*rotlsi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-       (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                  (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, DImode, operands)"
-  "rol{q}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*rotldi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-       (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-                  (match_operand:QI 2 "nonmemory_operand" "e,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, DImode, operands)"
-  "@
-   rol{q}\t{%2, %0|%0, %2}
-   rol{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "DI")])
-
-(define_expand "rotlsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-       (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ROTATE, SImode, operands); DONE;")
-
-(define_insn "*rotlsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-       (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                  (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "rol{l}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1_one_bit_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (zero_extend:DI
-         (rotate:SI (match_operand:SI 1 "register_operand" "0")
-                    (match_operand:QI 2 "const1_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "rol{l}\t%k0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-       (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-                  (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "@
-   rol{l}\t{%2, %0|%0, %2}
-   rol{l}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (zero_extend:DI
-         (rotate:SI (match_operand:SI 1 "register_operand" "0,0")
-                    (match_operand:QI 2 "nonmemory_operand" "I,c"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATE, SImode, operands)"
-  "@
-   rol{l}\t{%2, %k0|%k0, %2}
-   rol{l}\t{%b2, %k0|%k0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_expand "rotlhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-       (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATE, HImode, operands); DONE;")
-
-(define_insn "*rotlhi3_1_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-       (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                  (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, HImode, operands)"
-  "rol{w}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*rotlhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-       (rotate:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-                  (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, HImode, operands)"
-  "@
-   rol{w}\t{%2, %0|%0, %2}
-   rol{w}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "HI")])
-
-(define_split
- [(set (match_operand:HI 0 "register_operand" "")
-       (rotate:HI (match_dup 0) (const_int 8)))
-  (clobber (reg:CC FLAGS_REG))]
- "reload_completed"
- [(parallel [(set (strict_low_part (match_dup 0))
-                 (bswap:HI (match_dup 0)))
-            (clobber (reg:CC FLAGS_REG))])]
- "")
-
-(define_expand "rotlqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-       (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATE, QImode, operands); DONE;")
-
-(define_insn "*rotlqi3_1_one_bit_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-       (rotate:QI (match_dup 0)
-                  (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
-  "rol{b}\t%0"
-  [(set_attr "type" "rotate1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-       (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                  (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATE, QImode, operands)"
-  "rol{b}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-       (rotate:QI (match_dup 0)
-                  (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   rol{b}\t{%1, %0|%0, %1}
-   rol{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "rotate1")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotlqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-       (rotate:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-                  (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATE, QImode, operands)"
-  "@
-   rol{b}\t{%2, %0|%0, %2}
-   rol{b}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "QI")])
-
-(define_expand "rotrdi3"
-  [(set (match_operand:DI 0 "shiftdi_operand" "")
-       (rotate:DI (match_operand:DI 1 "shiftdi_operand" "")
-                  (match_operand:QI 2 "nonmemory_operand" "")))]
- ""
 {
-  if (TARGET_64BIT)
-    {
-      ix86_expand_binary_operator (ROTATERT, DImode, operands);
-      DONE;
-    }
-  if (!const_1_to_31_operand (operands[2], VOIDmode))
-    FAIL;
-  emit_insn (gen_ix86_rotrdi3 (operands[0], operands[1], operands[2]));
-  DONE;
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+
+  split_<dwi> (&operands[0], 1, &operands[4], &operands[5]);
 })
 
-;; Implement rotation using two double-precision shift instructions
-;; and a scratch register.
-(define_insn_and_split "ix86_rotrdi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
-       (rotatert:DI (match_operand:DI 1 "register_operand" "0")
-                    (match_operand:QI 2 "const_1_to_31_operand" "I")))
+(define_insn_and_split "ix86_rotr<dwi>3_doubleword"
+ [(set (match_operand:<DWI> 0 "register_operand" "=r")
+       (rotatert:<DWI> (match_operand:<DWI> 1 "register_operand" "0")
+                      (match_operand:QI 2 "<shift_immediate_operand>" "<S>")))
   (clobber (reg:CC FLAGS_REG))
-  (clobber (match_scratch:SI 3 "=&r"))]
- "!TARGET_64BIT"
+  (clobber (match_scratch:DWIH 3 "=&r"))]
  ""
- "&& reload_completed"
+ "#"
+ "reload_completed"
  [(set (match_dup 3) (match_dup 4))
   (parallel
    [(set (match_dup 4)
-         (ior:SI (ashiftrt:SI (match_dup 4) (match_dup 2))
-                 (ashift:SI (match_dup 5)
-                            (minus:QI (const_int 32) (match_dup 2)))))
+        (ior:DWIH (ashiftrt:DWIH (match_dup 4) (match_dup 2))
+                  (ashift:DWIH (match_dup 5)
+                               (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 5)
-         (ior:SI (ashiftrt:SI (match_dup 5) (match_dup 2))
-                 (ashift:SI (match_dup 3)
-                            (minus:QI (const_int 32) (match_dup 2)))))
+        (ior:DWIH (ashiftrt:DWIH (match_dup 5) (match_dup 2))
+                  (ashift:DWIH (match_dup 3)
+                               (minus:QI (match_dup 6) (match_dup 2)))))
     (clobber (reg:CC FLAGS_REG))])]
- "split_di (&operands[0], 1, &operands[4], &operands[5]);")
-
-(define_insn "*rotrdi3_1_one_bit_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
-       (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
-  "ror{q}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "DI")])
-
-(define_insn "*rotrdi3_1_rex64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm,rm")
-       (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "0,0")
-                    (match_operand:QI 2 "nonmemory_operand" "J,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, DImode, operands)"
-  "@
-   ror{q}\t{%2, %0|%0, %2}
-   ror{q}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "DI")])
+{
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
 
-(define_expand "rotrsi3"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "")
-       (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))]
-  ""
-  "ix86_expand_binary_operator (ROTATERT, SImode, operands); DONE;")
+  split_<dwi> (&operands[0], 1, &operands[4], &operands[5]);
+})
 
-(define_insn "*rotrsi3_1_one_bit"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
-       (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const1_operand" "")))
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+       (any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
+                       (match_operand:QI 2 "nonmemory_operand" "c<S>")))
    (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "ror{l}\t%0"
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
+  else
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+}
   [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+           (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+               (const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*rotrsi3_1_one_bit_zext"
+(define_insn "*<rotate_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
-         (rotatert:SI (match_operand:SI 1 "register_operand" "0")
-                      (match_operand:QI 2 "const1_operand" ""))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "ror{l}\t%k0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotrsi3_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm")
-       (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "0,0")
-                    (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "@
-   ror{l}\t{%2, %0|%0, %2}
-   ror{l}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "SI")])
-
-(define_insn "*rotrsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-       (zero_extend:DI
-         (rotatert:SI (match_operand:SI 1 "register_operand" "0,0")
-                      (match_operand:QI 2 "nonmemory_operand" "I,c"))))
+         (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
+                        (match_operand:QI 2 "nonmemory_operand" "cI"))))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && ix86_binary_operator_ok (ROTATERT, SImode, operands)"
-  "@
-   ror{l}\t{%2, %k0|%k0, %2}
-   ror{l}\t{%b2, %k0|%k0, %b2}"
+  "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
+{
+    if (operands[2] == const1_rtx
+       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{l}\t%k0";
+  else
+    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+}
   [(set_attr "type" "rotate")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+           (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+               (const_int 0)))
+       (const_string "0")
+       (const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_expand "rotrhi3"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "")
-       (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_HIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATERT, HImode, operands); DONE;")
-
-(define_insn "*rotrhi3_one_bit"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
-       (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, HImode, operands)"
-  "ror{w}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "HI")])
-
-(define_insn "*rotrhi3_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,rm")
-       (rotatert:HI (match_operand:HI 1 "nonimmediate_operand" "0,0")
-                    (match_operand:QI 2 "nonmemory_operand" "I,c")))
+(define_insn "*<rotate_insn>qi3_1_slp"
+  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+       (any_rotate:QI (match_dup 0)
+                      (match_operand:QI 1 "nonmemory_operand" "cI")))
    (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, HImode, operands)"
-  "@
-   ror{w}\t{%2, %0|%0, %2}
-   ror{w}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "HI")])
+  "(optimize_function_for_size_p (cfun)
+    || !TARGET_PARTIAL_REG_STALL
+    || (operands[1] == const1_rtx
+       && TARGET_SHIFT1))"
+{
+  if (operands[1] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{b}\t%0";
+  else
+    return "<rotate>{b}\t{%1, %0|%0, %1}";
+}
+  [(set_attr "type" "rotate1")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 1 "const1_operand" "")
+           (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+               (const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "QI")])
 
 (define_split
  [(set (match_operand:HI 0 "register_operand" "")
-       (rotatert:HI (match_dup 0) (const_int 8)))
+       (any_rotate:HI (match_dup 0) (const_int 8)))
   (clobber (reg:CC FLAGS_REG))]
- "reload_completed"
+ "reload_completed
+  && (TARGET_USE_XCHGB || optimize_function_for_size_p (cfun))"
  [(parallel [(set (strict_low_part (match_dup 0))
                  (bswap:HI (match_dup 0)))
             (clobber (reg:CC FLAGS_REG))])]
  "")
-
-(define_expand "rotrqi3"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "")
-       (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "")
-                    (match_operand:QI 2 "nonmemory_operand" "")))]
-  "TARGET_QIMODE_MATH"
-  "ix86_expand_binary_operator (ROTATERT, QImode, operands); DONE;")
-
-(define_insn "*rotrqi3_1_one_bit"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm")
-       (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0")
-                    (match_operand:QI 2 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
-   && ix86_binary_operator_ok (ROTATERT, QImode, operands)"
-  "ror{b}\t%0"
-  [(set_attr "type" "rotate")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1_one_bit_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
-       (rotatert:QI (match_dup 0)
-                    (match_operand:QI 1 "const1_operand" "")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))"
-  "ror{b}\t%0"
-  [(set_attr "type" "rotate1")
-   (set_attr "length_immediate" "0")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,qm")
-       (rotatert:QI (match_operand:QI 1 "nonimmediate_operand" "0,0")
-                    (match_operand:QI 2 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "ix86_binary_operator_ok (ROTATERT, QImode, operands)"
-  "@
-   ror{b}\t{%2, %0|%0, %2}
-   ror{b}\t{%b2, %0|%0, %b2}"
-  [(set_attr "type" "rotate")
-   (set_attr "mode" "QI")])
-
-(define_insn "*rotrqi3_1_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,qm"))
-       (rotatert:QI (match_dup 0)
-                    (match_operand:QI 1 "nonmemory_operand" "I,c")))
-   (clobber (reg:CC FLAGS_REG))]
-  "(! TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-   ror{b}\t{%1, %0|%0, %1}
-   ror{b}\t{%b1, %0|%0, %b1}"
-  [(set_attr "type" "rotate1")
-   (set_attr "mode" "QI")])
-\f
-;; Bit set / bit test instructions
+\f
+;; Bit set / bit test instructions
 
 (define_expand "extv"
   [(set (match_operand:SI 0 "register_operand" "")
   DONE;
 })
 
-(define_insn "*btdi_rex64"
-  [(set (reg:CCC FLAGS_REG)
-       (compare:CCC
-         (zero_extract:DI
-           (match_operand:DI 0 "register_operand" "r")
-           (const_int 1)
-           (match_operand:DI 1 "nonmemory_operand" "rN"))
-         (const_int 0)))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
-  "bt{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_insn "*btsi"
+(define_insn "*bt<mode>"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
-         (zero_extract:SI
-           (match_operand:SI 0 "register_operand" "r")
+         (zero_extract:SWI48
+           (match_operand:SWI48 0 "register_operand" "r")
            (const_int 1)
-           (match_operand:SI 1 "nonmemory_operand" "rN"))
+           (match_operand:SWI48 1 "nonmemory_operand" "rN"))
          (const_int 0)))]
   "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
-  "bt{l}\t{%1, %0|%0, %1}"
+  "bt{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 \f
 ;; Store-flag instructions.
 
     FAIL;
 })
 
-;; zero_extend in SImode is correct, since this is what combine pass
-;; generates from shift insn with QImode operand.  Actually, the mode of
-;; operand 2 (bit offset operand) doesn't matter since bt insn takes
+;; zero_extend in SImode is correct also for DImode, since this is what combine
+;; pass generates from shift insn with QImode operand.  Actually, the mode
+;; of operand 2 (bit offset operand) doesn't matter since bt insn takes
 ;; appropriate modulo of the bit offset value.
 
-(define_insn_and_split "*jcc_btdi_rex64"
+(define_insn_and_split "*jcc_bt<mode>"
   [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
-                       [(zero_extract:DI
-                          (match_operand:DI 1 "register_operand" "r")
+                       [(zero_extract:SWI48
+                          (match_operand:SWI48 1 "register_operand" "r")
                           (const_int 1)
                           (zero_extend:SI
                             (match_operand:QI 2 "register_operand" "r")))
                      (label_ref (match_operand 3 "" ""))
                      (pc)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))"
+  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
   "#"
   "&& 1"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
-         (zero_extract:DI
+         (zero_extract:SWI48
            (match_dup 1)
            (const_int 1)
            (match_dup 2))
                      (label_ref (match_dup 3))
                      (pc)))]
 {
-  operands[2] = simplify_gen_subreg (DImode, operands[2], QImode, 0);
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], QImode, 0);
 
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
-;; avoid useless masking of bit offset operand
-(define_insn_and_split "*jcc_btdi_mask_rex64"
+;; Avoid useless masking of bit offset operand.  "and" in SImode is correct
+;; also for DImode, this is what combine produces.
+(define_insn_and_split "*jcc_bt<mode>_mask"
   [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
-                       [(zero_extract:DI
-                          (match_operand:DI 1 "register_operand" "r")
+                       [(zero_extract:SWI48
+                          (match_operand:SWI48 1 "register_operand" "r")
                           (const_int 1)
                           (and:SI
                             (match_operand:SI 2 "register_operand" "r")
                      (label_ref (match_operand 4 "" ""))
                      (pc)))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && (TARGET_USE_BT || optimize_function_for_size_p (cfun))
-   && (INTVAL (operands[3]) & 0x3f) == 0x3f"
+  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
+   && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+      == GET_MODE_BITSIZE (<MODE>mode)-1"
   "#"
   "&& 1"
   [(set (reg:CCC FLAGS_REG)
        (compare:CCC
-         (zero_extract:DI
+         (zero_extract:SWI48
            (match_dup 1)
            (const_int 1)
            (match_dup 2))
                      (label_ref (match_dup 4))
                      (pc)))]
 {
-  operands[2] = simplify_gen_subreg (DImode, operands[2], SImode, 0);
+  operands[2] = simplify_gen_subreg (<MODE>mode, operands[2], SImode, 0);
 
   PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
 })
 
-(define_insn_and_split "*jcc_btsi"
-  [(set (pc)
-       (if_then_else (match_operator 0 "bt_comparison_operator"
-                       [(zero_extract:SI
-                          (match_operand:SI 1 "register_operand" "r")
-                          (const_int 1)
-                          (zero_extend:SI
-                            (match_operand:QI 2 "register_operand" "r")))
-                        (const_int 0)])
-                     (label_ref (match_operand 3 "" ""))
-                     (pc)))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_USE_BT || optimize_function_for_size_p (cfun)"
-  "#"
-  "&& 1"
-  [(set (reg:CCC FLAGS_REG)
-       (compare:CCC
-         (zero_extract:SI
-           (match_dup 1)
-           (const_int 1)
-           (match_dup 2))
-         (const_int 0)))
-   (set (pc)
-       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
-                     (label_ref (match_dup 3))
-                     (pc)))]
-{
-  operands[2] = simplify_gen_subreg (SImode, operands[2], QImode, 0);
-
-  PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));
-})
-
-;; avoid useless masking of bit offset operand
-(define_insn_and_split "*jcc_btsi_mask"
-  [(set (pc)
-       (if_then_else (match_operator 0 "bt_comparison_operator"
-                       [(zero_extract:SI
-                          (match_operand:SI 1 "register_operand" "r")
-                          (const_int 1)
-                          (and:SI
-                            (match_operand:SI 2 "register_operand" "r")
-                            (match_operand:SI 3 "const_int_operand" "n")))])
-                     (label_ref (match_operand 4 "" ""))
-                     (pc)))
-   (clobber (reg:CC FLAGS_REG))]
-  "(TARGET_USE_BT || optimize_function_for_size_p (cfun))
-   && (INTVAL (operands[3]) & 0x1f) == 0x1f"
-  "#"
-  "&& 1"
-  [(set (reg:CCC FLAGS_REG)
-       (compare:CCC
-         (zero_extract:SI
-           (match_dup 1)
-           (const_int 1)
-           (match_dup 2))
-         (const_int 0)))
-   (set (pc)
-       (if_then_else (match_op_dup 0 [(reg:CCC FLAGS_REG) (const_int 0)])
-                     (label_ref (match_dup 4))
-                     (pc)))]
-  "PUT_CODE (operands[0], reverse_condition (GET_CODE (operands[0])));")
-
 (define_insn_and_split "*jcc_btsi_1"
   [(set (pc)
        (if_then_else (match_operator 0 "bt_comparison_operator"
   [(const_int 0)]
 {
   operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]);
+
   ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
                        operands[3], operands[7],
                        operands[4], operands[5], operands[6], NULL_RTX);
 {
   operands[7] = ix86_force_to_memory (GET_MODE (operands[2]), operands[2]);
   operands[7] = gen_rtx_FLOAT (GET_MODE (operands[1]), operands[7]);
+
   ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])),
                        operands[3], operands[7],
                        operands[4], operands[5], operands[6], operands[2]);
   "leave"
   [(set_attr "type" "leave")])
 \f
-(define_expand "ffssi2"
-  [(parallel
-     [(set (match_operand:SI 0 "register_operand" "")
-          (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
-      (clobber (match_scratch:SI 2 ""))
-      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  if (TARGET_CMOVE)
-    {
-      emit_insn (gen_ffs_cmove (operands[0], operands[1]));
-      DONE;
-    }
-})
+;; Bit manipulation instructions.
 
-(define_expand "ffs_cmove"
+(define_expand "ffs<mode>2"
   [(set (match_dup 2) (const_int -1))
    (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "")
-                               (const_int 0)))
-             (set (match_operand:SI 0 "register_operand" "")
-                  (ctz:SI (match_dup 1)))])
-   (set (match_dup 0) (if_then_else:SI
+                  (compare:CCZ
+                    (match_operand:SWI48 1 "nonimmediate_operand" "")
+                    (const_int 0)))
+             (set (match_operand:SWI48 0 "register_operand" "")
+                  (ctz:SWI48 (match_dup 1)))])
+   (set (match_dup 0) (if_then_else:SWI48
                        (eq (reg:CCZ FLAGS_REG) (const_int 0))
                        (match_dup 2)
                        (match_dup 0)))
-   (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
+   (parallel [(set (match_dup 0) (plus:SWI48 (match_dup 0) (const_int 1)))
              (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_CMOVE"
-  "operands[2] = gen_reg_rtx (SImode);")
+  ""
+{
+  if (<MODE>mode == SImode && !TARGET_CMOVE)
+    {
+      emit_insn (gen_ffssi2_no_cmove (operands[0], operands [1]));
+      DONE;
+    }
+  operands[2] = gen_reg_rtx (<MODE>mode);
+})
 
-(define_insn_and_split "*ffs_no_cmove"
+(define_insn_and_split "ffssi2_no_cmove"
   [(set (match_operand:SI 0 "register_operand" "=r")
        (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
    (clobber (match_scratch:SI 2 "=&q"))
   ix86_expand_clear (operands[2]);
 })
 
-(define_insn "*ffssi_1"
+(define_insn "*ffs<mode>_1"
   [(set (reg:CCZ FLAGS_REG)
-       (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
+       (compare:CCZ (match_operand:SWI48 1 "nonimmediate_operand" "rm")
                     (const_int 0)))
-   (set (match_operand:SI 0 "register_operand" "=r")
-       (ctz:SI (match_dup 1)))]
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+       (ctz:SWI48 (match_dup 1)))]
   ""
-  "bsf{l}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
-
-(define_expand "ffsdi2"
-  [(set (match_dup 2) (const_int -1))
-   (parallel [(set (reg:CCZ FLAGS_REG)
-                  (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "")
-                               (const_int 0)))
-             (set (match_operand:DI 0 "register_operand" "")
-                  (ctz:DI (match_dup 1)))])
-   (set (match_dup 0) (if_then_else:DI
-                       (eq (reg:CCZ FLAGS_REG) (const_int 0))
-                       (match_dup 2)
-                       (match_dup 0)))
-   (parallel [(set (match_dup 0) (plus:DI (match_dup 0) (const_int 1)))
-             (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT"
-  "operands[2] = gen_reg_rtx (DImode);")
-
-(define_insn "*ffsdi_1"
-  [(set (reg:CCZ FLAGS_REG)
-       (compare:CCZ (match_operand:DI 1 "nonimmediate_operand" "rm")
-                    (const_int 0)))
-   (set (match_operand:DI 0 "register_operand" "=r")
-       (ctz:DI (match_dup 1)))]
-  "TARGET_64BIT"
-  "bsf{q}\t{%1, %0|%0, %1}"
+  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "ctzsi2"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+(define_insn "ctz<mode>2"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (ctz:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   ""
-  "bsf{l}\t{%1, %0|%0, %1}"
+  "bsf{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "type" "alu1")
    (set_attr "prefix_0f" "1")
-   (set_attr "mode" "SI")])
-
-(define_insn "ctzdi2"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (ctz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "bsf{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_expand "clzsi2"
+(define_expand "clz<mode>2"
   [(parallel
-     [(set (match_operand:SI 0 "register_operand" "")
-          (minus:SI (const_int 31)
-                    (clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
+     [(set (match_operand:SWI248 0 "register_operand" "")
+          (minus:SWI248
+            (match_dup 2)
+            (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" ""))))
       (clobber (reg:CC FLAGS_REG))])
    (parallel
-     [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
+     [(set (match_dup 0) (xor:SWI248 (match_dup 0) (match_dup 2)))
       (clobber (reg:CC FLAGS_REG))])]
   ""
 {
   if (TARGET_ABM)
     {
-      emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+      emit_insn (gen_clz<mode>2_abm (operands[0], operands[1]));
       DONE;
     }
+  operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1);
 })
 
-(define_insn "clzsi2_abm"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+(define_insn "clz<mode>2_abm"
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+       (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_ABM"
-  "lzcnt{l}\t{%1, %0|%0, %1}"
+  "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bsr_rex64"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+       (minus:DI (const_int 63)
+                 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "bsr{q}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
 
 (define_insn "bsr"
   [(set (match_operand:SI 0 "register_operand" "=r")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")])
 
+(define_insn "*bsrhi"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+       (minus:HI (const_int 15)
+                 (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsr{w}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "HI")])
+
 (define_insn "popcount<mode>2"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
        (popcount:SWI248
 #if TARGET_MACHO
   return "popcnt\t{%1, %0|%0, %1}";
 #else
-  return "popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+  return "popcnt{l}\t{%1, %0|%0, %1}";
 #endif
 }
   [(set_attr "prefix_rep" "1")
    (set_attr "type" "bitmanip")
    (set_attr "mode" "SI")])
 
-(define_expand "bswapsi2"
-  [(set (match_operand:SI 0 "register_operand" "")
-       (bswap:SI (match_operand:SI 1 "register_operand" "")))]
+(define_expand "bswap<mode>2"
+  [(set (match_operand:SWI48 0 "register_operand" "")
+       (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "")))]
   ""
 {
-  if (!(TARGET_BSWAP || TARGET_MOVBE))
+  if (<MODE>mode == SImode && !(TARGET_BSWAP || TARGET_MOVBE))
     {
       rtx x = operands[0];
 
     }
 })
 
-(define_insn "*bswapsi_movbe"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,m")
-       (bswap:SI (match_operand:SI 1 "nonimmediate_operand" "0,m,r")))]
-  "TARGET_MOVBE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+(define_insn "*bswap<mode>2_movbe"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r,r,m")
+       (bswap:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,m,r")))]
+  "TARGET_MOVBE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     bswap\t%0
     movbe\t{%1, %0|%0, %1}
     movbe\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,imov,imov")
-   (set_attr "modrm" "*,1,1")
-   (set_attr "prefix_0f" "1")
+  [(set_attr "type" "bitmanip,imov,imov")
+   (set_attr "modrm" "0,1,1")
+   (set_attr "prefix_0f" "*,1,1")
    (set_attr "prefix_extra" "*,1,1")
-   (set_attr "length" "2,*,*")
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
-(define_insn "*bswapsi_1"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (bswap:SI (match_operand:SI 1 "register_operand" "0")))]
+(define_insn "*bswap<mode>2_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+       (bswap:SWI48 (match_operand:SWI48 1 "register_operand" "0")))]
   "TARGET_BSWAP"
   "bswap\t%0"
-  [(set_attr "prefix_0f" "1")
-   (set_attr "length" "2")])
+  [(set_attr "type" "bitmanip")
+   (set_attr "modrm" "0")
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*bswaphi_lowpart_1"
   [(set (strict_low_part (match_operand:HI 0 "register_operand" "+Q,r"))
   [(set_attr "length" "4")
    (set_attr "mode" "HI")])
 
-(define_expand "bswapdi2"
-  [(set (match_operand:DI 0 "register_operand" "")
-       (bswap:DI (match_operand:DI 1 "register_operand" "")))]
-  "TARGET_64BIT"
-  "")
-
-(define_insn "*bswapdi_movbe"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m")
-       (bswap:DI (match_operand:DI 1 "nonimmediate_operand" "0,m,r")))]
-  "TARGET_64BIT && TARGET_MOVBE
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-    bswap\t%0
-    movbe\t{%1, %0|%0, %1}
-    movbe\t{%1, %0|%0, %1}"
-  [(set_attr "type" "*,imov,imov")
-   (set_attr "modrm" "*,1,1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "prefix_extra" "*,1,1")
-   (set_attr "length" "3,*,*")
-   (set_attr "mode" "DI")])
-
-(define_insn "*bswapdi_1"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (bswap:DI (match_operand:DI 1 "register_operand" "0")))]
-  "TARGET_64BIT"
-  "bswap\t%0"
-  [(set_attr "prefix_0f" "1")
-   (set_attr "length" "3")])
-
-(define_expand "clzdi2"
-  [(parallel
-     [(set (match_operand:DI 0 "register_operand" "")
-          (minus:DI (const_int 63)
-                    (clz:DI (match_operand:DI 1 "nonimmediate_operand" ""))))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
-     [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
-      (clobber (reg:CC FLAGS_REG))])]
-  "TARGET_64BIT"
-{
-  if (TARGET_ABM)
-    {
-      emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_insn "clzdi2_abm"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT && TARGET_ABM"
-  "lzcnt{q}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "DI")])
-
-(define_insn "bsr_rex64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-       (minus:DI (const_int 63)
-                 (clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "bsr{q}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "DI")])
-
-(define_expand "clzhi2"
-  [(parallel
-     [(set (match_operand:HI 0 "register_operand" "")
-          (minus:HI (const_int 15)
-                    (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
-      (clobber (reg:CC FLAGS_REG))])
-   (parallel
-     [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
-      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  if (TARGET_ABM)
-    {
-      emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
-      DONE;
-    }
-})
-
-(define_insn "clzhi2_abm"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-       (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
-   (clobber (reg:CC FLAGS_REG))]
-  "TARGET_ABM"
-  "lzcnt{w}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_rep" "1")
-   (set_attr "type" "bitmanip")
-   (set_attr "mode" "HI")])
-
-(define_insn "*bsrhi"
-  [(set (match_operand:HI 0 "register_operand" "=r")
-       (minus:HI (const_int 15)
-                 (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
-   (clobber (reg:CC FLAGS_REG))]
-  ""
-  "bsr{w}\t{%1, %0|%0, %1}"
-  [(set_attr "type" "alu1")
-   (set_attr "prefix_0f" "1")
-   (set_attr "mode" "HI")])
-
 (define_expand "paritydi2"
   [(set (match_operand:DI 0 "register_operand" "")
        (parity:DI (match_operand:DI 1 "register_operand" "")))]
   DONE;
 })
 
+(define_expand "paritysi2"
+  [(set (match_operand:SI 0 "register_operand" "")
+       (parity:SI (match_operand:SI 1 "register_operand" "")))]
+  "! TARGET_POPCNT"
+{
+  rtx scratch = gen_reg_rtx (QImode);
+  rtx cond;
+
+  emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
+
+  cond = gen_rtx_fmt_ee (ORDERED, QImode,
+                        gen_rtx_REG (CCmode, FLAGS_REG),
+                        const0_rtx);
+  emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
+
+  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
+  DONE;
+})
+
 (define_insn_and_split "paritydi2_cmp"
   [(set (reg:CC FLAGS_REG)
        (parity:CC (match_operand:DI 3 "register_operand" "0")))
     operands[1] = gen_highpart (SImode, operands[3]);
 })
 
-(define_expand "paritysi2"
-  [(set (match_operand:SI 0 "register_operand" "")
-       (parity:SI (match_operand:SI 1 "register_operand" "")))]
-  "! TARGET_POPCNT"
-{
-  rtx scratch = gen_reg_rtx (QImode);
-  rtx cond;
-
-  emit_insn (gen_paritysi2_cmp (NULL_RTX, NULL_RTX, operands[1]));
-
-  cond = gen_rtx_fmt_ee (ORDERED, QImode,
-                        gen_rtx_REG (CCmode, FLAGS_REG),
-                        const0_rtx);
-  emit_insn (gen_rtx_SET (VOIDmode, scratch, cond));
-
-  emit_insn (gen_zero_extendqisi2 (operands[0], scratch));
-  DONE;
-})
-
 (define_insn_and_split "paritysi2_cmp"
   [(set (reg:CC FLAGS_REG)
        (parity:CC (match_operand:SI 2 "register_operand" "0")))
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "rep movsq"
+  "rep{%;} movsq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!TARGET_64BIT"
-  "rep movs{l|d}"
+  "rep{%;} movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "rep movs{l|d}"
+  "rep{%;} movs{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "!TARGET_64BIT"
-  "rep movsb"
+  "rep{%;} movsb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
        (mem:BLK (match_dup 4)))
    (use (match_dup 5))]
   "TARGET_64BIT"
-  "rep movsb"
+  "rep{%;} movsb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "both")
    (use (match_operand:DI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "rep stosq"
+  "rep{%;} stosq"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!TARGET_64BIT"
-  "rep stos{l|d}"
+  "rep{%;} stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:SI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "rep stos{l|d}"
+  "rep{%;} stos{l|d}"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "!TARGET_64BIT"
-  "rep stosb"
+  "rep{%;} stosb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (use (match_operand:QI 2 "register_operand" "a"))
    (use (match_dup 4))]
   "TARGET_64BIT"
-  "rep stosb"
+  "rep{%;} stosb"
   [(set_attr "type" "str")
    (set_attr "prefix_rep" "1")
    (set_attr "memory" "store")
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
   "!TARGET_64BIT"
-  "repz cmpsb"
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
   "TARGET_64BIT"
-  "repz cmpsb"
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rex" "0")
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (match_operand:SI 2 "register_operand" "=c"))]
   "!TARGET_64BIT"
-  "repz cmpsb"
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (match_operand:DI 2 "register_operand" "=c"))]
   "TARGET_64BIT"
-  "repz cmpsb"
+  "repz{%;} cmpsb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rex" "0")
    (clobber (match_operand:SI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "!TARGET_64BIT"
-  "repnz scasb"
+  "repnz{%;} scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rep" "1")])
    (clobber (match_operand:DI 1 "register_operand" "=D"))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
-  "repnz scasb"
+  "repnz{%;} scasb"
   [(set_attr "type" "str")
    (set_attr "mode" "QI")
    (set_attr "prefix_rex" "0")
        (if_then_else:SI (match_op_dup 1 [(match_dup 4) (const_int 0)])
                      (match_dup 7)
                      (match_dup 8)))]
-  "split_di (&operands[2], 2, &operands[5], &operands[7]);
-   split_di (&operands[0], 1, &operands[2], &operands[3]);")
+{
+  split_di (&operands[2], 2, &operands[5], &operands[7]);
+  split_di (&operands[0], 1, &operands[2], &operands[3]);
+})
 
 (define_insn "*movxfcc_1"
   [(set (match_operand:XF 0 "register_operand" "=f,f")
          (match_operand:MODEF 1 "nonimmediate_operand" "%x")
          (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
   "AVX_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "v<maxmin_float>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
          (match_operand:MODEF 1 "nonimmediate_operand" "%0")
          (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "<maxminfprefix>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
+  "<maxmin_float>s<ssemodefsuffix>\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
                                (reg:DI XMM5_REG)
                                (reg:DI XMM6_REG)
                                (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
-             (use (match_operand:DI 1 "register_operand" ""))
+             (clobber (reg:CC FLAGS_REG))
+             (clobber (match_operand:DI 1 "register_operand" ""))
              (use (match_operand:DI 2 "immediate_operand" ""))
-             (use (label_ref:DI (match_operand 3 "" "")))])]
+             (use (label_ref:DI (match_operand 3 "" "")))
+             (clobber (match_operand:DI 4 "register_operand" ""))
+             (use (match_dup 1))])]
   "TARGET_64BIT"
   "")
 
-(define_insn "*sse_prologue_save_insn"
+;; Pre-reload version of prologue save.  Until after prologue generation we don't know
+;; what the size of save instruction will be.
+;; Operand 0+operand 6 is the memory save area
+;; Operand 1 is number of registers to save (will get overwritten to operand 5)
+;; Operand 2 is number of non-vaargs SSE arguments
+;; Operand 3 is label starting the save block
+;; Operand 4 is used for temporary computation of jump address
+(define_insn "*sse_prologue_save_insn1"
   [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
-                         (match_operand:DI 4 "const_int_operand" "n")))
+                         (match_operand:DI 6 "const_int_operand" "n")))
        (unspec:BLK [(reg:DI XMM0_REG)
                     (reg:DI XMM1_REG)
                     (reg:DI XMM2_REG)
                     (reg:DI XMM5_REG)
                     (reg:DI XMM6_REG)
                     (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+   (clobber (reg:CC FLAGS_REG))
+   (clobber (match_operand:DI 1 "register_operand" "=r"))
+   (use (match_operand:DI 2 "const_int_operand" "i"))
+   (use (label_ref:DI (match_operand 3 "" "X")))
+   (clobber (match_operand:DI 4 "register_operand" "=&r"))
+   (use (match_operand:DI 5 "register_operand" "1"))]
+  "TARGET_64BIT
+   && INTVAL (operands[6]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
+   && INTVAL (operands[6]) + INTVAL (operands[2]) * 16 >= -128"
+  "#"
+  [(set_attr "type" "other")
+   (set_attr "memory" "store")
+   (set_attr "mode" "DI")])
+
+;; We know size of save instruction; expand the computation of jump address
+;; in the jumptable.
+(define_split
+  [(parallel [(set (match_operand:BLK 0 "" "")
+                   (unspec:BLK [(reg:DI XMM0_REG)
+                                (reg:DI XMM1_REG)
+                                (reg:DI XMM2_REG)
+                                (reg:DI XMM3_REG)
+                                (reg:DI XMM4_REG)
+                                (reg:DI XMM5_REG)
+                                (reg:DI XMM6_REG)
+                                (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE))
+              (clobber (reg:CC FLAGS_REG))
+              (clobber (match_operand:DI 1 "register_operand" ""))
+              (use (match_operand:DI 2 "const_int_operand" ""))
+              (use (match_operand 3 "" ""))
+              (clobber (match_operand:DI 4 "register_operand" ""))
+              (use (match_operand:DI 5 "register_operand" ""))])]
+  "reload_completed"
+  [(parallel [(set (match_dup 0)
+                  (unspec:BLK [(reg:DI XMM0_REG)
+                               (reg:DI XMM1_REG)
+                               (reg:DI XMM2_REG)
+                               (reg:DI XMM3_REG)
+                               (reg:DI XMM4_REG)
+                               (reg:DI XMM5_REG)
+                               (reg:DI XMM6_REG)
+                               (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
+             (use (match_dup 1))
+             (use (match_dup 2))
+             (use (match_dup 3))
+             (use (match_dup 5))])]
+{
+  /* Movaps is 4 bytes, AVX and movsd is 5 bytes.  */
+  int size = 4 + (TARGET_AVX || crtl->stack_alignment_needed < 128);
+
+  /* Compute address to jump to:
+     label - eax*size + nnamed_sse_arguments*size. */
+  if (size == 5)
+    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+                           gen_rtx_PLUS
+                             (Pmode,
+                              gen_rtx_MULT (Pmode, operands[1],
+                                            GEN_INT (4)),
+                              operands[1])));
+  else  if (size == 4)
+    emit_insn (gen_rtx_SET (VOIDmode, operands[4],
+                           gen_rtx_MULT (Pmode, operands[1],
+                                         GEN_INT (4))));
+  else
+    gcc_unreachable ();
+  if (INTVAL (operands[2]))
+    emit_move_insn
+      (operands[1],
+       gen_rtx_CONST (DImode,
+                     gen_rtx_PLUS (DImode,
+                                   operands[3],
+                                   GEN_INT (INTVAL (operands[2])
+                                            * size))));
+  else
+    emit_move_insn (operands[1], operands[3]);
+  emit_insn (gen_subdi3 (operands[1], operands[1], operands[4]));
+  operands[5] = GEN_INT (size);
+})
+
+(define_insn "sse_prologue_save_insn"
+  [(set (mem:BLK (plus:DI (match_operand:DI 0 "register_operand" "R")
+                         (match_operand:DI 4 "const_int_operand" "n")))
+       (unspec:BLK [(reg:DI XMM0_REG)
+                    (reg:DI XMM1_REG)
+                    (reg:DI XMM2_REG)
+                    (reg:DI XMM3_REG)
+                    (reg:DI XMM4_REG)
+                    (reg:DI XMM5_REG)
+                    (reg:DI XMM6_REG)
+                    (reg:DI XMM7_REG)] UNSPEC_SSE_PROLOGUE_SAVE_LOW))
    (use (match_operand:DI 1 "register_operand" "r"))
    (use (match_operand:DI 2 "const_int_operand" "i"))
-   (use (label_ref:DI (match_operand 3 "" "X")))]
+   (use (label_ref:DI (match_operand 3 "" "X")))
+   (use (match_operand:DI 5 "const_int_operand" "i"))]
   "TARGET_64BIT
    && INTVAL (operands[4]) + X86_64_SSE_REGPARM_MAX * 16 - 16 < 128
    && INTVAL (operands[4]) + INTVAL (operands[2]) * 16 >= -128"
       PUT_MODE (operands[4], TImode);
       if (GET_CODE (XEXP (operands[0], 0)) != PLUS)
         output_asm_insn ("rex", operands);
-      output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
+      if (crtl->stack_alignment_needed < 128)
+        output_asm_insn ("%vmovsd\t{%5, %4|%4, %5}", operands);
+      else
+        output_asm_insn ("%vmovaps\t{%5, %4|%4, %5}", operands);
     }
   (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                     CODE_LABEL_NUMBER (operands[3]));
   [(set_attr "type" "other")
    (set_attr "length_immediate" "0")
    (set_attr "length_address" "0")
+   ;; 2 bytes for jump and opernds[4] bytes for each save.
    (set (attr "length")
-     (if_then_else
-       (eq (symbol_ref "TARGET_AVX") (const_int 0))
-       (const_string "34")
-       (const_string "42")))
+     (plus (const_int 2)
+          (mult (symbol_ref ("INTVAL (operands[5])"))
+                (symbol_ref ("X86_64_SSE_REGPARM_MAX - INTVAL (operands[2])")))))
    (set_attr "memory" "store")
    (set_attr "modrm" "0")
    (set_attr "prefix" "maybe_vex")