;; GCC machine description for SSE instructions
-;; Copyright (C) 2005, 2006
+;; Copyright (C) 2005, 2006, 2007
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
(define_insn "*mov<mode>_internal"
[(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
(match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "TARGET_SSE
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
(const_string "V4SF")
(const_string "TI")))])
+;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
+;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
+;; from memory, we'd prefer to load the memory directly into the %xmm
+;; register. To facilitate this happy circumstance, this pattern won't
+;; split until after register allocation. If the 64-bit value didn't
+;; come from memory, this is the best we can do. This is much better
+;; than storing %edx:%eax into a stack temporary and loading an %xmm
+;; from there.
+
+(define_insn_and_split "movdi_to_sse"
+ [(parallel
+ [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
+ (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
+ (clobber (match_scratch:V4SI 2 "=&x,X"))])]
+ "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ switch (which_alternative)
+ {
+ case 0:
+ /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
+ Assemble the 64-bit DImode value in an xmm register. */
+ emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 0)));
+ emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
+ gen_rtx_SUBREG (SImode, operands[1], 4)));
+ emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
+ break;
+
+ case 1:
+ emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ DONE;
+})
+
(define_expand "movv4sf"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(match_operand:V4SF 1 "nonimmediate_operand" ""))]
(define_insn "*movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
- "TARGET_SSE"
+ "TARGET_SSE
+ && (register_operand (operands[0], V4SFmode)
+ || register_operand (operands[1], V4SFmode))"
{
switch (which_alternative)
{
(define_insn "*movv2df_internal"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
- "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "TARGET_SSE
+ && (register_operand (operands[0], V2DFmode)
+ || register_operand (operands[1], V2DFmode))"
{
switch (which_alternative)
{
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"movdqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssemov")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse_movntv4sf"
"TARGET_SSE2"
"movntdq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_movntsi"
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "sse_vmaddv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (plus:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(define_insn "sse_vmmulv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
- (mult:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (mult:V4SF (match_operand:V4SF 1 "register_operand" "0")
(match_operand:V4SF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
-(define_insn "*sse_vmsmaxv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smax:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V4SFmode, operands)"
- "maxss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
-
(define_insn "sse_vmsmaxv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
[(set_attr "type" "sse")
(set_attr "mode" "V4SF")])
-(define_insn "*sse_vmsminv4sf3_finite"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_merge:V4SF
- (smin:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "%0")
- (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V4SFmode, operands)"
- "minss\t{%2, %0|%0, %2}"
- [(set_attr "type" "sse")
- (set_attr "mode" "SF")])
-
(define_insn "sse_vmsminv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
"TARGET_SSE3"
"addsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_insn "sse3_haddv4sf3"
"TARGET_SSE3"
"haddps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_insn "sse3_hsubv4sf3"
"TARGET_SSE3"
"hsubps\t{%2, %0|%0, %2}"
[(set_attr "type" "sseadd")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_expand "reduc_splus_v4sf"
"cvtsi2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtsi2ssq"
"cvtsi2ssq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse_cvtss2si"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2si_2"
"cvtss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse_cvtss2siq"
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse_cvtss2siq_2"
"cvtss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse_cvttss2si"
"cvttss2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse_cvttss2siq"
"cvttss2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse2_cvtdq2ps"
"TARGET_SSE2"
"cvtdq2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V4SF")])
(define_insn "sse2_cvtps2dq"
[(set (match_operand:V4SI 0 "register_operand" "=x")
"TARGET_SSE2"
"cvtps2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_cvttps2dq"
"TARGET_SSE2"
"cvttps2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
"TARGET_SSE3"
"movshdup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_insn "sse3_movsldup"
"TARGET_SSE3"
"movsldup\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "V4SF")])
(define_expand "sse_shufps"
DONE;
})
-(define_insn "*vec_setv4sf_0"
- [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y ,m")
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
(vec_merge:V4SF
(vec_duplicate:V4SF
(match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
[(set_attr "type" "ssemov")
(set_attr "mode" "SF")])
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
(define_split
[(set (match_operand:V4SF 0 "memory_operand" "")
(vec_merge:V4SF
DONE;
})
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "register_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "extractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
(define_expand "vec_extractv4sf"
[(match_operand:SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")
(define_insn "sse2_vmaddv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (plus:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (plus:V2DF (match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
(define_insn "sse2_vmmulv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
- (mult:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (mult:V2DF (match_operand:V2DF 1 "register_operand" "0")
(match_operand:V2DF 2 "nonimmediate_operand" "xm"))
(match_dup 1)
(const_int 1)))]
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
-(define_insn "*sse2_vmsmaxv2df3_finite"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (smax:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2 && flag_finite_math_only
- && ix86_binary_operator_ok (SMAX, V2DFmode, operands)"
- "maxsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
(define_insn "sse2_vmsmaxv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
-(define_insn "*sse2_vmsminv2df3_finite"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_merge:V2DF
- (smin:V2DF (match_operand:V2DF 1 "nonimmediate_operand" "%0")
- (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
- (match_dup 1)
- (const_int 1)))]
- "TARGET_SSE2 && flag_finite_math_only
- && ix86_binary_operator_ok (SMIN, V2DFmode, operands)"
- "minsd\t{%2, %0|%0, %2}"
- [(set_attr "type" "sseadd")
- (set_attr "mode" "DF")])
-
(define_insn "sse2_vmsminv2df3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(vec_merge:V2DF
"cvtpd2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "DI")])
(define_insn "sse2_cvttpd2pi"
"cvttpd2pi\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "unit" "mmx")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_cvtsi2sd"
"cvtsi2sd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,direct")])
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
(define_insn "sse2_cvtsi2sdq"
[(set (match_operand:V2DF 0 "register_operand" "=x,x")
"cvtsi2sdq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseicvt")
(set_attr "mode" "DF")
- (set_attr "athlon_decode" "double,direct")])
+ (set_attr "athlon_decode" "double,direct")
+ (set_attr "amdfam10_decode" "vector,double")])
(define_insn "sse2_cvtsd2si"
[(set (match_operand:SI 0 "register_operand" "=r,r")
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse2_cvtsd2si_2"
"cvtsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")])
(define_insn "sse2_cvtsd2siq"
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse2_cvtsd2siq_2"
"cvtsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
(set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")])
(define_insn "sse2_cvttsd2si"
"TARGET_SSE2"
"cvttsd2si\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "SI")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "sse2_cvttsd2siq"
[(set (match_operand:DI 0 "register_operand" "=r,r")
"TARGET_SSE2 && TARGET_64BIT"
"cvttsd2siq\t{%1, %0|%0, %1}"
[(set_attr "type" "sseicvt")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "DI")
- (set_attr "athlon_decode" "double,vector")])
+ (set_attr "athlon_decode" "double,vector")
+ (set_attr "amdfam10_decode" "double,double")])
(define_insn "sse2_cvtdq2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
"TARGET_SSE2"
"cvtpd2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "TI")])
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
(define_expand "sse2_cvttpd2dq"
[(set (match_operand:V4SI 0 "register_operand" "")
"TARGET_SSE2"
"cvttpd2dq\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "TI")])
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")
+ (set_attr "amdfam10_decode" "double")])
(define_insn "sse2_cvtsd2ss"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
"cvtsd2ss\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
(set_attr "athlon_decode" "vector,double")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "SF")])
(define_insn "sse2_cvtss2sd"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_merge:V2DF
(float_extend:V2DF
(vec_select:V2SF
- (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
(parallel [(const_int 0) (const_int 1)])))
- (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 1 "register_operand" "0,0")
(const_int 1)))]
"TARGET_SSE2"
"cvtss2sd\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecvt")
+ (set_attr "amdfam10_decode" "vector,double")
(set_attr "mode" "DF")])
(define_expand "sse2_cvtpd2ps"
"TARGET_SSE2"
"cvtpd2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V4SF")])
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "V4SF")
+ (set_attr "amdfam10_decode" "double")])
(define_insn "sse2_cvtps2pd"
[(set (match_operand:V2DF 0 "register_operand" "=x")
"TARGET_SSE2"
"cvtps2pd\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "mode" "V2DF")
+ (set_attr "amdfam10_decode" "direct")])
+
+(define_expand "vec_unpacks_hi_v4sf"
+ [(set (match_dup 2)
+ (vec_select:V4SF
+ (vec_concat:V8SF
+ (match_dup 2)
+ (match_operand:V4SF 1 "nonimmediate_operand" ""))
+ (parallel [(const_int 6)
+ (const_int 7)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SFmode);
+})
+
+(define_expand "vec_unpacks_lo_v4sf"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float_extend:V2DF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_unpacks_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacks_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_hi_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_hi_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacku_float_lo_v8hi"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_vec_unpacku_lo_v8hi (tmp, operands[1]));
+ emit_insn (gen_sse2_cvtdq2ps (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "vec_unpacks_float_hi_v4si"
+ [(set (match_dup 2)
+ (vec_select:V4SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2)
+ (const_int 3)
+ (const_int 2)
+ (const_int 3)])))
+ (set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_dup 2)
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2"
+{
+ operands[2] = gen_reg_rtx (V4SImode);
+})
+
+(define_expand "vec_unpacks_float_lo_v4si"
+ [(set (match_operand:V2DF 0 "register_operand" "")
+ (float:V2DF
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))))]
+ "TARGET_SSE2")
+
+(define_expand "vec_pack_trunc_v2df"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SFmode);
+ r2 = gen_reg_rtx (V4SFmode);
+
+ emit_insn (gen_sse2_cvtpd2ps (r1, operands[1]));
+ emit_insn (gen_sse2_cvtpd2ps (r2, operands[2]));
+ emit_insn (gen_sse_movlhps (operands[0], r1, r2));
+ DONE;
+})
+
+(define_expand "vec_pack_sfix_trunc_v2df"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx r1, r2;
+
+ r1 = gen_reg_rtx (V4SImode);
+ r2 = gen_reg_rtx (V4SImode);
+
+ emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
+ emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
+ emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
+ gen_lowpart (V2DImode, r1),
+ gen_lowpart (V2DImode, r2)));
+ DONE;
+})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
"TARGET_SSE2"
"unpcklpd\t%0, %0"
[(set_attr "type" "sselog1")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "V2DF")])
(define_insn "*vec_concatv2df_sse3"
[(set (match_operand:V2DF 0 "register_operand" "=x")
(set_attr "mode" "DF")])
(define_insn "*vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=Y,Y,Y,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,0,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " Y,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
"TARGET_SSE"
"@
unpcklpd\t{%2, %0|%0, %2}
"TARGET_SSE2 && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
"padd<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_ssadd<mode>3"
"TARGET_SSE2 && ix86_binary_operator_ok (SS_PLUS, <MODE>mode, operands)"
"padds<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_usadd<mode>3"
"TARGET_SSE2 && ix86_binary_operator_ok (US_PLUS, <MODE>mode, operands)"
"paddus<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "sub<mode>3"
"TARGET_SSE2"
"psub<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_sssub<mode>3"
"TARGET_SSE2"
"psubs<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_ussub<mode>3"
"TARGET_SSE2"
"psubus<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "mulv16qi3"
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmullw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "smulv8hi3_highpart"
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmulhw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "umulv8hi3_highpart"
"TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmulhuw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_umulv2siv2di3"
(vec_select:V2SI
(match_operand:V4SI 2 "nonimmediate_operand" "xm")
(parallel [(const_int 0) (const_int 2)])))))]
- "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
"pmuludq\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_pmaddwd"
(const_int 3)
(const_int 5)
(const_int 7)]))))))]
- "TARGET_SSE2"
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmaddwd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "mulv4si3"
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx t1, t2, t3, t4, t5, t6, thirtytwo;
- rtx op0, op1, op2;
-
- op0 = operands[0];
- op1 = operands[1];
- op2 = operands[2];
- t1 = gen_reg_rtx (V4SImode);
- t2 = gen_reg_rtx (V4SImode);
- t3 = gen_reg_rtx (V4SImode);
- t4 = gen_reg_rtx (V4SImode);
- t5 = gen_reg_rtx (V4SImode);
- t6 = gen_reg_rtx (V4SImode);
- thirtytwo = GEN_INT (32);
-
- /* Multiply elements 2 and 0. */
- emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1), op1, op2));
-
- /* Shift both input vectors down one element, so that elements 3 and 1
- are now in the slots for elements 2 and 0. For K8, at least, this is
- faster than using a shuffle. */
- emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
- gen_lowpart (TImode, op1), thirtytwo));
- emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
- gen_lowpart (TImode, op2), thirtytwo));
-
- /* Multiply elements 3 and 1. */
- emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), t2, t3));
-
- /* Move the results in element 2 down to element 1; we don't care what
- goes in elements 2 and 3. */
- emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
- const0_rtx, const0_rtx));
- emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
+ else
+ {
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V4SImode);
+ t5 = gen_reg_rtx (V4SImode);
+ t6 = gen_reg_rtx (V4SImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply elements 2 and 0. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
+ op1, op2));
+
+ /* Shift both input vectors down one element, so that elements 3
+ and 1 are now in the slots for elements 2 and 0. For K8, at
+ least, this is faster than using a shuffle. */
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, op1),
+ thirtytwo));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, op2),
+ thirtytwo));
+ /* Multiply elements 3 and 1. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
+ t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care
+ what goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
- /* Merge the parts back together. */
- emit_insn (gen_sse2_punpckldq (op0, t5, t6));
- DONE;
+ /* Merge the parts back together. */
+ emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+ DONE;
+ }
})
+(define_insn "*sse4_1_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmulld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
(define_expand "mulv2di3"
[(set (match_operand:V2DI 0 "register_operand" "")
(mult:V2DI (match_operand:V2DI 1 "register_operand" "")
(define_expand "sdot_prodv8hi"
[(match_operand:V4SI 0 "register_operand" "")
- (match_operand:V8HI 1 "nonimmediate_operand" "")
- (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "register_operand" "")
(match_operand:V4SI 3 "register_operand" "")]
"TARGET_SSE2"
{
[(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24
(match_operand:SSEMODE24 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2"
"psra<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "lshr<mode>3"
[(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(lshiftrt:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2"
"psrl<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "ashl<mode>3"
[(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(ashift:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "xi")))]
+ (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2"
"psll<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_ashlti3"
return "pslldq\t{%2, %0|%0, %2}";
}
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "vec_shl_<mode>"
return "psrldq\t{%2, %0|%0, %2}";
}
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "vec_shr_<mode>"
"TARGET_SSE2 && ix86_binary_operator_ok (UMAX, V16QImode, operands)"
"pmaxub\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "smaxv8hi3"
"TARGET_SSE2 && ix86_binary_operator_ok (SMAX, V8HImode, operands)"
"pmaxsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "umaxv8hi3"
- [(set (match_operand:V8HI 0 "register_operand" "=x")
- (us_minus:V8HI (match_operand:V8HI 1 "register_operand" "0")
- (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
- (set (match_dup 3)
- (plus:V8HI (match_dup 0) (match_dup 2)))]
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (umax:V8HI (match_operand:V8HI 1 "register_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
{
- operands[3] = operands[0];
- if (rtx_equal_p (operands[0], operands[2]))
- operands[0] = gen_reg_rtx (V8HImode);
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
+ else
+ {
+ rtx op0 = operands[0], op2 = operands[2], op3 = op0;
+ if (rtx_equal_p (op3, op2))
+ op3 = gen_reg_rtx (V8HImode);
+ emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
+ emit_insn (gen_addv8hi3 (op0, op3, op2));
+ DONE;
+ }
})
(define_expand "smax<mode>3"
(match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx xops[6];
- bool ok;
-
- xops[0] = operands[0];
- xops[1] = operands[1];
- xops[2] = operands[2];
- xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
- DONE;
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMAX, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
})
+(define_insn "*sse4_1_smax<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smax:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (SMAX, <MODE>mode, operands)"
+ "pmaxs<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
(define_expand "umaxv4si3"
[(set (match_operand:V4SI 0 "register_operand" "")
(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx xops[6];
- bool ok;
-
- xops[0] = operands[0];
- xops[1] = operands[1];
- xops[2] = operands[2];
- xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
- DONE;
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[1];
+ xops[2] = operands[2];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
})
+(define_insn "*sse4_1_umax<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umax:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (UMAX, <MODE>mode, operands)"
+ "pmaxu<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
(define_expand "uminv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(umin:V16QI (match_operand:V16QI 1 "nonimmediate_operand" "")
(match_operand:V16QI 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
- "ix86_fixup_binary_operands_no_copy (UMAX, V16QImode, operands);")
+ "ix86_fixup_binary_operands_no_copy (UMIN, V16QImode, operands);")
(define_insn "*uminv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "=x")
"TARGET_SSE2 && ix86_binary_operator_ok (UMIN, V16QImode, operands)"
"pminub\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "sminv8hi3"
"TARGET_SSE2 && ix86_binary_operator_ok (SMIN, V8HImode, operands)"
"pminsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "smin<mode>3"
(match_operand:SSEMODE14 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx xops[6];
- bool ok;
-
- xops[0] = operands[0];
- xops[1] = operands[2];
- xops[2] = operands[1];
- xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
- DONE;
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (SMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
})
+(define_insn "*sse4_1_smin<mode>3"
+ [(set (match_operand:SSEMODE14 0 "register_operand" "=x")
+ (smin:SSEMODE14
+ (match_operand:SSEMODE14 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE14 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (SMIN, <MODE>mode, operands)"
+ "pmins<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
(define_expand "umin<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "")
(umin:SSEMODE24 (match_operand:SSEMODE24 1 "register_operand" "")
(match_operand:SSEMODE24 2 "register_operand" "")))]
"TARGET_SSE2"
{
- rtx xops[6];
- bool ok;
-
- xops[0] = operands[0];
- xops[1] = operands[2];
- xops[2] = operands[1];
- xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
- xops[4] = operands[1];
- xops[5] = operands[2];
- ok = ix86_expand_int_vcond (xops);
- gcc_assert (ok);
- DONE;
+ if (TARGET_SSE4_1)
+ ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
+ else
+ {
+ rtx xops[6];
+ bool ok;
+
+ xops[0] = operands[0];
+ xops[1] = operands[2];
+ xops[2] = operands[1];
+ xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
+ xops[4] = operands[1];
+ xops[5] = operands[2];
+ ok = ix86_expand_int_vcond (xops);
+ gcc_assert (ok);
+ DONE;
+ }
})
+(define_insn "*sse4_1_umin<mode>3"
+ [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
+ (umin:SSEMODE24
+ (match_operand:SSEMODE24 1 "nonimmediate_operand" "%0")
+ (match_operand:SSEMODE24 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (UMIN, <MODE>mode, operands)"
+ "pminu<ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel integral comparisons
"TARGET_SSE2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
"pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_eqv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (eq:V2DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
+ "pcmpeqq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssecmp")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_gt<mode>3"
"TARGET_SSE2"
"pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "ssecmp")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "vcond<mode>"
"TARGET_SSE2 && ix86_binary_operator_ok (AND, <MODE>mode, operands)"
"pand\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_nand<mode>3"
"TARGET_SSE2"
"pandn\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "ior<mode>3"
"TARGET_SSE2 && ix86_binary_operator_ok (IOR, <MODE>mode, operands)"
"por\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "xor<mode>3"
"TARGET_SSE2 && ix86_binary_operator_ok (XOR, <MODE>mode, operands)"
"pxor\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; h3 = aeimquy2bfjnrvz3
;; l3 = cgkosw04dhlptx15
;; result = bdfhjlnprtvxz135
-(define_expand "vec_pack_mod_v8hi"
+(define_expand "vec_pack_trunc_v8hi"
[(match_operand:V16QI 0 "register_operand" "")
(match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "register_operand" "")]
"TARGET_SSE2"
{
rtx op1, op2, h1, l1, h2, l2, h3, l3;
-
+
op1 = gen_lowpart (V16QImode, operands[1]);
op2 = gen_lowpart (V16QImode, operands[2]);
h1 = gen_reg_rtx (V16QImode);
l2 = gen_reg_rtx (V16QImode);
h3 = gen_reg_rtx (V16QImode);
l3 = gen_reg_rtx (V16QImode);
-
+
emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
DONE;
})
-
+
;; Reduce:
;; op1 = abcdefgh
;; op2 = ijklmnop
;; h2 = aeimbfjn
;; l2 = cgkodhlp
;; result = bdfhjlnp
-(define_expand "vec_pack_mod_v4si"
+(define_expand "vec_pack_trunc_v4si"
[(match_operand:V8HI 0 "register_operand" "")
(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "register_operand" "")]
"TARGET_SSE2"
{
rtx op1, op2, h1, l1, h2, l2;
-
+
op1 = gen_lowpart (V8HImode, operands[1]);
op2 = gen_lowpart (V8HImode, operands[2]);
h1 = gen_reg_rtx (V8HImode);
l1 = gen_reg_rtx (V8HImode);
h2 = gen_reg_rtx (V8HImode);
l2 = gen_reg_rtx (V8HImode);
-
+
emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
DONE;
})
-
+
;; Reduce:
;; op1 = abcd
;; op2 = efgh
;; h1 = aebf
;; l1 = cgdh
;; result = bdfh
-(define_expand "vec_pack_mod_v2di"
+(define_expand "vec_pack_trunc_v2di"
[(match_operand:V4SI 0 "register_operand" "")
(match_operand:V2DI 1 "register_operand" "")
(match_operand:V2DI 2 "register_operand" "")]
"TARGET_SSE2"
{
rtx op1, op2, h1, l1;
-
+
op1 = gen_lowpart (V4SImode, operands[1]);
op2 = gen_lowpart (V4SImode, operands[2]);
h1 = gen_reg_rtx (V4SImode);
l1 = gen_reg_rtx (V4SImode);
-
+
emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
"TARGET_SSE2"
"packsswb\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_packssdw"
"TARGET_SSE2"
"packssdw\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_packuswb"
"TARGET_SSE2"
"packuswb\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhbw"
"TARGET_SSE2"
"punpckhbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklbw"
"TARGET_SSE2"
"punpcklbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhwd"
"TARGET_SSE2"
"punpckhwd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklwd"
"TARGET_SSE2"
"punpcklwd\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhdq"
"TARGET_SSE2"
"punpckhdq\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckldq"
"TARGET_SSE2"
"punpckldq\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhqdq"
"TARGET_SSE2"
"punpckhqdq\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklqdq"
"TARGET_SSE2"
"punpcklqdq\t{%2, %0|%0, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
-(define_expand "sse2_pinsrw"
- [(set (match_operand:V8HI 0 "register_operand" "")
- (vec_merge:V8HI
- (vec_duplicate:V8HI
- (match_operand:SI 2 "nonimmediate_operand" ""))
- (match_operand:V8HI 1 "register_operand" "")
- (match_operand:SI 3 "const_0_to_7_operand" "")))]
- "TARGET_SSE2"
+(define_insn "*sse4_1_pinsrb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (vec_merge:V16QI
+ (vec_duplicate:V16QI
+ (match_operand:QI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_32768_operand" "n")))]
+ "TARGET_SSE4_1"
{
- operands[2] = gen_lowpart (HImode, operands[2]);
- operands[3] = GEN_INT ((1 << INTVAL (operands[3])));
-})
-
-(define_insn "*sse2_pinsrw"
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pinsrw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_merge:V8HI
(vec_duplicate:V8HI
return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
}
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+;; It must come before sse2_loadld since it is preferred.
+(define_insn "*sse4_1_pinsrd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (vec_merge:V4SI
+ (vec_duplicate:V4SI
+ (match_operand:SI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V4SI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
-(define_insn "sse2_pextrw"
+(define_insn "*sse4_1_pinsrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (vec_merge:V2DI
+ (vec_duplicate:V2DI
+ (match_operand:DI 2 "nonimmediate_operand" "rm"))
+ (match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_2_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
+ return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (zero_extend:SI
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
+ "TARGET_SSE4_1"
+ "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrb_memory"
+ [(set (match_operand:QI 0 "memory_operand" "=m")
+ (vec_select:QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrb\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse2_pextrw"
[(set (match_operand:SI 0 "register_operand" "=r")
(zero_extend:SI
(vec_select:HI
"TARGET_SSE2"
"pextrw\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrw_memory"
+ [(set (match_operand:HI 0 "memory_operand" "=m")
+ (vec_select:HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrw\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_pextrd"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
+ (vec_select:SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "pextrd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+;; It must come before *vec_extractv2di_1_sse since it is preferred.
+(define_insn "*sse4_1_pextrq"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
+ "TARGET_SSE4_1 && TARGET_64BIT"
+ "pextrq\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_expand "sse2_pshufd"
return "pshufd\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_expand "sse2_pshuflw"
return "pshuflw\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
(define_expand "sse2_pshufhw"
return "pshufhw\t{%2, %1, %0|%0, %1, %2}";
}
[(set_attr "type" "sselog")
+ (set_attr "prefix_rep" "1")
(set_attr "mode" "TI")])
(define_expand "sse2_loadd"
"operands[2] = CONST0_RTX (V4SImode);")
(define_insn "sse2_loadld"
- [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,Yi,x,x")
(vec_merge:V4SI
(vec_duplicate:V4SI
- (match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
- (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
+ (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x"))
+ (match_operand:V4SI 1 "reg_or_0_operand" "C ,C ,C,0")
(const_int 1)))]
"TARGET_SSE"
"@
movd\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}
movss\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemov")
- (set_attr "mode" "TI,V4SF,SF")])
+ (set_attr "mode" "TI,TI,V4SF,SF")])
-;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
-;; be taken into account, and movdi isn't fully populated even without.
(define_insn_and_split "sse2_stored"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=mx")
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=mx,r")
(vec_select:SI
- (match_operand:V4SI 1 "register_operand" "x")
+ (match_operand:V4SI 1 "register_operand" "x,Yi")
(parallel [(const_int 0)])))]
"TARGET_SSE"
"#"
- "&& reload_completed"
+ "&& reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
[(set (match_dup 0) (match_dup 1))]
{
operands[1] = gen_rtx_REG (SImode, REGNO (operands[1]));
"TARGET_SSE"
"")
-;; ??? The hardware supports more, but TARGET_INTER_UNIT_MOVES must
-;; be taken into account, and movdi isn't fully populated even without.
+(define_insn "*sse2_storeq_rex64"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=mx,r")
+ (vec_select:DI
+ (match_operand:V2DI 1 "register_operand" "x,Yi")
+ (parallel [(const_int 0)])))]
+ "TARGET_64BIT && TARGET_SSE"
+ "#")
+
(define_insn "*sse2_storeq"
[(set (match_operand:DI 0 "nonimmediate_operand" "=mx")
(vec_select:DI
(vec_select:DI
(match_operand:V2DI 1 "register_operand" "")
(parallel [(const_int 0)])))]
- "TARGET_SSE && reload_completed"
+ "TARGET_SSE
+ && reload_completed
+ && (TARGET_INTER_UNIT_MOVES
+ || MEM_P (operands [0])
+ || !GENERAL_REGNO_P (true_regnum (operands [0])))"
[(set (match_dup 0) (match_dup 1))]
{
operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
"TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
"@
movhps\t{%1, %0|%0, %1}
- psrldq\t{$4, %0|%0, 4}
+ psrldq\t{$8, %0|%0, 8}
movq\t{%H1, %0|%0, %H1}"
[(set_attr "type" "ssemov,sseishft,ssemov")
(set_attr "mode" "V2SF,TI,TI")])
(set_attr "mode" "V2SF,V4SF,V2SF")])
(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=Y,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "register_operand" " Y,0")))]
+ (match_operand:SI 1 "register_operand" " Y2,0")))]
"TARGET_SSE"
"@
pshufd\t{$0, %1, %0|%0, %1, 0}
(set_attr "mode" "TI,V4SF")])
(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=Y,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
(vec_duplicate:V2DI
- (match_operand:DI 1 "register_operand" " 0,0")))]
+ (match_operand:DI 1 "register_operand" " 0 ,0")))]
"TARGET_SSE"
"@
punpcklqdq\t%0, %0
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*sse2_concatv2si"
- [(set (match_operand:V2SI 0 "register_operand" "=Y, Y,*y,*y")
+ [(set (match_operand:V2SI 0 "register_operand" "=Y2, Y2,*y,*y")
(vec_concat:V2SI
- (match_operand:SI 1 "nonimmediate_operand" " 0,rm, 0,rm")
- (match_operand:SI 2 "reg_or_0_operand" " Y, C,*y, C")))]
+ (match_operand:SI 1 "nonimmediate_operand" " 0 ,rm , 0,rm")
+ (match_operand:SI 2 "reg_or_0_operand" " Y2,C ,*y, C")))]
"TARGET_SSE2"
"@
punpckldq\t{%2, %0|%0, %2}
(set_attr "mode" "V4SF,V4SF,DI,DI")])
(define_insn "*vec_concatv4si_1"
- [(set (match_operand:V4SI 0 "register_operand" "=Y,x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=Y2,x,x")
(vec_concat:V4SI
- (match_operand:V2SI 1 "register_operand" " 0,0,0")
- (match_operand:V2SI 2 "nonimmediate_operand" " Y,x,m")))]
+ (match_operand:V2SI 1 "register_operand" " 0 ,0,0")
+ (match_operand:V2SI 2 "nonimmediate_operand" " Y2,x,m")))]
"TARGET_SSE"
"@
punpcklqdq\t{%2, %0|%0, %2}
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "TI,V4SF,V2SF")])
-(define_insn "*vec_concatv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=Y,?Y,Y,x,x,x")
+(define_insn "vec_concatv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
(vec_concat:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " m,*y,0,0,0,m")
- (match_operand:DI 2 "vector_move_operand" " C, C,Y,x,m,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")
+ (match_operand:DI 2 "vector_move_operand" " C, C,Y2,x,m,0")))]
"TARGET_SSE"
"@
movq\t{%1, %0|%0, %1}
"TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V16QImode, operands)"
"pavgb\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse2_uavgv8hi3"
"TARGET_SSE2 && ix86_binary_operator_ok (PLUS, V8HImode, operands)"
"pavgw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
;; The correct representation for this is absolutely enormous, and
"TARGET_SSE2"
"psadbw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse_movmskps"
"TARGET_SSE2"
"pmovmskb\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
- (set_attr "mode" "V2DF")])
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "SI")])
(define_expand "sse2_maskmovdqu"
[(set (match_operand:V16QI 0 "memory_operand" "")
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovdqu\t{%2, %1|%1, %2}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*sse2_maskmovdqu_rex64"
;; @@@ check ordering of operands in intel/nonintel syntax
"maskmovdqu\t{%2, %1|%1, %2}"
[(set_attr "type" "ssecvt")
+ (set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "sse_ldmxcsr"
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phaddwv4hi3"
"TARGET_SSSE3"
"phaddw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_phadddv4si3"
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phadddv2si3"
"TARGET_SSSE3"
"phaddd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_phaddswv8hi3"
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phaddswv4hi3"
"TARGET_SSSE3"
"phaddsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_phsubwv8hi3"
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phsubwv4hi3"
"TARGET_SSSE3"
"phsubw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_phsubdv4si3"
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phsubdv2si3"
"TARGET_SSSE3"
"phsubd\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_phsubswv8hi3"
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_phsubswv4hi3"
"TARGET_SSSE3"
"phsubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_pmaddubswv8hi3"
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmaddubswv4hi3"
"TARGET_SSSE3"
"pmaddubsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseiadd")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_pmulhrswv8hi3"
"TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
"pmulhrsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_pmulhrswv4hi3"
"TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
"pmulhrsw\t{%2, %0|%0, %2}"
[(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_pshufbv16qi3"
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_pshufbv8qi3"
"TARGET_SSSE3"
"pshufb\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_psign<mode>3"
"TARGET_SSSE3"
"psign<ssevecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_psign<mode>3"
"TARGET_SSSE3"
"psign<mmxvecsize>\t{%2, %0|%0, %2}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "ssse3_palignrti"
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "ssse3_palignrdi"
return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
[(set_attr "type" "sseishft")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
(define_insn "abs<mode>2"
"TARGET_SSSE3"
"pabs<ssevecsize>\t{%1, %0|%0, %1}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_data16" "1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "TI")])
(define_insn "abs<mode>2"
"TARGET_SSSE3"
"pabs<mmxvecsize>\t{%1, %0|%0, %1}";
[(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
(set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4a_vmmovntv2df"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (unspec:DF [(vec_select:DF
+ (match_operand:V2DF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse4a_movntdf"
+ [(set (match_operand:DF 0 "memory_operand" "=m")
+ (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntsd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "DF")])
+
+(define_insn "sse4a_vmmovntv4sf"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (unspec:SF [(vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(const_int 0)]))]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse4a_movntsf"
+ [(set (match_operand:SF 0 "memory_operand" "=m")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+ UNSPEC_MOVNT))]
+ "TARGET_SSE4A"
+ "movntss\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "sse4a_extrqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand 2 "const_int_operand" "")
+ (match_operand 3 "const_int_operand" "")]
+ UNSPEC_EXTRQI))]
+ "TARGET_SSE4A"
+ "extrq\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "register_operand" "x")]
+ UNSPEC_EXTRQ))]
+ "TARGET_SSE4A"
+ "extrq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")
+ (match_operand 3 "const_int_operand" "")
+ (match_operand 4 "const_int_operand" "")]
+ UNSPEC_INSERTQI))]
+ "TARGET_SSE4A"
+ "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+ (match_operand:V2DI 2 "register_operand" "x")]
+ UNSPEC_INSERTQ))]
+ "TARGET_SSE4A"
+ "insertq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseins")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "TI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Intel SSE4.1 instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4_1_blendpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_3_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blendpd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_blendps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")))]
+ "TARGET_SSE4_1"
+ "blendps\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_blendvpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (reg:V2DF 21)]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendvpd\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_blendvps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (reg:V4SF 21)]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "blendvps\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_dppd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "%0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dppd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_dpps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_DP))]
+ "TARGET_SSE4_1"
+ "dpps\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_movntdqa"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
+ UNSPEC_MOVNTDQA))]
+ "TARGET_SSE4_1"
+ "movntdqa\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_mpsadbw"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_MPSADBW))]
+ "TARGET_SSE4_1"
+ "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_packusdw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_concat:V8HI
+ (us_truncate:V4HI
+ (match_operand:V4SI 1 "register_operand" "0"))
+ (us_truncate:V4HI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
+ "TARGET_SSE4_1"
+ "packusdw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendvb"
+ [(set (match_operand:V16QI 0 "register_operand" "=x")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
+ (match_operand:V16QI 2 "nonimmediate_operand" "xm")
+ (reg:V16QI 21)]
+ UNSPEC_BLENDV))]
+ "TARGET_SSE4_1"
+ "pblendvb\t{%%xmm0, %2, %0|%0, %2, %%xmm0}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_pblendw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (vec_merge:V8HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (match_operand:V8HI 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")))]
+ "TARGET_SSE4_1"
+ "pblendw\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_phminposuw"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PHMINPOSUW))]
+ "TARGET_SSE4_1"
+ "phminposuw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (sign_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovsxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv8qiv8hi2"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (zero_extend:V8HI
+ (vec_select:V8QI
+ (vec_duplicate:V16QI
+ (match_operand:V8QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)
+ (const_int 4)
+ (const_int 5)
+ (const_int 6)
+ (const_int 7)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbw\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4qiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4QI
+ (vec_duplicate:V16QI
+ (match_operand:V4QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (match_operand:V16QI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2qiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2QI
+ (vec_duplicate:V16QI
+ (match_operand:V2QI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxbq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv4hiv4si2"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (zero_extend:V4SI
+ (vec_select:V4HI
+ (vec_duplicate:V8HI
+ (match_operand:V4HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)
+ (const_int 2)
+ (const_int 3)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (match_operand:V8HI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2hiv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2HI
+ (vec_duplicate:V8HI
+ (match_operand:V2HI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxwq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "register_operand" "x")
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "*sse4_1_zero_extendv2siv2di2"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (vec_duplicate:V4SI
+ (match_operand:V2SI 1 "nonimmediate_operand" "xm"))
+ (parallel [(const_int 0)
+ (const_int 1)]))))]
+ "TARGET_SSE4_1"
+ "pmovzxdq\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
+;; But it is not a really compare instruction.
+(define_insn "sse4_1_ptest"
+ [(set (reg:CC FLAGS_REG)
+ (unspec:CC [(match_operand:V2DI 0 "register_operand" "x")
+ (match_operand:V2DI 1 "nonimmediate_operand" "xm")]
+ UNSPEC_PTEST))]
+ "TARGET_SSE4_1"
+ "ptest\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn "sse4_1_roundpd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUNDP))]
+ "TARGET_SSE4_1"
+ "roundpd\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_roundps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm")
+ (match_operand:SI 2 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUNDP))]
+ "TARGET_SSE4_1"
+ "roundps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_roundsd"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_merge:V2DF
+ (unspec:V2DF [(match_operand:V2DF 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUNDS)
+ (match_operand:V2DF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE4_1"
+ "roundsd\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse4_1_roundss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_ROUNDS)
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE4_1"
+ "roundss\t{%3, %2, %0|%0, %2, %3}"
+ [(set_attr "type" "ssecvt")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])