;; GCC machine description for SSE instructions
-;; Copyright (C) 2005
+;; Copyright (C) 2005, 2006
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
(define_insn "*mov<mode>_internal"
[(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
- (match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
+ (match_operand:SSEMODEI 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (which_alternative)
{
case 0:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else
- return "pxor\t%0, %0";
+ return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set (attr "mode")
- (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
- (const_string "V4SF")
-
- (eq_attr "alternative" "0,1")
- (if_then_else
- (ne (symbol_ref "optimize_size")
- (const_int 0))
- (const_string "V4SF")
- (const_string "TI"))
- (eq_attr "alternative" "2")
- (if_then_else
- (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
- (const_int 0))
- (ne (symbol_ref "optimize_size")
- (const_int 0)))
- (const_string "V4SF")
- (const_string "TI"))]
- (const_string "TI")))])
+ (if_then_else
+ (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (const_string "TI")))])
(define_expand "movv4sf"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(define_insn "*movv4sf_internal"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
+ (match_operand:V4SF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
"TARGET_SSE"
- "@
- xorps\t%0, %0
- movaps\t{%1, %0|%0, %1}
- movaps\t{%1, %0|%0, %1}"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return standard_sse_constant_opcode (insn, operands[1]);
+ case 1:
+ case 2:
+ return "movaps\t{%1, %0|%0, %1}";
+ default:
+ abort();
+ }
+}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "mode" "V4SF")])
(define_insn "*movv2df_internal"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
- (match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+ (match_operand:V2DF 1 "nonimmediate_or_sse_const_operand" "C,xm,x"))]
"TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (which_alternative)
{
case 0:
- if (get_attr_mode (insn) == MODE_V4SF)
- return "xorps\t%0, %0";
- else
- return "xorpd\t%0, %0";
+ return standard_sse_constant_opcode (insn, operands[1]);
case 1:
case 2:
if (get_attr_mode (insn) == MODE_V4SF)
}
[(set_attr "type" "sselog1,ssemov,ssemov")
(set (attr "mode")
- (cond [(eq (symbol_ref "TARGET_SSE2") (const_int 0))
- (const_string "V4SF")
- (eq_attr "alternative" "0,1")
- (if_then_else
- (ne (symbol_ref "optimize_size")
- (const_int 0))
- (const_string "V4SF")
- (const_string "V2DF"))
- (eq_attr "alternative" "2")
- (if_then_else
- (ior (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
- (const_int 0))
- (ne (symbol_ref "optimize_size")
- (const_int 0)))
- (const_string "V4SF")
- (const_string "V2DF"))]
- (const_string "V2DF")))])
+ (if_then_else
+ (ior (ior (ne (symbol_ref "optimize_size") (const_int 0))
+ (eq (symbol_ref "TARGET_SSE2") (const_int 0)))
+ (and (eq_attr "alternative" "2")
+ (ne (symbol_ref "TARGET_SSE_TYPELESS_STORES")
+ (const_int 0))))
+ (const_string "V4SF")
+ (const_string "V2DF")))])
(define_split
[(set (match_operand:V2DF 0 "register_operand" "")
(define_insn "sse3_lddqu"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
- UNSPEC_LDQQU))]
+ UNSPEC_LDDQU))]
"TARGET_SSE3"
"lddqu\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
[(set_attr "type" "sse")
(set_attr "mode" "SF")])
+;; These versions of the min/max patterns implement exactly the operations
+;; min = (op1 < op2 ? op1 : op2)
+;; max = (!(op1 < op2) ? op1 : op2)
+;; Their operands are not commutative, and thus they may be used in the
+;; presence of -0.0 and NaN.
+
+(define_insn "*ieee_sminv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE"
+ "minps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*ieee_smaxv4sf3"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE"
+ "maxps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "*ieee_sminv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MIN))]
+ "TARGET_SSE2"
+ "minpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*ieee_smaxv2df3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (unspec:V2DF [(match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm")]
+ UNSPEC_IEEE_MAX))]
+ "TARGET_SSE2"
+ "maxpd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseadd")
+ (set_attr "mode" "V2DF")])
+
(define_insn "sse3_addsubv4sf3"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
[(set_attr "type" "sseadd")
(set_attr "mode" "V4SF")])
-(define_expand "reduc_plus_v4sf"
+(define_expand "reduc_splus_v4sf"
[(match_operand:V4SF 0 "register_operand" "")
(match_operand:V4SF 1 "register_operand" "")]
"TARGET_SSE"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
(vec_select:V4SF
(vec_concat:V8SF
- (match_operand:V4SF 1 "nonimmediate_operand" " 0,o,x")
- (match_operand:V4SF 2 "nonimmediate_operand" " x,0,0"))
+ (match_operand:V4SF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V4SF 2 "nonimmediate_operand" " x,o,x"))
(parallel [(const_int 6)
(const_int 7)
(const_int 2)
"TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"@
movhlps\t{%2, %0|%0, %2}
- movlps\t{%H1, %0|%0, %H1}
- movhps\t{%1, %0|%0, %1}"
+ movlps\t{%H2, %0|%0, %H2}
+ movhps\t{%2, %0|%0, %2}"
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF,V2SF,V2SF")])
[(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
(vec_concat:V2SF
(match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
- (match_operand:SF 2 "vector_move_operand" " x,C,*y, C")))]
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
"TARGET_SSE"
"@
unpcklps\t{%2, %0|%0, %2}
"TARGET_SSE2"
"sqrtsd\t{%1, %0|%0, %1}"
[(set_attr "type" "sse")
- (set_attr "mode" "SF")])
+ (set_attr "mode" "DF")])
;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
;; isn't really correct, as those rtl operators aren't defined when
[(set_attr "type" "sseadd")
(set_attr "mode" "V2DF")])
+(define_expand "reduc_splus_v2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")]
+ "TARGET_SSE3"
+{
+ emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point comparisons
operands[0] = adjust_address (operands[0], DFmode, 8);
})
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
(define_insn "sse2_movsd"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
(vec_merge:V2DF
movlpd\t{%2, %0|%0, %2}
movlpd\t{%2, %0|%0, %2}
shufpd\t{$2, %2, %0|%0, %2, 2}
- movhps\t{%H1, %0|%0, %H1
- movhps\t{%1, %H0|%H0, %1"
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
[(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
(set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
DONE;
})
+(define_expand "sdot_prodv8hi"
+ [(match_operand:V4SI 0 "register_operand" "")
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (match_operand:V4SI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_pmaddwd (t, operands[1], operands[2]));
+ emit_insn (gen_addv4si3 (operands[0], operands[3], t));
+ DONE;
+})
+
+(define_expand "udot_prodv4si"
+ [(match_operand:V2DI 0 "register_operand" "")
+ (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")
+ (match_operand:V2DI 3 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx t1, t2, t3, t4;
+
+ t1 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
+ emit_insn (gen_addv2di3 (t1, t1, operands[3]));
+
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, operands[1]),
+ GEN_INT (32)));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, operands[2]),
+ GEN_INT (32)));
+
+ t4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
+
+ emit_insn (gen_addv2di3 (operands[0], t1, t4));
+ DONE;
+})
+
(define_insn "ashr<mode>3"
[(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24
})
(define_expand "vcondu<mode>"
- [(set (match_operand:SSEMODE12 0 "register_operand" "")
- (if_then_else:SSEMODE12
+ [(set (match_operand:SSEMODE124 0 "register_operand" "")
+ (if_then_else:SSEMODE124
(match_operator 3 ""
- [(match_operand:SSEMODE12 4 "nonimmediate_operand" "")
- (match_operand:SSEMODE12 5 "nonimmediate_operand" "")])
- (match_operand:SSEMODE12 1 "general_operand" "")
- (match_operand:SSEMODE12 2 "general_operand" "")))]
+ [(match_operand:SSEMODE124 4 "nonimmediate_operand" "")
+ (match_operand:SSEMODE124 5 "nonimmediate_operand" "")])
+ (match_operand:SSEMODE124 1 "general_operand" "")
+ (match_operand:SSEMODE124 2 "general_operand" "")))]
"TARGET_SSE2"
{
if (ix86_expand_int_vcond (operands))
(vec_merge:V4SI
(vec_duplicate:V4SI
(match_operand:SI 2 "nonimmediate_operand" "mr,m,x"))
- (match_operand:V4SI 1 "vector_move_operand" " C,C,0")
+ (match_operand:V4SI 1 "reg_or_0_operand" " C,C,0")
(const_int 1)))]
"TARGET_SSE"
"@
operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
})
+(define_insn "*vec_extractv2di_1_sse2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ psrldq\t{$4, %0|%0, 4}
+ movq\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov,sseishft,ssemov")
+ (set_attr "mode" "V2SF,TI,TI")])
+
+;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
+(define_insn "*vec_extractv2di_1_sse"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DI
+ (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
(define_insn "*vec_dupv4si"
[(set (match_operand:V4SI 0 "register_operand" "=Y,x")
(vec_duplicate:V4SI
(match_operand:SI 1 "register_operand" "c")]
UNSPECV_MWAIT)]
"TARGET_SSE3"
- "mwait\t%0, %1"
+;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
+;; Since 32bit register operands are implicitly zero extended to 64bit,
+;; we only need to set up 32bit registers.
+ "mwait"
[(set_attr "length" "3")])
(define_insn "sse3_monitor"
(match_operand:SI 1 "register_operand" "c")
(match_operand:SI 2 "register_operand" "d")]
UNSPECV_MONITOR)]
- "TARGET_SSE3"
+ "TARGET_SSE3 && !TARGET_64BIT"
"monitor\t%0, %1, %2"
[(set_attr "length" "3")])
+
+(define_insn "sse3_monitor64"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand" "a")
+ (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "register_operand" "d")]
+ UNSPECV_MONITOR)]
+ "TARGET_SSE3 && TARGET_64BIT"
+;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
+;; RCX and RDX are used. Since 32bit register operands are implicitly
+;; zero extended to 64bit, we only need to set up 32bit registers.
+ "monitor"
+ [(set_attr "length" "3")])