(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
(V4SF "V4SI") (V2DF "V2DI")
- (V4DF "V4DI") (V8SF "V8SI")
(V8SI "V8SI") (V4DI "V4DI")
(V4SI "V4SI") (V2DI "V2DI")
(V16HI "V16HI") (V8HI "V8HI")
(V32QI "V32QI") (V16QI "V16QI")])
+(define_mode_attr sseintvecmodelower
+ [(V8SF "v8si") (V4DF "v4di")
+ (V4SF "v4si") (V2DF "v2di")
+ (V8SI "v8si") (V4DI "v4di")
+ (V4SI "v4si") (V2DI "v2di")
+ (V16HI "v16hi") (V8HI "v8hi")
+ (V32QI "v32qi") (V16QI "v16qi")])
+
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
[(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator AVXMODE48P_DI
- [V2DI V2DF V4DI V4DF V4SF V4SI])
-(define_mode_attr AVXMODE48P_DI
- [(V2DI "V2DI") (V2DF "V2DI")
- (V4DI "V4DI") (V4DF "V4DI")
- (V4SI "V2DI") (V4SF "V2DI")
- (V8SI "V4DI") (V8SF "V4DI")])
-
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Mapping of immediate bits for blend instructions
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "avx_cvtdq2ps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvtdq2ps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "sse2_cvtdq2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+(define_insn "float<sseintvecmodelower><mode>2"
+ [(set (match_operand:VF1 0 "register_operand" "=x")
+ (float:VF1
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"%vcvtdq2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "sse2_cvtudq2ps"
- [(set (match_dup 5)
- (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
- (set (match_dup 6)
- (lt:V4SF (match_dup 5) (match_dup 3)))
- (set (match_dup 7)
- (and:V4SF (match_dup 6) (match_dup 4)))
- (set (match_operand:V4SF 0 "register_operand" "")
- (plus:V4SF (match_dup 5) (match_dup 7)))]
- "TARGET_SSE2"
+(define_expand "floatuns<sseintvecmodelower><mode>2"
+ [(match_operand:VF1 0 "register_operand" "")
+ (match_operand:<sseintvecmode> 1 "register_operand" "")]
+ "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
- REAL_VALUE_TYPE TWO32r;
- rtx x;
- int i;
-
- real_ldexp (&TWO32r, &dconst1, 32);
- x = const_double_from_real_value (TWO32r, SFmode);
-
- operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
- operands[4] = force_reg (V4SFmode,
- ix86_build_const_vector (V4SFmode, 1, x));
-
- for (i = 5; i < 8; i++)
- operands[i] = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
+ DONE;
})
(define_insn "avx_cvtps2dq256"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx_cvttps2dq256"
+(define_insn "fix_truncv8sfv8si2"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "sse2_cvttps2dq"
+(define_insn "fix_truncv4sfv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp[3];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
+ emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
+ emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "avx_cvtdq2pd256"
+(define_insn "floatv4siv4df2"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
-(define_insn "avx_cvttpd2dq256"
+(define_insn "fix_truncv4dfv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
for (i = 2; i < 5; i++)
tmp[i] = gen_reg_rtx (V4DFmode);
emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
- emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
+ emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
DONE;
})
+(define_mode_attr ssepackfltmode
+ [(V4DF "V8SI") (V2DF "V4SI")])
+
+(define_expand "vec_pack_ufix_trunc_<mode>"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp[7];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
+ tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
+ emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
+ if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
+ {
+ tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
+ ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
+ }
+ else
+ {
+ tmp[5] = gen_reg_rtx (V8SFmode);
+ ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
+ gen_lowpart (V8SFmode, tmp[3]), 0);
+ tmp[5] = gen_lowpart (V8SImode, tmp[5]);
+ }
+ tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
+ operands[0], 0, OPTAB_DIRECT);
+ if (tmp[6] != operands[0])
+ emit_move_insn (operands[0], tmp[6]);
+ DONE;
+})
+
(define_expand "vec_pack_sfix_v4df"
[(match_operand:V8SI 0 "register_operand" "")
(match_operand:V4DF 1 "nonimmediate_operand" "")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
-(define_expand "vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE"
-{
- if (!TARGET_AVX)
- operands[1] = force_reg (SFmode, operands[1]);
-})
-
-(define_insn "avx2_vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
+(define_insn "avx2_vec_dup<mode>"
+ [(set (match_operand:VF1 0 "register_operand" "=x")
+ (vec_duplicate:VF1
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(const_int 0)]))))]
"vbroadcastss\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix" "vex")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*vec_dupv4sf_avx"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+(define_insn "vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
- "TARGET_AVX"
+ (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+ "TARGET_SSE"
"@
vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1,ssemov")
- (set_attr "length_immediate" "1,0")
- (set_attr "prefix_extra" "0,1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4SF")])
-
-(define_insn "avx2_vec_dupv8sf"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_duplicate:V8SF
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_SSE"
- "shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "sselog1")
- (set_attr "length_immediate" "1")
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "avx,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "vex,vex,orig")
(set_attr "mode" "V4SF")])
;; Although insertps takes register source, we prefer
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
-(define_expand "vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "")
- (vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE2"
-{
- if (!TARGET_SSE3)
- operands[1] = force_reg (DFmode, operands[1]);
-})
-
-(define_insn "*vec_dupv2df_sse3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE3"
- "%vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DF")])
-
-(define_insn "*vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
+(define_insn "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_duplicate:V2DF
- (match_operand:DF 1 "register_operand" "0")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
"TARGET_SSE2"
- "unpcklpd\t%0, %0"
- [(set_attr "type" "sselog1")
+ "@
+ unpcklpd\t%0, %0
+ %vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "noavx,sse3")
+ (set_attr "type" "sselog1")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "V2DF")])
-(define_insn "*vec_concatv2df_sse3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "xm")
- (match_dup 1)))]
- "TARGET_SSE3"
- "%vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DF")])
-
(define_insn "*vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
+ (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
"TARGET_SSE"
"@
unpcklpd\t{%2, %0|%0, %2}
vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ %vmovddup\t{%1, %0|%0, %1}
movhpd\t{%2, %0|%0, %2}
vmovhpd\t{%2, %1, %0|%0, %1, %2}
%vmovsd\t{%1, %0|%0, %1}
movlhps\t{%2, %0|%0, %2}
movhps\t{%2, %0|%0, %2}"
- [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
+ [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
(set (attr "type")
(if_then_else
- (eq_attr "alternative" "0,1")
+ (eq_attr "alternative" "0,1,2")
(const_string "sselog")
(const_string "ssemov")))
- (set_attr "prefix_data16" "*,*,1,*,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
- (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
+ (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
(define_expand "<code><mode>3"
[(set (match_operand:VI8_AVX2 0 "register_operand" "")
- (maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
- (match_operand:VI8_AVX2 2 "register_operand" "")))]
+ (maxmin:VI8_AVX2
+ (match_operand:VI8_AVX2 1 "register_operand" "")
+ (match_operand:VI8_AVX2 2 "register_operand" "")))]
"TARGET_SSE4_2"
{
enum rtx_code code;
(define_expand "<code><mode>3"
[(set (match_operand:VI124_128 0 "register_operand" "")
- (smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
- (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
+ (smaxmin:VI124_128
+ (match_operand:VI124_128 1 "nonimmediate_operand" "")
+ (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
(define_expand "<code><mode>3"
[(set (match_operand:VI124_128 0 "register_operand" "")
- (umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
- (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
+ (umaxmin:VI124_128
+ (match_operand:VI124_128 1 "nonimmediate_operand" "")
+ (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
"TARGET_SSE2"
{
if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
(set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
(set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
-(define_insn "*vec_dupv4si_avx"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_expand "vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
- "TARGET_AVX"
- "@
- vpshufd\t{$0, %1, %0|%0, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1,ssemov")
- (set_attr "length_immediate" "1,0")
- (set_attr "prefix_extra" "0,1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "TI,V4SF")])
+ (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!TARGET_AVX)
+ operands[1] = force_reg (V4SImode, operands[1]);
+})
(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "register_operand" " x,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
"TARGET_SSE"
"@
- pshufd\t{$0, %1, %0|%0, %1, 0}
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "sse2,*")
- (set_attr "type" "sselog1")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "TI,V4SF")])
-
-(define_insn "*vec_dupv2di_sse3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ [(set_attr "isa" "sse2,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "maybe_vex,vex,orig")
+ (set_attr "mode" "TI,V4SF,V4SF")])
+
+(define_expand "vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "")
(vec_duplicate:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
- "TARGET_SSE3"
- "@
- punpcklqdq\t%0, %0
- vpunpcklqdq\t{%d1, %0|%0, %d1}
- %vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "isa" "noavx,avx,*")
- (set_attr "type" "sselog1")
- (set_attr "prefix" "orig,vex,maybe_vex")
- (set_attr "mode" "TI,TI,DF")])
+ (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!TARGET_AVX)
+ operands[1] = force_reg (V2DImode, operands[1]);
+})
(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
(vec_duplicate:V2DI
- (match_operand:DI 1 "register_operand" " 0,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
"TARGET_SSE"
"@
punpcklqdq\t%0, %0
+ vpunpcklqdq\t{%d1, %0|%0, %d1}
+ %vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
- [(set_attr "isa" "sse2,*")
- (set_attr "type" "sselog1,ssemov")
- (set_attr "mode" "TI,V4SF")])
+ [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
+ (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig")
+ (set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
+ operands[2]));
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "nonimmediate_operand" "")
+ (match_operand:VF2 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
+ operands[3]));
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
+ operands[3]));
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ DONE;
+})
+
(define_insn "sse4_1_round<ssescalarmodesuffix>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(define_expand "round<mode>2"
[(set (match_dup 4)
(plus:VF
- (match_operand:VF 1 "nonimmediate_operand" "")
+ (match_operand:VF 1 "register_operand" "")
(match_dup 3)))
(set (match_operand:VF 0 "register_operand" "")
(unspec:VF
operands[5] = GEN_INT (ROUND_TRUNC);
})
+(define_expand "round<mode>2_sfix"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp, operands[1]));
+
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "round<mode>2_vec_pack_sfix"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp0, operands[1]));
+ emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
{
rtx neg = gen_reg_rtx (<MODE>mode);
emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
+ emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
DONE;
})
{
rtx neg = gen_reg_rtx (<MODE>mode);
emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
+ emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
DONE;
}
})
{
rtx neg = gen_reg_rtx (<MODE>mode);
emit_insn (gen_neg<mode>2 (neg, operands[2]));
- emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
+ emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], neg));
DONE;
})
{
rtx neg = gen_reg_rtx (V4SImode);
emit_insn (gen_negv4si2 (neg, operands[2]));
- emit_insn (gen_xop_ashlv4si3 (operands[0], operands[1], neg));
+ emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
DONE;
}
})
(match_operand:VI12_128 2 "nonimmediate_operand" "")))]
"TARGET_XOP"
{
- emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
if (!TARGET_AVX2)
{
operands[2] = force_reg (<MODE>mode, operands[2]);
- emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
+ emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
DONE;
}
})
(match_operand:VI48_256 2 "nonimmediate_operand" "")))]
"TARGET_AVX2")
-(define_insn "xop_ashl<mode>3"
+(define_insn "xop_sha<mode>3"
[(set (match_operand:VI_128 0 "register_operand" "=x,x")
(if_then_else:VI_128
(ge:VI_128
(set_attr "prefix_extra" "2")
(set_attr "mode" "TI")])
-(define_insn "xop_lshl<mode>3"
+(define_insn "xop_shl<mode>3"
[(set (match_operand:VI_128 0 "register_operand" "=x,x")
(if_then_else:VI_128
(ge:VI_128
(set_attr "prefix_extra" "2")
(set_attr "mode" "TI")])
-;; SSE2 doesn't have some shift varients, so define versions for XOP
+;; SSE2 doesn't have some shift variants, so define versions for XOP
(define_expand "ashlv16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
(ashift:V16QI
(match_operand:SI 2 "nonmemory_operand" "")))]
"TARGET_XOP"
{
- rtvec vs = rtvec_alloc (16);
- rtx par = gen_rtx_PARALLEL (V16QImode, vs);
rtx reg = gen_reg_rtx (V16QImode);
+ rtx par;
int i;
- for (i = 0; i < 16; i++)
- RTVEC_ELT (vs, i) = operands[2];
- emit_insn (gen_vec_initv16qi (reg, par));
- emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
- DONE;
-})
-
-(define_expand "lshlv16qi3"
- [(match_operand:V16QI 0 "register_operand" "")
- (match_operand:V16QI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")]
- "TARGET_XOP"
-{
- rtvec vs = rtvec_alloc (16);
- rtx par = gen_rtx_PARALLEL (V16QImode, vs);
- rtx reg = gen_reg_rtx (V16QImode);
- int i;
+ par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
for (i = 0; i < 16; i++)
- RTVEC_ELT (vs, i) = operands[2];
+ XVECEXP (par, 0, i) = operands[2];
emit_insn (gen_vec_initv16qi (reg, par));
- emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
+ emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], reg));
DONE;
})
-(define_expand "ashrv16qi3"
+(define_expand "<shift_insn>v16qi3"
[(set (match_operand:V16QI 0 "register_operand" "")
- (ashiftrt:V16QI
+ (any_shiftrt:V16QI
(match_operand:V16QI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
"TARGET_XOP"
{
- rtvec vs = rtvec_alloc (16);
- rtx par = gen_rtx_PARALLEL (V16QImode, vs);
rtx reg = gen_reg_rtx (V16QImode);
+ rtx par;
+ bool negate = false;
+ rtx (*shift_insn)(rtx, rtx, rtx);
int i;
- rtx ele = ((CONST_INT_P (operands[2]))
- ? GEN_INT (- INTVAL (operands[2]))
- : operands[2]);
+ if (CONST_INT_P (operands[2]))
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
+ else
+ negate = true;
+
+ par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
for (i = 0; i < 16; i++)
- RTVEC_ELT (vs, i) = ele;
+ XVECEXP (par, 0, i) = operands[2];
emit_insn (gen_vec_initv16qi (reg, par));
- if (!CONST_INT_P (operands[2]))
- {
- rtx neg = gen_reg_rtx (V16QImode);
- emit_insn (gen_negv16qi2 (neg, reg));
- emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
- }
+ if (negate)
+ emit_insn (gen_negv16qi2 (reg, reg));
+
+ if (<CODE> == LSHIFTRT)
+ shift_insn = gen_xop_shlv16qi3;
else
- emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
+ shift_insn = gen_xop_shav16qi3;
+ emit_insn (shift_insn (operands[0], operands[1], reg));
DONE;
})
(match_operand:DI 2 "nonmemory_operand" "")))]
"TARGET_XOP"
{
- rtvec vs = rtvec_alloc (2);
- rtx par = gen_rtx_PARALLEL (V2DImode, vs);
rtx reg = gen_reg_rtx (V2DImode);
- rtx ele;
+ rtx par;
+ bool negate = false;
+ int i;
if (CONST_INT_P (operands[2]))
- ele = GEN_INT (- INTVAL (operands[2]));
- else if (GET_MODE (operands[2]) != DImode)
- {
- rtx move = gen_reg_rtx (DImode);
- ele = gen_reg_rtx (DImode);
- convert_move (move, operands[2], false);
- emit_insn (gen_negdi2 (ele, move));
- }
+ operands[2] = GEN_INT (-INTVAL (operands[2]));
else
- {
- ele = gen_reg_rtx (DImode);
- emit_insn (gen_negdi2 (ele, operands[2]));
- }
+ negate = true;
+
+ par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
+ for (i = 0; i < 2; i++)
+ XVECEXP (par, 0, i) = operands[2];
- RTVEC_ELT (vs, 0) = ele;
- RTVEC_ELT (vs, 1) = ele;
emit_insn (gen_vec_initv2di (reg, par));
- emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
+
+ if (negate)
+ emit_insn (gen_negv2di2 (reg, reg));
+
+ emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
DONE;
})
&& avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+ if (mask == 0x12)
+ return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
+ if (mask == 0x20)
+ return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
operands[3] = GEN_INT (mask);
return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
-(define_mode_attr VEC_GATHER_MODE
+(define_mode_attr VEC_GATHER_IDXSI
[(V2DI "V4SI") (V2DF "V4SI")
(V4DI "V4SI") (V4DF "V4SI")
(V4SI "V4SI") (V4SF "V4SI")
(V8SI "V8SI") (V8SF "V8SI")])
+(define_mode_attr VEC_GATHER_IDXDI
+ [(V2DI "V2DI") (V2DF "V2DI")
+ (V4DI "V4DI") (V4DF "V4DI")
+ (V4SI "V2DI") (V4SF "V2DI")
+ (V8SI "V4DI") (V8SF "V4DI")])
+(define_mode_attr VEC_GATHER_SRCDI
+ [(V2DI "V2DI") (V2DF "V2DF")
+ (V4DI "V4DI") (V4DF "V4DF")
+ (V4SI "V4SI") (V4SF "V4SF")
+ (V8SI "V4SI") (V8SF "V4SF")])
(define_expand "avx2_gathersi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
(mem:<ssescalarmode>
(match_par_dup 7
[(match_operand 2 "vsib_address_operand" "")
- (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
+ (match_operand:<VEC_GATHER_IDXSI>
+ 3 "register_operand" "")
(match_operand:SI 5 "const1248_operand " "")]))
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
(match_operator:<ssescalarmode> 7 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
+ (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
(match_operand:SI 6 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*avx2_gathersi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx2_gatherdi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
(unspec:VEC_GATHER_MODE
- [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+ [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
(mem:<ssescalarmode>
(match_par_dup 7
[(match_operand 2 "vsib_address_operand" "")
- (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
+ (match_operand:<VEC_GATHER_IDXDI>
+ 3 "register_operand" "")
(match_operand:SI 5 "const1248_operand " "")]))
(mem:BLK (scratch))
- (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
+ (match_operand:<VEC_GATHER_SRCDI>
+ 4 "register_operand" "")]
UNSPEC_GATHER))
(clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
"TARGET_AVX2"
})
(define_insn "*avx2_gatherdi<mode>"
- [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
- (unspec:AVXMODE48P_DI
- [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
(match_operator:<ssescalarmode> 7 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
+ (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
(match_operand:SI 6 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
- (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
+ (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
UNSPEC_GATHER))
- (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
"TARGET_AVX2"
- "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-;; Special handling for VEX.256 with float arguments
-;; since there're still xmms as operands
-(define_expand "avx2_gatherdi<mode>256"
- [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
- (unspec:VI4F_128
- [(match_operand:VI4F_128 1 "register_operand" "")
- (mem:<ssescalarmode>
- (match_par_dup 7
- [(match_operand 2 "vsib_address_operand" "")
- (match_operand:V4DI 3 "register_operand" "")
- (match_operand:SI 5 "const1248_operand " "")]))
- (mem:BLK (scratch))
- (match_operand:VI4F_128 4 "register_operand" "")]
- UNSPEC_GATHER))
- (clobber (match_scratch:VI4F_128 6 ""))])]
- "TARGET_AVX2"
-{
- operands[7]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
-})
-
-(define_insn "*avx2_gatherdi<mode>256"
- [(set (match_operand:VI4F_128 0 "register_operand" "=x")
- (unspec:VI4F_128
- [(match_operand:VI4F_128 2 "register_operand" "0")
- (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
+(define_insn "*avx2_gatherdi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
[(unspec:P
- [(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:V4DI 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand" "n")]
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
- (match_operand:VI4F_128 5 "register_operand" "1")]
- UNSPEC_GATHER))
- (clobber (match_scratch:VI4F_128 1 "=&x"))]
+ (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
"TARGET_AVX2"
- "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+{
+ if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])