(define_mode_attr sseintvecmode
[(V8SF "V8SI") (V4DF "V4DI")
(V4SF "V4SI") (V2DF "V2DI")
- (V4DF "V4DI") (V8SF "V8SI")
(V8SI "V8SI") (V4DI "V4DI")
(V4SI "V4SI") (V2DI "V2DI")
(V16HI "V16HI") (V8HI "V8HI")
(V32QI "V32QI") (V16QI "V16QI")])
+(define_mode_attr sseintvecmodelower
+ [(V8SF "v8si") (V4DF "v4di")
+ (V4SF "v4si") (V2DF "v2di")
+ (V8SI "v8si") (V4DI "v4di")
+ (V4SI "v4si") (V2DI "v2di")
+ (V16HI "v16hi") (V8HI "v8hi")
+ (V32QI "v32qi") (V16QI "v16qi")])
+
;; Mapping of vector modes to a vector mode of double size
(define_mode_attr ssedoublevecmode
[(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator AVXMODE48P_DI
- [V2DI V2DF V4DI V4DF V4SF V4SI])
-(define_mode_attr AVXMODE48P_DI
- [(V2DI "V2DI") (V2DF "V2DI")
- (V4DI "V4DI") (V4DF "V4DI")
- (V4SI "V2DI") (V4SF "V2DI")
- (V8SI "V4DI") (V8SF "V4DI")])
-
(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
;; Mapping of immediate bits for blend instructions
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "avx_cvtdq2ps256"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (float:V8SF (match_operand:V8SI 1 "nonimmediate_operand" "xm")))]
- "TARGET_AVX"
- "vcvtdq2ps\t{%1, %0|%0, %1}"
- [(set_attr "type" "ssecvt")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "sse2_cvtdq2ps"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
+(define_insn "float<sseintvecmodelower><mode>2"
+ [(set (match_operand:VF1 0 "register_operand" "=x")
+ (float:VF1
+ (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
"%vcvtdq2ps\t{%1, %0|%0, %1}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "maybe_vex")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<sseinsnmode>")])
-(define_expand "sse2_cvtudq2ps"
- [(set (match_dup 5)
- (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
- (set (match_dup 6)
- (lt:V4SF (match_dup 5) (match_dup 3)))
- (set (match_dup 7)
- (and:V4SF (match_dup 6) (match_dup 4)))
- (set (match_operand:V4SF 0 "register_operand" "")
- (plus:V4SF (match_dup 5) (match_dup 7)))]
- "TARGET_SSE2"
+(define_expand "floatuns<sseintvecmodelower><mode>2"
+ [(match_operand:VF1 0 "register_operand" "")
+ (match_operand:<sseintvecmode> 1 "register_operand" "")]
+ "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
- REAL_VALUE_TYPE TWO32r;
- rtx x;
- int i;
-
- real_ldexp (&TWO32r, &dconst1, 32);
- x = const_double_from_real_value (TWO32r, SFmode);
-
- operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
- operands[4] = force_reg (V4SFmode,
- ix86_build_const_vector (V4SFmode, 1, x));
-
- for (i = 5; i < 8; i++)
- operands[i] = gen_reg_rtx (V4SFmode);
+ ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
+ DONE;
})
(define_insn "avx_cvtps2dq256"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
-(define_insn "avx_cvttps2dq256"
+(define_insn "fix_truncv8sfv8si2"
[(set (match_operand:V8SI 0 "register_operand" "=x")
(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
-(define_insn "sse2_cvttps2dq"
+(define_insn "fix_truncv4sfv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
"TARGET_SSE2"
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "TI")])
+(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp[3];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
+ emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
+ emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "DI")])
-(define_insn "avx_cvtdq2pd256"
+(define_insn "floatv4siv4df2"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(float:V4DF (match_operand:V4SI 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
(set_attr "athlon_decode" "vector")
(set_attr "bdver1_decode" "double")])
-(define_insn "avx_cvttpd2dq256"
+(define_insn "fix_truncv4dfv4si2"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "xm")))]
"TARGET_AVX"
for (i = 2; i < 5; i++)
tmp[i] = gen_reg_rtx (V4DFmode);
emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
- emit_insn (gen_avx_cvtdq2pd256 (tmp[2], tmp[5]));
+ emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
emit_insn (gen_rtx_SET (VOIDmode, tmp[3],
gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
DONE;
})
+(define_mode_attr ssepackfltmode
+ [(V4DF "V8SI") (V2DF "V4SI")])
+
+(define_expand "vec_pack_ufix_trunc_<mode>"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_SSE2"
+{
+ rtx tmp[7];
+ tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
+ tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
+ tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
+ emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
+ if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
+ {
+ tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
+ ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
+ }
+ else
+ {
+ tmp[5] = gen_reg_rtx (V8SFmode);
+ ix86_expand_vec_extract_even_odd (tmp[5], gen_lowpart (V8SFmode, tmp[2]),
+ gen_lowpart (V8SFmode, tmp[3]), 0);
+ tmp[5] = gen_lowpart (V8SImode, tmp[5]);
+ }
+ tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
+ operands[0], 0, OPTAB_DIRECT);
+ if (tmp[6] != operands[0])
+ emit_move_insn (operands[0], tmp[6]);
+ DONE;
+})
+
(define_expand "vec_pack_sfix_v4df"
[(match_operand:V8SI 0 "register_operand" "")
(match_operand:V4DF 1 "nonimmediate_operand" "")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "SF")])
-(define_expand "vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "")))]
- "TARGET_SSE"
-{
- if (!TARGET_AVX)
- operands[1] = force_reg (SFmode, operands[1]);
-})
-
-(define_insn "avx2_vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
+(define_insn "avx2_vec_dup<mode>"
+ [(set (match_operand:VF1 0 "register_operand" "=x")
+ (vec_duplicate:VF1
(vec_select:SF
(match_operand:V4SF 1 "register_operand" "x")
(parallel [(const_int 0)]))))]
"vbroadcastss\t{%1, %0|%0, %1}"
[(set_attr "type" "sselog1")
(set_attr "prefix" "vex")
- (set_attr "mode" "V4SF")])
+ (set_attr "mode" "<MODE>")])
-(define_insn "*vec_dupv4sf_avx"
- [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+(define_insn "vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SF
- (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
- "TARGET_AVX"
+ (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
+ "TARGET_SSE"
"@
vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1,ssemov")
- (set_attr "length_immediate" "1,0")
- (set_attr "prefix_extra" "0,1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V4SF")])
-
-(define_insn "avx2_vec_dupv8sf"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (vec_duplicate:V8SF
- (vec_select:SF
- (match_operand:V4SF 1 "register_operand" "x")
- (parallel [(const_int 0)]))))]
- "TARGET_AVX2"
- "vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "V8SF")])
-
-(define_insn "*vec_dupv4sf"
- [(set (match_operand:V4SF 0 "register_operand" "=x")
- (vec_duplicate:V4SF
- (match_operand:SF 1 "register_operand" "0")))]
- "TARGET_SSE"
- "shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "type" "sselog1")
- (set_attr "length_immediate" "1")
+ vbroadcastss\t{%1, %0|%0, %1}
+ shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "isa" "avx,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "vex,vex,orig")
(set_attr "mode" "V4SF")])
;; Although insertps takes register source, we prefer
(set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
(set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
-(define_expand "vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "")
+(define_insn "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x")
(vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,xm")))]
"TARGET_SSE2"
-{
- if (!TARGET_SSE3)
- operands[1] = force_reg (DFmode, operands[1]);
-})
-
-(define_insn "*vec_dupv2df_sse3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_duplicate:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "xm")))]
- "TARGET_SSE3"
- "%vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DF")])
-
-(define_insn "*vec_dupv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_duplicate:V2DF
- (match_operand:DF 1 "register_operand" "0")))]
- "TARGET_SSE2"
- "unpcklpd\t%0, %0"
- [(set_attr "type" "sselog1")
+ "@
+ unpcklpd\t%0, %0
+ %vmovddup\t{%1, %0|%0, %1}"
+ [(set_attr "isa" "noavx,sse3")
+ (set_attr "type" "sselog1")
+ (set_attr "prefix" "orig,maybe_vex")
(set_attr "mode" "V2DF")])
-(define_insn "*vec_concatv2df_sse3"
- [(set (match_operand:V2DF 0 "register_operand" "=x")
- (vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" "xm")
- (match_dup 1)))]
- "TARGET_SSE3"
- "%vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1")
- (set_attr "prefix" "maybe_vex")
- (set_attr "mode" "DF")])
-
(define_insn "*vec_concatv2df"
- [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x")
+ [(set (match_operand:V2DF 0 "register_operand" "=x,x,x,x,x,x,x,x")
(vec_concat:V2DF
- (match_operand:DF 1 "nonimmediate_operand" " 0,x,0,x,m,0,0")
- (match_operand:DF 2 "vector_move_operand" " x,x,m,m,C,x,m")))]
+ (match_operand:DF 1 "nonimmediate_operand" " 0,x,m,0,x,m,0,0")
+ (match_operand:DF 2 "vector_move_operand" " x,x,1,m,m,C,x,m")))]
"TARGET_SSE"
"@
unpcklpd\t{%2, %0|%0, %2}
vunpcklpd\t{%2, %1, %0|%0, %1, %2}
+ %vmovddup\t{%1, %0|%0, %1}
movhpd\t{%2, %0|%0, %2}
vmovhpd\t{%2, %1, %0|%0, %1, %2}
%vmovsd\t{%1, %0|%0, %1}
movlhps\t{%2, %0|%0, %2}
movhps\t{%2, %0|%0, %2}"
- [(set_attr "isa" "sse2_noavx,avx,sse2_noavx,avx,sse2,noavx,noavx")
+ [(set_attr "isa" "sse2_noavx,avx,sse3,sse2_noavx,avx,sse2,noavx,noavx")
(set (attr "type")
(if_then_else
- (eq_attr "alternative" "0,1")
+ (eq_attr "alternative" "0,1,2")
(const_string "sselog")
(const_string "ssemov")))
- (set_attr "prefix_data16" "*,*,1,*,*,*,*")
- (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig")
- (set_attr "mode" "V2DF,V2DF,V1DF,V1DF,DF,V4SF,V2SF")])
+ (set_attr "prefix_data16" "*,*,*,1,*,*,*,*")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex,orig,orig")
+ (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,DF,V4SF,V2SF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
(set_attr "prefix" "maybe_vex,orig,vex,maybe_vex,orig,orig")
(set_attr "mode" "V2SF,TI,TI,TI,V4SF,V2SF")])
-(define_insn "*vec_dupv4si_avx"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+(define_expand "vec_dupv4si"
+ [(set (match_operand:V4SI 0 "register_operand" "")
(vec_duplicate:V4SI
- (match_operand:SI 1 "nonimmediate_operand" " x,m")))]
- "TARGET_AVX"
- "@
- vpshufd\t{$0, %1, %0|%0, %1, 0}
- vbroadcastss\t{%1, %0|%0, %1}"
- [(set_attr "type" "sselog1,ssemov")
- (set_attr "length_immediate" "1,0")
- (set_attr "prefix_extra" "0,1")
- (set_attr "prefix" "vex")
- (set_attr "mode" "TI,V4SF")])
+ (match_operand:SI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!TARGET_AVX)
+ operands[1] = force_reg (V4SImode, operands[1]);
+})
(define_insn "*vec_dupv4si"
- [(set (match_operand:V4SI 0 "register_operand" "=x,x")
+ [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
(vec_duplicate:V4SI
- (match_operand:SI 1 "register_operand" " x,0")))]
+ (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
"TARGET_SSE"
"@
- pshufd\t{$0, %1, %0|%0, %1, 0}
+ %vpshufd\t{$0, %1, %0|%0, %1, 0}
+ vbroadcastss\t{%1, %0|%0, %1}
shufps\t{$0, %0, %0|%0, %0, 0}"
- [(set_attr "isa" "sse2,*")
- (set_attr "type" "sselog1")
- (set_attr "length_immediate" "1")
- (set_attr "mode" "TI,V4SF")])
-
-(define_insn "*vec_dupv2di_sse3"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,x")
+ [(set_attr "isa" "sse2,avx,noavx")
+ (set_attr "type" "sselog1,ssemov,sselog1")
+ (set_attr "length_immediate" "1,0,1")
+ (set_attr "prefix_extra" "0,1,*")
+ (set_attr "prefix" "maybe_vex,vex,orig")
+ (set_attr "mode" "TI,V4SF,V4SF")])
+
+(define_expand "vec_dupv2di"
+ [(set (match_operand:V2DI 0 "register_operand" "")
(vec_duplicate:V2DI
- (match_operand:DI 1 "nonimmediate_operand" " 0,x,m")))]
- "TARGET_SSE3"
- "@
- punpcklqdq\t%0, %0
- vpunpcklqdq\t{%d1, %0|%0, %d1}
- %vmovddup\t{%1, %0|%0, %1}"
- [(set_attr "isa" "noavx,avx,*")
- (set_attr "type" "sselog1")
- (set_attr "prefix" "orig,vex,maybe_vex")
- (set_attr "mode" "TI,TI,DF")])
+ (match_operand:DI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+{
+ if (!TARGET_AVX)
+ operands[1] = force_reg (V2DImode, operands[1]);
+})
(define_insn "*vec_dupv2di"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,x")
(vec_duplicate:V2DI
- (match_operand:DI 1 "register_operand" " 0,0")))]
+ (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
"TARGET_SSE"
"@
punpcklqdq\t%0, %0
+ vpunpcklqdq\t{%d1, %0|%0, %d1}
+ %vmovddup\t{%1, %0|%0, %1}
movlhps\t%0, %0"
- [(set_attr "isa" "sse2,*")
- (set_attr "type" "sselog1,ssemov")
- (set_attr "mode" "TI,V4SF")])
+ [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
+ (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
+ (set_attr "prefix" "orig,vex,maybe_vex,orig")
+ (set_attr "mode" "TI,TI,DF,V4SF")])
(define_insn "*vec_concatv2si_sse4_1"
[(set (match_operand:V2SI 0 "register_operand" "=x, x,x,x, x, *y,*y")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
+ operands[2]));
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "nonimmediate_operand" "")
+ (match_operand:VF2 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_0_to_15_operand" "")]
+ "TARGET_ROUND"
+{
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
+ operands[3]));
+ emit_insn
+ (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
+ operands[3]));
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ DONE;
+})
+
(define_insn "sse4_1_round<ssescalarmodesuffix>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
(define_expand "round<mode>2"
[(set (match_dup 4)
(plus:VF
- (match_operand:VF 1 "nonimmediate_operand" "")
+ (match_operand:VF 1 "register_operand" "")
(match_dup 3)))
(set (match_operand:VF 0 "register_operand" "")
(unspec:VF
operands[5] = GEN_INT (ROUND_TRUNC);
})
+(define_expand "round<mode>2_sfix"
+ [(match_operand:<sseintvecmode> 0 "register_operand" "")
+ (match_operand:VF1 1 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp, operands[1]));
+
+ emit_insn
+ (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
+ DONE;
+})
+
+(define_expand "round<mode>2_vec_pack_sfix"
+ [(match_operand:<ssepackfltmode> 0 "register_operand" "")
+ (match_operand:VF2 1 "register_operand" "")
+ (match_operand:VF2 2 "register_operand" "")]
+ "TARGET_ROUND && !flag_trapping_math"
+{
+ rtx tmp0, tmp1;
+
+ tmp0 = gen_reg_rtx (<MODE>mode);
+ tmp1 = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_round<mode>2 (tmp0, operands[1]));
+ emit_insn (gen_round<mode>2 (tmp1, operands[2]));
+
+ emit_insn
+ (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Intel SSE4.2 string/text processing instructions
&& avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
+ if (mask == 0x12)
+ return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
+ if (mask == 0x20)
+ return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
operands[3] = GEN_INT (mask);
return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
;; For gather* insn patterns
(define_mode_iterator VEC_GATHER_MODE
[V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
-(define_mode_attr VEC_GATHER_MODE
+(define_mode_attr VEC_GATHER_IDXSI
[(V2DI "V4SI") (V2DF "V4SI")
(V4DI "V4SI") (V4DF "V4SI")
(V4SI "V4SI") (V4SF "V4SI")
(V8SI "V8SI") (V8SF "V8SI")])
+(define_mode_attr VEC_GATHER_IDXDI
+ [(V2DI "V2DI") (V2DF "V2DI")
+ (V4DI "V4DI") (V4DF "V4DI")
+ (V4SI "V2DI") (V4SF "V2DI")
+ (V8SI "V4DI") (V8SF "V4DI")])
+(define_mode_attr VEC_GATHER_SRCDI
+ [(V2DI "V2DI") (V2DF "V2DF")
+ (V4DI "V4DI") (V4DF "V4DF")
+ (V4SI "V4SI") (V4SF "V4SF")
+ (V8SI "V4SI") (V8SF "V4SF")])
(define_expand "avx2_gathersi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
(mem:<ssescalarmode>
(match_par_dup 7
[(match_operand 2 "vsib_address_operand" "")
- (match_operand:<VEC_GATHER_MODE> 3 "register_operand" "")
+ (match_operand:<VEC_GATHER_IDXSI>
+ 3 "register_operand" "")
(match_operand:SI 5 "const1248_operand " "")]))
(mem:BLK (scratch))
(match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
(match_operator:<ssescalarmode> 7 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:<VEC_GATHER_MODE> 4 "register_operand" "x")
+ (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
(match_operand:SI 6 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
+(define_insn "*avx2_gathersi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
+ [(unspec:P
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
+ UNSPEC_VSIBADDR)])
+ (mem:BLK (scratch))
+ (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
+ "TARGET_AVX2"
+ "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "vex")
+ (set_attr "mode" "<sseinsnmode>")])
+
(define_expand "avx2_gatherdi<mode>"
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
(unspec:VEC_GATHER_MODE
- [(match_operand:VEC_GATHER_MODE 1 "register_operand" "")
+ [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "")
(mem:<ssescalarmode>
(match_par_dup 7
[(match_operand 2 "vsib_address_operand" "")
- (match_operand:<AVXMODE48P_DI> 3 "register_operand" "")
+ (match_operand:<VEC_GATHER_IDXDI>
+ 3 "register_operand" "")
(match_operand:SI 5 "const1248_operand " "")]))
(mem:BLK (scratch))
- (match_operand:VEC_GATHER_MODE 4 "register_operand" "")]
+ (match_operand:<VEC_GATHER_SRCDI>
+ 4 "register_operand" "")]
UNSPEC_GATHER))
(clobber (match_scratch:VEC_GATHER_MODE 6 ""))])]
"TARGET_AVX2"
})
(define_insn "*avx2_gatherdi<mode>"
- [(set (match_operand:AVXMODE48P_DI 0 "register_operand" "=&x")
- (unspec:AVXMODE48P_DI
- [(match_operand:AVXMODE48P_DI 2 "register_operand" "0")
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
(match_operator:<ssescalarmode> 7 "vsib_mem_operator"
[(unspec:P
[(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:<AVXMODE48P_DI> 4 "register_operand" "x")
+ (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
(match_operand:SI 6 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
- (match_operand:AVXMODE48P_DI 5 "register_operand" "1")]
+ (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
UNSPEC_GATHER))
- (clobber (match_scratch:AVXMODE48P_DI 1 "=&x"))]
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
"TARGET_AVX2"
- "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+ "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-;; Special handling for VEX.256 with float arguments
-;; since there're still xmms as operands
-(define_expand "avx2_gatherdi<mode>256"
- [(parallel [(set (match_operand:VI4F_128 0 "register_operand" "")
- (unspec:VI4F_128
- [(match_operand:VI4F_128 1 "register_operand" "")
- (mem:<ssescalarmode>
- (match_par_dup 7
- [(match_operand 2 "vsib_address_operand" "")
- (match_operand:V4DI 3 "register_operand" "")
- (match_operand:SI 5 "const1248_operand " "")]))
- (mem:BLK (scratch))
- (match_operand:VI4F_128 4 "register_operand" "")]
- UNSPEC_GATHER))
- (clobber (match_scratch:VI4F_128 6 ""))])]
- "TARGET_AVX2"
-{
- operands[7]
- = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
- operands[5]), UNSPEC_VSIBADDR);
-})
-
-(define_insn "*avx2_gatherdi<mode>256"
- [(set (match_operand:VI4F_128 0 "register_operand" "=x")
- (unspec:VI4F_128
- [(match_operand:VI4F_128 2 "register_operand" "0")
- (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
+(define_insn "*avx2_gatherdi<mode>_2"
+ [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
+ (unspec:VEC_GATHER_MODE
+ [(pc)
+ (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
[(unspec:P
- [(match_operand:P 3 "vsib_address_operand" "p")
- (match_operand:V4DI 4 "register_operand" "x")
- (match_operand:SI 6 "const1248_operand" "n")]
+ [(match_operand:P 2 "vsib_address_operand" "p")
+ (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
+ (match_operand:SI 5 "const1248_operand" "n")]
UNSPEC_VSIBADDR)])
(mem:BLK (scratch))
- (match_operand:VI4F_128 5 "register_operand" "1")]
- UNSPEC_GATHER))
- (clobber (match_scratch:VI4F_128 1 "=&x"))]
+ (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
+ UNSPEC_GATHER))
+ (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
"TARGET_AVX2"
- "v<sseintprefix>gatherq<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
+{
+ if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
+ return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
+}
[(set_attr "type" "ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])