+(define_expand "sse_loadhps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_loadhps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_concat:V4SF
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "0,0,0")
+ (parallel [(const_int 0) (const_int 1)]))
+ (match_operand:V2SF 2 "nonimmediate_operand" "m,x,x")))]
+ "TARGET_SSE"
+ "@
+ movhps\t{%2, %0|%0, %2}
+ movlhps\t{%2, %0|%0, %2}
+ movlps\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse_storelps"
+ [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_SSE"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_expand "sse_loadlps_exp"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);")
+
+(define_insn "sse_loadlps"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_concat:V4SF
+ (match_operand:V2SF 2 "nonimmediate_operand" "0,m,x")
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "x,0,0")
+ (parallel [(const_int 2) (const_int 3)]))))]
+ "TARGET_SSE"
+ "@
+ shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
+ movlps\t{%2, %0|%0, %2}
+ movlps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V4SF,V2SF,V2SF")])
+
+(define_insn "sse_movss"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "movss\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+(define_insn "*vec_dupv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "register_operand" "0")))]
+ "TARGET_SSE"
+ "shufps\t{$0, %0, %0|%0, %0, 0}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V4SF")])
+
+;; ??? In theory we can match memory for the MMX alternative, but allowing
+;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
+;; alternatives pretty much forces the MMX alternative to be chosen.
+(define_insn "*sse_concatv2sf"
+ [(set (match_operand:V2SF 0 "register_operand" "=x,x,*y,*y")
+ (vec_concat:V2SF
+ (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
+ (match_operand:SF 2 "reg_or_0_operand" " x,C,*y, C")))]
+ "TARGET_SSE"
+ "@
+ unpcklps\t{%2, %0|%0, %2}
+ movss\t{%1, %0|%0, %1}
+ punpckldq\t{%2, %0|%0, %2}
+ movd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
+ (set_attr "mode" "V4SF,SF,DI,DI")])
+
+(define_insn "*sse_concatv4sf"
+ [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+ (vec_concat:V4SF
+ (match_operand:V2SF 1 "register_operand" " 0,0")
+ (match_operand:V2SF 2 "nonimmediate_operand" " x,m")))]
+ "TARGET_SSE"
+ "@
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V4SF,V2SF")])
+
+(define_expand "vec_initv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+(define_insn "vec_setv4sf_0"
+ [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF"))
+ (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0")
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ movss\t{%2, %0|%0, %2}
+ movss\t{%2, %0|%0, %2}
+ movd\t{%2, %0|%0, %2}
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "SF")])
+
+;; A subset is vec_setv4sf.
+(define_insn "*vec_setv4sf_sse4_1"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 2 "nonimmediate_operand" "xm"))
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_pow2_1_to_8_operand" "n")))]
+ "TARGET_SSE4_1"
+{
+ operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
+ return "insertps\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn "sse4_1_insertps"
+ [(set (match_operand:V4SF 0 "register_operand" "=x")
+ (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
+ (match_operand:V4SF 1 "register_operand" "0")
+ (match_operand:SI 3 "const_0_to_255_operand" "n")]
+ UNSPEC_INSERTPS))]
+ "TARGET_SSE4_1"
+ "insertps\t{%3, %2, %0|%0, %2, %3}";
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_split
+ [(set (match_operand:V4SF 0 "memory_operand" "")
+ (vec_merge:V4SF
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "nonmemory_operand" ""))
+ (match_dup 0)
+ (const_int 1)))]
+ "TARGET_SSE && reload_completed"
+ [(const_int 0)]
+{
+ emit_move_insn (adjust_address (operands[0], SFmode, 0), operands[1]);
+ DONE;
+})
+
+(define_expand "vec_setv4sf"
+ [(match_operand:V4SF 0 "register_operand" "")
+ (match_operand:SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_insn_and_split "*vec_extractv4sf_0"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,fr")
+ (vec_select:SF
+ (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (SFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (SFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_insn "*sse4_1_extractps"
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=rm")
+ (vec_select:SF
+ (match_operand:V4SF 1 "register_operand" "x")
+ (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
+ "TARGET_SSE4_1"
+ "extractps\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "sselog")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "V4SF")])
+
+(define_insn_and_split "*vec_extract_v4sf_mem"
+ [(set (match_operand:SF 0 "register_operand" "=x*rf")
+ (vec_select:SF
+ (match_operand:V4SF 1 "memory_operand" "o")
+ (parallel [(match_operand 2 "const_0_to_3_operand" "n")])))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int i = INTVAL (operands[2]);
+
+ emit_move_insn (operands[0], adjust_address (operands[1], SFmode, i*4));
+ DONE;
+})
+
+(define_expand "vec_extractv4sf"
+ [(match_operand:SF 0 "register_operand" "")
+ (match_operand:V4SF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel double-precision floating point element swizzling
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "sse2_unpckhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_unpckhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
+ (parallel [(const_int 1)
+ (const_int 3)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpckhpd\t{%2, %0|%0, %2}
+ movlpd\t{%H1, %0|%0, %H1}
+ movhpd\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_insn "*sse3_movddup"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movddup\t{%1, %0|%0, %1}
+ #"
+ [(set_attr "type" "sselog1,ssemov")
+ (set_attr "mode" "V2DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_dup 1))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE3 && reload_completed"
+ [(const_int 0)]
+{
+ rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
+ emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
+ emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
+ DONE;
+})
+
+(define_expand "sse2_unpcklpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" ""))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_unpcklpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
+ (parallel [(const_int 0)
+ (const_int 2)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %H0|%H0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,V1DF")])
+
+(define_expand "sse2_shufpd"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand:V2DF 2 "nonimmediate_operand" "")
+ (match_operand:SI 3 "const_int_operand" "")]
+ "TARGET_SSE2"
+{
+ int mask = INTVAL (operands[3]);
+ emit_insn (gen_sse2_shufpd_1 (operands[0], operands[1], operands[2],
+ GEN_INT (mask & 1),
+ GEN_INT (mask & 2 ? 3 : 2)));
+ DONE;
+})
+
+(define_insn "sse2_shufpd_1"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_select:V2DF
+ (vec_concat:V4DF
+ (match_operand:V2DF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "nonimmediate_operand" "xm"))
+ (parallel [(match_operand 3 "const_0_to_1_operand" "")
+ (match_operand 4 "const_2_to_3_operand" "")])))]
+ "TARGET_SSE2"
+{
+ int mask;
+ mask = INTVAL (operands[3]);
+ mask |= (INTVAL (operands[4]) - 2) << 1;
+ operands[3] = GEN_INT (mask);
+
+ return "shufpd\t{%3, %2, %0|%0, %2, %3}";
+}
+ [(set_attr "type" "sselog")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "sse2_storehpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,0,o")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhpd\t{%1, %0|%0, %1}
+ unpckhpd\t%0, %0
+ #"
+ [(set_attr "type" "ssemov,sselog1,ssemov")
+ (set_attr "mode" "V1DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "memory_operand" "")
+ (parallel [(const_int 1)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[1] = adjust_address (operands[1], DFmode, 8);
+})
+
+(define_insn "sse2_storelpd"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlpd\t{%1, %0|%0, %1}
+ #
+ #"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V1DF,DF,DF")])
+
+(define_split
+ [(set (match_operand:DF 0 "register_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)])))]
+ "TARGET_SSE2 && reload_completed"
+ [(const_int 0)]
+{
+ rtx op1 = operands[1];
+ if (REG_P (op1))
+ op1 = gen_rtx_REG (DFmode, REGNO (op1));
+ else
+ op1 = gen_lowpart (DFmode, op1);
+ emit_move_insn (operands[0], op1);
+ DONE;
+})
+
+(define_expand "sse2_loadhpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_loadhpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
+ (vec_concat:V2DF
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,x,0")
+ (parallel [(const_int 0)]))
+ (match_operand:DF 2 "nonimmediate_operand" " m,x,0,x*fr")))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movhpd\t{%2, %0|%0, %2}
+ unpcklpd\t{%2, %0|%0, %2}
+ shufpd\t{$1, %1, %0|%0, %1, 1}
+ #"
+ [(set_attr "type" "ssemov,sselog,sselog,other")
+ (set_attr "mode" "V1DF,V2DF,V2DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
+ (match_operand:DF 1 "register_operand" "")))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+(define_expand "sse2_loadlpd_exp"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" "")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
+
+(define_insn "sse2_loadlpd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,x,x,m")
+ (vec_concat:V2DF
+ (match_operand:DF 2 "nonimmediate_operand" " m,m,x,0,0,x*fr")
+ (vec_select:DF
+ (match_operand:V2DF 1 "vector_move_operand" " C,0,0,x,o,0")
+ (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movsd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhpd\t{%H1, %0|%0, %H1}
+ #"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,other")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF")])
+
+(define_split
+ [(set (match_operand:V2DF 0 "memory_operand" "")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "register_operand" "")
+ (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
+ "TARGET_SSE2 && reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+{
+ operands[0] = adjust_address (operands[0], DFmode, 8);
+})
+
+;; Not sure these two are ever used, but it doesn't hurt to have
+;; them. -aoliva
+(define_insn "*vec_extractv2df_1_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
+ (parallel [(const_int 1)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movhps\t{%1, %0|%0, %1}
+ movhlps\t{%1, %0|%0, %1}
+ movlps\t{%H1, %0|%0, %H1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "*vec_extractv2df_0_sse"
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
+ (vec_select:DF
+ (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
+ (parallel [(const_int 0)])))]
+ "!TARGET_SSE2 && TARGET_SSE
+ && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ "@
+ movlps\t{%1, %0|%0, %1}
+ movaps\t{%1, %0|%0, %1}
+ movlps\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "mode" "V2SF,V4SF,V2SF")])
+
+(define_insn "sse2_movsd"
+ [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m,x,x,o")
+ (vec_merge:V2DF
+ (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x,0,0,0")
+ (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0,x,o,x")
+ (const_int 1)))]
+ "TARGET_SSE2"
+ "@
+ movsd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ movlpd\t{%2, %0|%0, %2}
+ shufpd\t{$2, %2, %0|%0, %2, 2}
+ movhps\t{%H1, %0|%0, %H1}
+ movhps\t{%1, %H0|%H0, %1}"
+ [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
+ (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
+
+(define_insn "*vec_dupv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "vec_dupv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_duplicate:V2DF
+ (match_operand:DF 1 "register_operand" "0")))]
+ "TARGET_SSE2"
+ "unpcklpd\t%0, %0"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "V2DF")])
+
+(define_insn "*vec_concatv2df_sse3"
+ [(set (match_operand:V2DF 0 "register_operand" "=x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" "xm")
+ (match_dup 1)))]
+ "TARGET_SSE3"
+ "movddup\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sselog1")
+ (set_attr "mode" "DF")])
+
+(define_insn "*vec_concatv2df"
+ [(set (match_operand:V2DF 0 "register_operand" "=Y2,Y2,Y2,x,x")
+ (vec_concat:V2DF
+ (match_operand:DF 1 "nonimmediate_operand" " 0 ,0 ,m ,0,0")
+ (match_operand:DF 2 "vector_move_operand" " Y2,m ,C ,x,m")))]
+ "TARGET_SSE"
+ "@
+ unpcklpd\t{%2, %0|%0, %2}
+ movhpd\t{%2, %0|%0, %2}
+ movsd\t{%1, %0|%0, %1}
+ movlhps\t{%2, %0|%0, %2}
+ movhps\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
+ (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
+
+(define_expand "vec_setv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand:DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_set (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_extractv2df"
+ [(match_operand:DF 0 "register_operand" "")
+ (match_operand:V2DF 1 "register_operand" "")
+ (match_operand 2 "const_int_operand" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_extract (false, operands[0], operands[1],
+ INTVAL (operands[2]));
+ DONE;
+})
+
+(define_expand "vec_initv2df"
+ [(match_operand:V2DF 0 "register_operand" "")
+ (match_operand 1 "" "")]
+ "TARGET_SSE"
+{
+ ix86_expand_vector_init (false, operands[0], operands[1]);
+ DONE;
+})
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Parallel integral arithmetic
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (minus:SSEMODEI
+ (match_dup 2)
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
+
+(define_expand "<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
+ (plusminus:SSEMODEI
+ (match_operand:SSEMODEI 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+
+(define_insn "*sse2_<plusminus_insn><mode>3"
+ [(set (match_operand:SSEMODE12 0 "register_operand" "=x")
+ (sat_plusminus:SSEMODE12
+ (match_operand:SSEMODE12 1 "nonimmediate_operand" "<comm>0")
+ (match_operand:SSEMODE12 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "p<plusminus_mnemonic><ssevecsize>\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "mulv16qi3"
+ [(set (match_operand:V16QI 0 "register_operand" "")
+ (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
+ (match_operand:V16QI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t[12], op0, op[3];
+ int i;
+
+ if (TARGET_SSE5)
+ {
+ /* On SSE5, we can take advantage of the pperm instruction to pack and
+ unpack the bytes. Unpack data such that we've got a source byte in
+ each low byte of each word. We don't care what goes into the high
+ byte, so put 0 there. */
+ for (i = 0; i < 6; ++i)
+ t[i] = gen_reg_rtx (V8HImode);
+
+ for (i = 0; i < 2; i++)
+ {
+ op[0] = t[i];
+ op[1] = operands[i+1];
+ ix86_expand_sse5_unpack (op, true, true); /* high bytes */
+
+ op[0] = t[i+2];
+ ix86_expand_sse5_unpack (op, true, false); /* low bytes */
+ }
+
+ /* Multiply words. */
+ emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
+ emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
+
+ /* Pack the low byte of each word back into a single xmm */
+ op[0] = operands[0];
+ op[1] = t[5];
+ op[2] = t[4];
+ ix86_expand_sse5_pack (op);
+ DONE;
+ }
+
+ for (i = 0; i < 12; ++i)
+ t[i] = gen_reg_rtx (V16QImode);
+
+ /* Unpack data such that we've got a source byte in each low byte of
+ each word. We don't care what goes into the high byte of each word.
+ Rather than trying to get zero in there, most convenient is to let
+ it be a copy of the low byte. */
+ emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
+ emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
+ emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
+
+ /* Multiply words. The end-of-line annotations here give a picture of what
+ the output of that instruction looks like. Dot means don't care; the
+ letters are the bytes of the result with A being the most significant. */
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
+ gen_lowpart (V8HImode, t[0]),
+ gen_lowpart (V8HImode, t[1])));
+ emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
+ gen_lowpart (V8HImode, t[2]),
+ gen_lowpart (V8HImode, t[3])));
+
+ /* Extract the relevant bytes and merge them back together. */
+ emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */
+ emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
+ emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
+ emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
+ emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
+ emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
+
+ op0 = operands[0];
+ emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
+ DONE;
+})
+
+(define_expand "mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*mulv8hi3"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmullw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*smulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (sign_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (sign_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" ""))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")))
+ (const_int 16))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*umulv8hi3_highpart"
+ [(set (match_operand:V8HI 0 "register_operand" "=x")
+ (truncate:V8HI
+ (lshiftrt:V8SI
+ (mult:V8SI
+ (zero_extend:V8SI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0"))
+ (zero_extend:V8SI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")))
+ (const_int 16))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmulhuw\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*sse2_umulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (zero_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuludq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1"
+ "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
+
+(define_insn "*sse4_1_mulv2siv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "=x")
+ (mult:V2DI
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0) (const_int 2)])))
+ (sign_extend:V2DI
+ (vec_select:V2SI
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0) (const_int 2)])))))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmuldq\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2"
+ "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
+
+(define_insn "*sse2_pmaddwd"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (plus:V4SI
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "%0")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 2 "nonimmediate_operand" "xm")
+ (parallel [(const_int 0)
+ (const_int 2)
+ (const_int 4)
+ (const_int 6)]))))
+ (mult:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 1)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)])))
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_dup 2)
+ (parallel [(const_int 1)
+ (const_int 3)
+ (const_int 5)
+ (const_int 7)]))))))]
+ "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+ "pmaddwd\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseiadd")
+ (set_attr "prefix_data16" "1")
+ (set_attr "mode" "TI")])
+
+(define_expand "mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2"
+{
+ if (TARGET_SSE4_1 || TARGET_SSE5)
+ ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
+})
+
+(define_insn "*sse4_1_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=x")
+ (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, V4SImode, operands)"
+ "pmulld\t{%2, %0|%0, %2}"
+ [(set_attr "type" "sseimul")
+ (set_attr "prefix_extra" "1")
+ (set_attr "mode" "TI")])
+
+;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
+;; multiply/add. In general, we expect the define_split to occur before
+;; register allocation, so we have to handle the corner case where the target
+;; is used as the base or index register in operands 1/2.
+(define_insn_and_split "*sse5_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "=&x")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
+ (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
+ "TARGET_SSE5"
+ "#"
+ "&& (reload_completed
+ || (!reg_mentioned_p (operands[0], operands[1])
+ && !reg_mentioned_p (operands[0], operands[2])))"
+ [(set (match_dup 0)
+ (match_dup 3))
+ (set (match_dup 0)
+ (plus:V4SI (mult:V4SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 0)))]
+{
+ operands[3] = CONST0_RTX (V4SImode);
+}
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "TI")])
+
+(define_insn_and_split "*sse2_mulv4si3"
+ [(set (match_operand:V4SI 0 "register_operand" "")
+ (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
+ (match_operand:V4SI 2 "register_operand" "")))]
+ "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = operands[2];
+ t1 = gen_reg_rtx (V4SImode);
+ t2 = gen_reg_rtx (V4SImode);
+ t3 = gen_reg_rtx (V4SImode);
+ t4 = gen_reg_rtx (V4SImode);
+ t5 = gen_reg_rtx (V4SImode);
+ t6 = gen_reg_rtx (V4SImode);
+ thirtytwo = GEN_INT (32);
+
+ /* Multiply elements 2 and 0. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t1),
+ op1, op2));
+
+ /* Shift both input vectors down one element, so that elements 3
+ and 1 are now in the slots for elements 2 and 0. For K8, at
+ least, this is faster than using a shuffle. */
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2),
+ gen_lowpart (TImode, op1),
+ thirtytwo));
+ emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3),
+ gen_lowpart (TImode, op2),
+ thirtytwo));
+ /* Multiply elements 3 and 1. */
+ emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
+ t2, t3));
+
+ /* Move the results in element 2 down to element 1; we don't care
+ what goes in elements 2 and 3. */
+ emit_insn (gen_sse2_pshufd_1 (t5, t1, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+ emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
+ const0_rtx, const0_rtx));
+
+ /* Merge the parts back together. */
+ emit_insn (gen_sse2_punpckldq (op0, t5, t6));
+ DONE;
+})
+
+(define_insn_and_split "mulv2di3"
+ [(set (match_operand:V2DI 0 "register_operand" "")
+ (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
+ (match_operand:V2DI 2 "register_operand" "")))]
+ "TARGET_SSE2
+ && !(reload_completed || reload_in_progress)"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx t1, t2, t3, t4, t5, t6, thirtytwo;
+ rtx op0, op1, op2;