+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of ptest insns.
+
+ D describes the builtin: D->icode selects the insn pattern and
+ D->comparison is the rtx comparison code used to form the result.
+ EXP is the CALL_EXPR; its first two arguments are expanded and
+ passed as operands 0 and 1 of the pattern. The incoming TARGET is
+ not used: it is overwritten with a fresh SImode pseudo.
+
+ The SImode pseudo is first set to zero, then only its low QImode
+ part is written (via STRICT_LOW_PART) with the comparison of
+ SET_DEST (pat) against zero.
+
+ Returns the SImode pseudo, or 0 if GEN_FCN produced no pattern. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode); /* Discards the incoming TARGET. */
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0); /* QImode view at byte offset 0. */
+
+ if ((optimize && !register_operand (op0, mode0)) /* When optimizing, insist on a register. */
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0; /* No pattern could be generated. */
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat), /* NOTE(review): presumably the flags register set by PAT; confirm against the insn pattern. */
+ const0_rtx)));
+
+ return SUBREG_REG (target); /* The underlying SImode pseudo, not the QImode subreg. */
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.
+
+ EXP is a CALL_EXPR with five arguments. Operands 0 and 1 of the
+ insn pattern D->icode receive TARGET and/or scratch pseudos; call
+ arguments 0 through 4 are passed as insn operands 2 through 6.
+ The fifth argument must satisfy the predicate of insn operand 6,
+ otherwise an error is reported and const0_rtx is returned.
+
+ For IX86_BUILTIN_PCMPESTRI128 the result is insn operand 0 and
+ operand 1 is a scratch; for IX86_BUILTIN_PCMPESTRM128 the result is
+ insn operand 1 and operand 0 is a scratch. For any other builtin
+ D->flag is asserted to be nonzero and both are scratch pseudos.
+
+ When D->flag is nonzero it is used as the machine mode of FLAGS_REG,
+ and the value returned is a fresh SImode pseudo that is zeroed and
+ whose low QImode part is then set from an EQ comparison of that
+ flags register with zero. Otherwise TARGET is returned.
+ Returns 0 if GEN_FCN produced no pattern. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode; /* Modes of the two result/scratch operands. */
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode; /* Mode for op0. */
+ modei3 = insn_data[d->icode].operand[3].mode; /* Mode for op1. */
+ modev4 = insn_data[d->icode].operand[4].mode; /* Mode for op2. */
+ modei5 = insn_data[d->icode].operand[5].mode; /* Mode for op3. */
+ modeimm = insn_data[d->icode].operand[6].mode; /* Mode for op4. */
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4)) /* Only op2 is forced into a register when optimizing. */
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) /* op4 is never copied to a register; it must already match. */
+ {
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target /* A new pseudo is always used when optimizing. */
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag); /* Remaining builtins return a flag-derived value only. */
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode); /* Replaces whatever TARGET held before. */
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag, /* D->flag is reinterpreted as the flags mode. */
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target); /* The underlying SImode pseudo. */
+ }
+ else
+ return target;
+}
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.
+
+ EXP is a CALL_EXPR with three arguments. Operands 0 and 1 of the
+ insn pattern D->icode receive TARGET and/or scratch pseudos; call
+ arguments 0 through 2 are passed as insn operands 2 through 4.
+ The third argument must satisfy the predicate of insn operand 4,
+ otherwise an error is reported and const0_rtx is returned.
+
+ For IX86_BUILTIN_PCMPISTRI128 the result is insn operand 0 and
+ operand 1 is a scratch; for IX86_BUILTIN_PCMPISTRM128 the result is
+ insn operand 1 and operand 0 is a scratch. For any other builtin
+ D->flag is asserted to be nonzero and both are scratch pseudos.
+
+ When D->flag is nonzero it is used as the machine mode of FLAGS_REG,
+ and the value returned is a fresh SImode pseudo that is zeroed and
+ whose low QImode part is then set from an EQ comparison of that
+ flags register with zero. Otherwise TARGET is returned.
+ Returns 0 if GEN_FCN produced no pattern. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode; /* Modes of the two result/scratch operands. */
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode; /* Mode for op0. */
+ modev3 = insn_data[d->icode].operand[3].mode; /* Mode for op1. */
+ modeimm = insn_data[d->icode].operand[4].mode; /* Mode for op2. */
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3)) /* Only op1 is forced into a register when optimizing. */
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm)) /* op2 is never copied to a register; it must already match. */
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target /* A new pseudo is always used when optimizing. */
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag); /* Remaining builtins return a flag-derived value only. */
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode); /* Replaces whatever TARGET held before. */
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag, /* D->flag is reinterpreted as the flags mode. */
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target); /* The underlying SImode pseudo. */
+ }
+ else
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of insns with
+ a variable number of operands. */
+
+static rtx
+ix86_expand_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, real_target;
+ unsigned int i, nargs;
+ unsigned int nargs_constant = 0;
+ int num_memory = 0;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+ bool last_arg_count = false;
+ enum insn_code icode = d->icode;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum machine_mode rmode = VOIDmode;
+ bool swap = false;
+ enum rtx_code comparison = d->comparison;
+
+ switch ((enum ix86_builtin_type) d->flag)
+ {
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ return ix86_expand_sse_ptest (d, exp, target);
+ case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
+ case INT64_FTYPE_V4SF:
+ case INT64_FTYPE_V2DF:
+ case INT_FTYPE_V16QI:
+ case INT_FTYPE_V8QI:
+ case INT_FTYPE_V4SF:
+ case INT_FTYPE_V2DF:
+ case V16QI_FTYPE_V16QI:
+ case V8HI_FTYPE_V8HI:
+ case V8HI_FTYPE_V16QI:
+ case V8QI_FTYPE_V8QI:
+ case V4SI_FTYPE_V4SI:
+ case V4SI_FTYPE_V16QI:
+ case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V2DF:
+ case V4HI_FTYPE_V4HI:
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V2DF:
+ case V2DI_FTYPE_V2DI:
+ case V2DI_FTYPE_V16QI:
+ case V2DI_FTYPE_V8HI:
+ case V2DI_FTYPE_V4SI:
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4SF:
+ case V2DF_FTYPE_V2SI:
+ case V2SI_FTYPE_V2SI:
+ case V2SI_FTYPE_V4SF:
+ case V2SI_FTYPE_V2SF:
+ case V2SI_FTYPE_V2DF:
+ case V2SF_FTYPE_V2SF:
+ case V2SF_FTYPE_V2SI:
+ nargs = 1;
+ break;
+ case V4SF_FTYPE_V4SF_VEC_MERGE:
+ case V2DF_FTYPE_V2DF_VEC_MERGE:
+ return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
+ case FLOAT128_FTYPE_FLOAT128_FLOAT128:
+ case V16QI_FTYPE_V16QI_V16QI:
+ case V16QI_FTYPE_V8HI_V8HI:
+ case V8QI_FTYPE_V8QI_V8QI:
+ case V8QI_FTYPE_V4HI_V4HI:
+ case V8HI_FTYPE_V8HI_V8HI:
+ case V8HI_FTYPE_V16QI_V16QI:
+ case V8HI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V4SI_V4SI:
+ case V4SI_FTYPE_V8HI_V8HI:
+ case V4SI_FTYPE_V4SF_V4SF:
+ case V4SI_FTYPE_V2DF_V2DF:
+ case V4HI_FTYPE_V4HI_V4HI:
+ case V4HI_FTYPE_V8QI_V8QI:
+ case V4HI_FTYPE_V2SI_V2SI:
+ case V4SF_FTYPE_V4SF_V4SF:
+ case V4SF_FTYPE_V4SF_V2SI:
+ case V4SF_FTYPE_V4SF_V2DF:
+ case V4SF_FTYPE_V4SF_DI:
+ case V4SF_FTYPE_V4SF_SI:
+ case V2DI_FTYPE_V2DI_V2DI:
+ case V2DI_FTYPE_V16QI_V16QI:
+ case V2DI_FTYPE_V4SI_V4SI:
+ case V2DI_FTYPE_V2DI_V16QI:
+ case V2DI_FTYPE_V2DF_V2DF:
+ case V2SI_FTYPE_V2SI_V2SI: