}
/* Return true when TYPE should be 128bit aligned for 32bit argument passing
- ABI. Only called if TARGET_SSE. */
+ ABI. */
static bool
-contains_128bit_aligned_vector_p (tree type)
+contains_aligned_value_p (tree type)
{
enum machine_mode mode = TYPE_MODE (type);
- if (SSE_REG_MODE_P (mode)
+ if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
&& (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
return true;
if (TYPE_ALIGN (type) < 128)
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL
- && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+ && contains_aligned_value_p (TREE_TYPE (field)))
return true;
}
break;
case ARRAY_TYPE:
/* Just for use if some languages passes arrays by value. */
- if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+ if (contains_aligned_value_p (TREE_TYPE (type)))
return true;
break;
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
- /* Decimal floating point is aligned to its natural boundary. */
- if (!TARGET_64BIT && !VALID_DFP_MODE_P (mode))
+ /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
+ if (!TARGET_64BIT && mode != TDmode)
{
/* i386 ABI defines all arguments to be 4 byte aligned. We have to
make an exception for SSE modes since these require 128bit
The handling here differs from field_alignment. ICC aligns MMX
arguments to 4 byte boundaries, while structure fields are aligned
to 8 byte boundaries. */
- if (!TARGET_SSE)
- align = PARM_BOUNDARY;
- else if (!type)
+ if (!type)
{
- if (!SSE_REG_MODE_P (mode))
+ if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
align = PARM_BOUNDARY;
}
else
{
- if (!contains_128bit_aligned_vector_p (type))
+ if (!contains_aligned_value_p (type))
align = PARM_BOUNDARY;
}
}
return (size != 1 && size != 2 && size != 4 && size != 8);
}
-int
-ix86_return_in_memory (const_tree type)
+bool
+ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
const enum machine_mode mode = type_natural_mode (type);
but differs notably in that when MMX is available, 8-byte vectors
are returned in memory, rather than in MMX registers. */
-int
-ix86_sol10_return_in_memory (const_tree type)
+bool
+ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
int size;
enum machine_mode mode = type_natural_mode (type);
return size > 12;
}
+bool
+ix86_i386elf_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return (TYPE_MODE (type) == BLKmode
+ || (VECTOR_MODE_P (TYPE_MODE (type)) && int_size_in_bytes (type) == 8));
+}
+
+bool
+ix86_i386interix_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
+{
+ return (TYPE_MODE (type) == BLKmode
+ || (AGGREGATE_TYPE_P (type) && int_size_in_bytes(type) > 8 ));
+}
+
/* When returning SSE vector types, we have a choice of either
(1) being abi incompatible with a -march switch, or
(2) generating an error.
Choose the STRUCT_VALUE_RTX hook because that's (at present) only
called in response to actually generating a caller or callee that
- uses such a type. As opposed to RETURN_IN_MEMORY, which is called
+ uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
via aggregate_value_p for general type probing from tree-ssa. */
static rtx
switch_to_section (text_section);
ASM_OUTPUT_LABEL (asm_out_file, name);
}
- if (TARGET_64BIT_MS_ABI)
- {
- xops[0] = gen_rtx_REG (Pmode, regno);
- xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
- output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
- output_asm_insn ("ret", xops);
- }
+
+ xops[0] = gen_rtx_REG (Pmode, regno);
+ xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
else
- {
- xops[0] = gen_rtx_REG (SImode, regno);
- xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
- output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
- output_asm_insn ("ret", xops);
- }
+ output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
+ output_asm_insn ("ret", xops);
}
if (NEED_INDICATE_EXEC_STACK)
xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
if (!flag_pic)
- output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+ {
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
+ else
+ output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
+ }
else
output_asm_insn ("call\t%a2", xops);
CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
if (flag_pic)
- output_asm_insn ("pop{l}\t%0", xops);
+ {
+ if (TARGET_64BIT)
+ output_asm_insn ("pop{q}\t%0", xops);
+ else
+ output_asm_insn ("pop{l}\t%0", xops);
+ }
}
else
{
return "";
if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
- output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
+ {
+ if (TARGET_64BIT)
+ output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
+ else
+ output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
+ }
else
- output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+ {
+ if (TARGET_64BIT)
+ output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+ else
+ output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
+ }
return "";
}
insns = get_insns ();
end_sequence ();
- CONST_OR_PURE_CALL_P (insns) = 1;
+ RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, dest, rax, x);
}
else if (TARGET_64BIT && TARGET_GNU2_TLS)
note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
- CONST_OR_PURE_CALL_P (insns) = 1;
+ RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, base, rax, note);
}
else if (TARGET_64BIT && TARGET_GNU2_TLS)
/* Canonicalize operand order. */
if (ix86_swap_binary_operands_p (code, mode, operands))
{
- rtx temp = src1;
+ rtx temp;
+
+ /* It is invalid to swap operands of different modes. */
+ gcc_assert (GET_MODE (src1) == GET_MODE (src2));
+
+ temp = src1;
src1 = src2;
src2 = temp;
}
void
ix86_expand_copysign (rtx operands[])
{
- enum machine_mode mode, vmode;
+ enum machine_mode mode;
rtx dest, op0, op1, mask, nmask;
dest = operands[0];
op1 = operands[2];
mode = GET_MODE (dest);
- vmode = mode == SFmode ? V4SFmode : V2DFmode;
if (GET_CODE (op0) == CONST_DOUBLE)
{
if (mode == SFmode || mode == DFmode)
{
+ enum machine_mode vmode;
+
+ vmode = mode == SFmode ? V4SFmode : V2DFmode;
+
if (op0 == CONST0_RTX (mode))
op0 = CONST0_RTX (vmode);
else
CONST0_RTX (SFmode), CONST0_RTX (SFmode));
else
v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
+
op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
}
}
+ else if (op0 != CONST0_RTX (mode))
+ op0 = force_reg (mode, op0);
mask = ix86_build_signbit_mask (mode, 0, 0);
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
-static const struct builtin_description bdesc_ptest[] =
-{
- /* SSE4.1 */
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
- { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
-};
-
static const struct builtin_description bdesc_pcmpestr[] =
{
/* SSE4.2 */
FLOAT128_FTYPE_FLOAT128,
FLOAT_FTYPE_FLOAT,
FLOAT128_FTYPE_FLOAT128_FLOAT128,
+ INT_FTYPE_V2DI_V2DI_PTEST,
INT64_FTYPE_V4SF,
INT64_FTYPE_V2DF,
INT_FTYPE_V16QI,
V8QI_FTYPE_V8QI,
V4SI_FTYPE_V4SI,
V4SI_FTYPE_V16QI,
- V4SI_FTYPE_V4SF,
V4SI_FTYPE_V8HI,
+ V4SI_FTYPE_V4SF,
V4SI_FTYPE_V2DF,
V4HI_FTYPE_V4HI,
V4SF_FTYPE_V4SF,
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
/* SSE3 */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
+ { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
- { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
+ { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+ { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
+
/* SSE4.2 */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
case FLOAT_FTYPE_FLOAT:
type = float_ftype_float;
break;
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ type = int_ftype_v2di_v2di;
+ break;
case INT64_FTYPE_V4SF:
type = int64_ftype_v4sf;
break;
case V4SI_FTYPE_V8HI:
type = v4si_ftype_v8hi;
break;
- case V4HI_FTYPE_V4HI:
- type = v4hi_ftype_v4hi;
- break;
case V4SI_FTYPE_V4SF:
type = v4si_ftype_v4sf;
break;
case V4SI_FTYPE_V2DF:
type = v4si_ftype_v2df;
break;
+ case V4HI_FTYPE_V4HI:
+ type = v4hi_ftype_v4hi;
+ break;
case V4SF_FTYPE_V4SF:
case V4SF_FTYPE_V4SF_VEC_MERGE:
type = v4sf_ftype_v4sf;
else
def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
- /* ptest insns. */
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
-
/* SSE */
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
- rtx pat, xops[3];
+ rtx pat;
tree arg0 = CALL_EXPR_ARG (exp, 0);
tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_normal (arg0);
if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
- /* ??? Using ix86_fixup_binary_operands is problematic when
- we've got mismatched modes. Fake it. */
-
- xops[0] = target;
- xops[1] = op0;
- xops[2] = op1;
-
- if (tmode == mode0 && tmode == mode1)
- {
- target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
- op0 = xops[1];
- op1 = xops[2];
- }
- else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
- {
- op0 = force_reg (mode0, op0);
- op1 = force_reg (mode1, op1);
- target = gen_reg_rtx (tmode);
- }
-
pat = GEN_FCN (icode) (target, op0, op1);
if (! pat)
return 0;
+
emit_insn (pat);
+
return target;
}
return target;
}
-/* Subroutine of ix86_expand_builtin to take care of insns with
- variable number of operands. */
+/* Subroutine of ix86_expand_builtin to take care of comi insns. */
static rtx
-ix86_expand_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
+ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
+ rtx target)
{
- rtx pat, real_target;
- unsigned int i, nargs;
- unsigned int nargs_constant = 0;
- int num_memory = 0;
- struct
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
+ enum rtx_code comparison = d->comparison;
+
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ /* Swap operands if we have a comparison that isn't available in
+ hardware. */
+ if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
{
- rtx op;
- enum machine_mode mode;
- } args[4];
- bool last_arg_count = false;
- enum insn_code icode = d->icode;
- const struct insn_data *insn_p = &insn_data[icode];
- enum machine_mode tmode = insn_p->operand[0].mode;
- enum machine_mode rmode = VOIDmode;
- bool swap = false;
+ rtx tmp = op1;
+ op1 = op0;
+ op0 = tmp;
+ }
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
+
+static rtx
+ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
+ rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
+ enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
enum rtx_code comparison = d->comparison;
- switch ((enum ix86_builtin_type) d->flag)
+ if (VECTOR_MODE_P (mode0))
+ op0 = safe_vector_operand (op0, mode0);
+ if (VECTOR_MODE_P (mode1))
+ op1 = safe_vector_operand (op1, mode1);
+
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ if ((optimize && !register_operand (op0, mode0))
+ || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if ((optimize && !register_operand (op1, mode1))
+ || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (d->icode) (op0, op1);
+ if (! pat)
+ return 0;
+ emit_insn (pat);
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (comparison, QImode,
+ SET_DEST (pat),
+ const0_rtx)));
+
+ return SUBREG_REG (target);
+}
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpestr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
+ tree arg4 = CALL_EXPR_ARG (exp, 4);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ rtx op3 = expand_normal (arg3);
+ rtx op4 = expand_normal (arg4);
+ enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modei3 = insn_data[d->icode].operand[3].mode;
+ modev4 = insn_data[d->icode].operand[4].mode;
+ modei5 = insn_data[d->icode].operand[5].mode;
+ modeimm = insn_data[d->icode].operand[6].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev4))
+ op2 = safe_vector_operand (op2, modev4);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
+ op1 = copy_to_mode_reg (modei3, op1);
+ if ((optimize && !register_operand (op2, modev4))
+ || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
+ op2 = copy_to_mode_reg (modev4, op2);
+ if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
+ op3 = copy_to_mode_reg (modei5, op3);
+
+ if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
{
- case FLOAT128_FTYPE_FLOAT128:
- case FLOAT_FTYPE_FLOAT:
- case INT64_FTYPE_V4SF:
- case INT64_FTYPE_V2DF:
- case INT_FTYPE_V16QI:
- case INT_FTYPE_V8QI:
- case INT_FTYPE_V4SF:
- case INT_FTYPE_V2DF:
- case V16QI_FTYPE_V16QI:
- case V8HI_FTYPE_V8HI:
- case V8HI_FTYPE_V16QI:
- case V8QI_FTYPE_V8QI:
- case V4SI_FTYPE_V4SI:
- case V4SI_FTYPE_V16QI:
- case V4SI_FTYPE_V4SF:
- case V4SI_FTYPE_V8HI:
- case V4SI_FTYPE_V2DF:
- case V4HI_FTYPE_V4HI:
- case V4SF_FTYPE_V4SF:
- case V4SF_FTYPE_V4SI:
- case V4SF_FTYPE_V2DF:
- case V2DI_FTYPE_V2DI:
- case V2DI_FTYPE_V16QI:
- case V2DI_FTYPE_V8HI:
- case V2DI_FTYPE_V4SI:
- case V2DF_FTYPE_V2DF:
- case V2DF_FTYPE_V4SI:
- case V2DF_FTYPE_V4SF:
- case V2DF_FTYPE_V2SI:
- case V2SI_FTYPE_V2SI:
- case V2SI_FTYPE_V4SF:
- case V2SI_FTYPE_V2SF:
- case V2SI_FTYPE_V2DF:
- case V2SF_FTYPE_V2SF:
- case V2SF_FTYPE_V2SI:
+ error ("the fifth argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPESTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPESTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+
+/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+
+static rtx
+ix86_expand_sse_pcmpistr (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ rtx scratch0, scratch1;
+ rtx op0 = expand_normal (arg0);
+ rtx op1 = expand_normal (arg1);
+ rtx op2 = expand_normal (arg2);
+ enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
+
+ tmode0 = insn_data[d->icode].operand[0].mode;
+ tmode1 = insn_data[d->icode].operand[1].mode;
+ modev2 = insn_data[d->icode].operand[2].mode;
+ modev3 = insn_data[d->icode].operand[3].mode;
+ modeimm = insn_data[d->icode].operand[4].mode;
+
+ if (VECTOR_MODE_P (modev2))
+ op0 = safe_vector_operand (op0, modev2);
+ if (VECTOR_MODE_P (modev3))
+ op1 = safe_vector_operand (op1, modev3);
+
+ if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
+ op0 = copy_to_mode_reg (modev2, op0);
+ if ((optimize && !register_operand (op1, modev3))
+ || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
+ op1 = copy_to_mode_reg (modev3, op1);
+
+ if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ {
+ error ("the third argument must be a 8-bit immediate");
+ return const0_rtx;
+ }
+
+ if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode0
+ || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
+ target = gen_reg_rtx (tmode0);
+
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
+ }
+ else if (d->code == IX86_BUILTIN_PCMPISTRM128)
+ {
+ if (optimize || !target
+ || GET_MODE (target) != tmode1
+ || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
+ target = gen_reg_rtx (tmode1);
+
+ scratch0 = gen_reg_rtx (tmode0);
+
+ pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ }
+ else
+ {
+ gcc_assert (d->flag);
+
+ scratch0 = gen_reg_rtx (tmode0);
+ scratch1 = gen_reg_rtx (tmode1);
+
+ pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ }
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ if (d->flag)
+ {
+ target = gen_reg_rtx (SImode);
+ emit_move_insn (target, const0_rtx);
+ target = gen_rtx_SUBREG (QImode, target, 0);
+
+ emit_insn
+ (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
+ gen_rtx_fmt_ee (EQ, QImode,
+ gen_rtx_REG ((enum machine_mode) d->flag,
+ FLAGS_REG),
+ const0_rtx)));
+ return SUBREG_REG (target);
+ }
+ else
+ return target;
+}
+
+/* Subroutine of ix86_expand_builtin to take care of insns with
+ variable number of operands. */
+
+static rtx
+ix86_expand_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
+{
+ rtx pat, real_target;
+ unsigned int i, nargs;
+ unsigned int nargs_constant = 0;
+ int num_memory = 0;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[4];
+ bool last_arg_count = false;
+ enum insn_code icode = d->icode;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum machine_mode rmode = VOIDmode;
+ bool swap = false;
+ enum rtx_code comparison = d->comparison;
+
+ switch ((enum ix86_builtin_type) d->flag)
+ {
+ case INT_FTYPE_V2DI_V2DI_PTEST:
+ return ix86_expand_sse_ptest (d, exp, target);
+ case FLOAT128_FTYPE_FLOAT128:
+ case FLOAT_FTYPE_FLOAT:
+ case INT64_FTYPE_V4SF:
+ case INT64_FTYPE_V2DF:
+ case INT_FTYPE_V16QI:
+ case INT_FTYPE_V8QI:
+ case INT_FTYPE_V4SF:
+ case INT_FTYPE_V2DF:
+ case V16QI_FTYPE_V16QI:
+ case V8HI_FTYPE_V8HI:
+ case V8HI_FTYPE_V16QI:
+ case V8QI_FTYPE_V8QI:
+ case V4SI_FTYPE_V4SI:
+ case V4SI_FTYPE_V16QI:
+ case V4SI_FTYPE_V4SF:
+ case V4SI_FTYPE_V8HI:
+ case V4SI_FTYPE_V2DF:
+ case V4HI_FTYPE_V4HI:
+ case V4SF_FTYPE_V4SF:
+ case V4SF_FTYPE_V4SI:
+ case V4SF_FTYPE_V2DF:
+ case V2DI_FTYPE_V2DI:
+ case V2DI_FTYPE_V16QI:
+ case V2DI_FTYPE_V8HI:
+ case V2DI_FTYPE_V4SI:
+ case V2DF_FTYPE_V2DF:
+ case V2DF_FTYPE_V4SI:
+ case V2DF_FTYPE_V4SF:
+ case V2DF_FTYPE_V2SI:
+ case V2SI_FTYPE_V2SI:
+ case V2SI_FTYPE_V4SF:
+ case V2SI_FTYPE_V2SF:
+ case V2SI_FTYPE_V2DF:
+ case V2SF_FTYPE_V2SF:
+ case V2SF_FTYPE_V2SI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
case 1:
pat = GEN_FCN (icode) (real_target, args[0].op);
break;
- case 2:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op);
- break;
- case 4:
- pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
- args[2].op, args[3].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
- return target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of special insns
- with variable number of operands. */
-
-static rtx
-ix86_expand_special_args_builtin (const struct builtin_description *d,
- tree exp, rtx target)
-{
- tree arg;
- rtx pat, op;
- unsigned int i, nargs, arg_adjust, memory;
- struct
- {
- rtx op;
- enum machine_mode mode;
- } args[2];
- enum insn_code icode = d->icode;
- bool last_arg_constant = false;
- const struct insn_data *insn_p = &insn_data[icode];
- enum machine_mode tmode = insn_p->operand[0].mode;
- enum { load, store } class;
-
- switch ((enum ix86_special_builtin_type) d->flag)
- {
- case VOID_FTYPE_VOID:
- emit_insn (GEN_FCN (icode) (target));
- return 0;
- case V2DI_FTYPE_PV2DI:
- case V16QI_FTYPE_PCCHAR:
- case V4SF_FTYPE_PCFLOAT:
- case V2DF_FTYPE_PCDOUBLE:
- nargs = 1;
- class = load;
- memory = 0;
- break;
- case VOID_FTYPE_PV2SF_V4SF:
- case VOID_FTYPE_PV2DI_V2DI:
- case VOID_FTYPE_PCHAR_V16QI:
- case VOID_FTYPE_PFLOAT_V4SF:
- case VOID_FTYPE_PDOUBLE_V2DF:
- case VOID_FTYPE_PDI_DI:
- case VOID_FTYPE_PINT_INT:
- nargs = 1;
- class = store;
- /* Reserve memory operand for target. */
- memory = ARRAY_SIZE (args);
- break;
- case V4SF_FTYPE_V4SF_PCV2SF:
- case V2DF_FTYPE_V2DF_PCDOUBLE:
- nargs = 2;
- class = load;
- memory = 1;
- break;
- default:
- gcc_unreachable ();
- }
-
- gcc_assert (nargs <= ARRAY_SIZE (args));
-
- if (class == store)
- {
- arg = CALL_EXPR_ARG (exp, 0);
- op = expand_normal (arg);
- gcc_assert (target == 0);
- target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
- arg_adjust = 1;
- }
- else
- {
- arg_adjust = 0;
- if (optimize
- || target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_p->operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
- }
-
- for (i = 0; i < nargs; i++)
- {
- enum machine_mode mode = insn_p->operand[i + 1].mode;
- bool match;
-
- arg = CALL_EXPR_ARG (exp, i + arg_adjust);
- op = expand_normal (arg);
- match = (*insn_p->operand[i + 1].predicate) (op, mode);
-
- if (last_arg_constant && (i + 1) == nargs)
- {
- if (!match)
- switch (icode)
- {
- default:
- error ("the last argument must be an 8-bit immediate");
- return const0_rtx;
- }
- }
- else
- {
- if (i == memory)
- {
- /* This must be the memory operand. */
- op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- }
- else
- {
- /* This must be register. */
- if (VECTOR_MODE_P (mode))
- op = safe_vector_operand (op, mode);
-
- gcc_assert (GET_MODE (op) == mode
- || GET_MODE (op) == VOIDmode);
- op = copy_to_mode_reg (mode, op);
- }
- }
-
- args[i].op = op;
- args[i].mode = mode;
- }
-
- switch (nargs)
- {
- case 1:
- pat = GEN_FCN (icode) (target, args[0].op);
- break;
- case 2:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
- break;
- case 3:
- pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
- args[2].op);
- break;
- default:
- gcc_unreachable ();
- }
-
- if (! pat)
- return 0;
- emit_insn (pat);
- return class == store ? 0 : target;
-}
-
-/* Subroutine of ix86_expand_builtin to take care of comi insns. */
-
-static rtx
-ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- /* Swap operands if we have a comparison that isn't available in
- hardware. */
- if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
- {
- rtx tmp = op1;
- op1 = op0;
- op0 = tmp;
- }
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
-
-/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
-
-static rtx
-ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
- rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
- enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
- enum rtx_code comparison = d->comparison;
-
- if (VECTOR_MODE_P (mode0))
- op0 = safe_vector_operand (op0, mode0);
- if (VECTOR_MODE_P (mode1))
- op1 = safe_vector_operand (op1, mode1);
-
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- if ((optimize && !register_operand (op0, mode0))
- || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if ((optimize && !register_operand (op1, mode1))
- || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (d->icode) (op0, op1);
- if (! pat)
- return 0;
- emit_insn (pat);
- emit_insn (gen_rtx_SET (VOIDmode,
- gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (comparison, QImode,
- SET_DEST (pat),
- const0_rtx)));
-
- return SUBREG_REG (target);
-}
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
-
-static rtx
-ix86_expand_sse_pcmpestr (const struct builtin_description *d,
- tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- tree arg3 = CALL_EXPR_ARG (exp, 3);
- tree arg4 = CALL_EXPR_ARG (exp, 4);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- rtx op3 = expand_normal (arg3);
- rtx op4 = expand_normal (arg4);
- enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modei3 = insn_data[d->icode].operand[3].mode;
- modev4 = insn_data[d->icode].operand[4].mode;
- modei5 = insn_data[d->icode].operand[5].mode;
- modeimm = insn_data[d->icode].operand[6].mode;
-
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev4))
- op2 = safe_vector_operand (op2, modev4);
-
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
- op1 = copy_to_mode_reg (modei3, op1);
- if ((optimize && !register_operand (op2, modev4))
- || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
- op2 = copy_to_mode_reg (modev4, op2);
- if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
- op3 = copy_to_mode_reg (modei5, op3);
-
- if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
- {
- error ("the fifth argument must be a 8-bit immediate");
- return const0_rtx;
- }
-
- if (d->code == IX86_BUILTIN_PCMPESTRI128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
-
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
- }
- else if (d->code == IX86_BUILTIN_PCMPESTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
-
- scratch0 = gen_reg_rtx (tmode0);
-
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
- }
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
-
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
+ case 2:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
+ args[2].op, args[3].op);
+ break;
+ default:
+ gcc_unreachable ();
}
if (! pat)
return 0;
emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
+ return target;
}
-
-/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
+/* Subroutine of ix86_expand_builtin to take care of special insns
+ with variable number of operands. */
static rtx
-ix86_expand_sse_pcmpistr (const struct builtin_description *d,
- tree exp, rtx target)
+ix86_expand_special_args_builtin (const struct builtin_description *d,
+ tree exp, rtx target)
{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- tree arg2 = CALL_EXPR_ARG (exp, 2);
- rtx scratch0, scratch1;
- rtx op0 = expand_normal (arg0);
- rtx op1 = expand_normal (arg1);
- rtx op2 = expand_normal (arg2);
- enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
-
- tmode0 = insn_data[d->icode].operand[0].mode;
- tmode1 = insn_data[d->icode].operand[1].mode;
- modev2 = insn_data[d->icode].operand[2].mode;
- modev3 = insn_data[d->icode].operand[3].mode;
- modeimm = insn_data[d->icode].operand[4].mode;
+ tree arg;
+ rtx pat, op;
+ unsigned int i, nargs, arg_adjust, memory;
+ struct
+ {
+ rtx op;
+ enum machine_mode mode;
+ } args[2];
+ enum insn_code icode = d->icode;
+ bool last_arg_constant = false;
+ const struct insn_data *insn_p = &insn_data[icode];
+ enum machine_mode tmode = insn_p->operand[0].mode;
+ enum { load, store } class;
- if (VECTOR_MODE_P (modev2))
- op0 = safe_vector_operand (op0, modev2);
- if (VECTOR_MODE_P (modev3))
- op1 = safe_vector_operand (op1, modev3);
+ switch ((enum ix86_special_builtin_type) d->flag)
+ {
+ case VOID_FTYPE_VOID:
+ emit_insn (GEN_FCN (icode) (target));
+ return 0;
+ case V2DI_FTYPE_PV2DI:
+ case V16QI_FTYPE_PCCHAR:
+ case V4SF_FTYPE_PCFLOAT:
+ case V2DF_FTYPE_PCDOUBLE:
+ nargs = 1;
+ class = load;
+ memory = 0;
+ break;
+ case VOID_FTYPE_PV2SF_V4SF:
+ case VOID_FTYPE_PV2DI_V2DI:
+ case VOID_FTYPE_PCHAR_V16QI:
+ case VOID_FTYPE_PFLOAT_V4SF:
+ case VOID_FTYPE_PDOUBLE_V2DF:
+ case VOID_FTYPE_PDI_DI:
+ case VOID_FTYPE_PINT_INT:
+ nargs = 1;
+ class = store;
+ /* Reserve memory operand for target. */
+ memory = ARRAY_SIZE (args);
+ break;
+ case V4SF_FTYPE_V4SF_PCV2SF:
+ case V2DF_FTYPE_V2DF_PCDOUBLE:
+ nargs = 2;
+ class = load;
+ memory = 1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
- if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
- op0 = copy_to_mode_reg (modev2, op0);
- if ((optimize && !register_operand (op1, modev3))
- || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
- op1 = copy_to_mode_reg (modev3, op1);
+ gcc_assert (nargs <= ARRAY_SIZE (args));
- if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
+ if (class == store)
{
- error ("the third argument must be a 8-bit immediate");
- return const0_rtx;
+ arg = CALL_EXPR_ARG (exp, 0);
+ op = expand_normal (arg);
+ gcc_assert (target == 0);
+ target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
+ arg_adjust = 1;
+ }
+ else
+ {
+ arg_adjust = 0;
+ if (optimize
+ || target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_p->operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
}
- if (d->code == IX86_BUILTIN_PCMPISTRI128)
+ for (i = 0; i < nargs; i++)
{
- if (optimize || !target
- || GET_MODE (target) != tmode0
- || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
- target = gen_reg_rtx (tmode0);
+ enum machine_mode mode = insn_p->operand[i + 1].mode;
+ bool match;
- scratch1 = gen_reg_rtx (tmode1);
+ arg = CALL_EXPR_ARG (exp, i + arg_adjust);
+ op = expand_normal (arg);
+ match = (*insn_p->operand[i + 1].predicate) (op, mode);
- pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
- }
- else if (d->code == IX86_BUILTIN_PCMPISTRM128)
- {
- if (optimize || !target
- || GET_MODE (target) != tmode1
- || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
- target = gen_reg_rtx (tmode1);
+ if (last_arg_constant && (i + 1) == nargs)
+ {
+ if (!match)
+ switch (icode)
+ {
+ default:
+ error ("the last argument must be an 8-bit immediate");
+ return const0_rtx;
+ }
+ }
+ else
+ {
+ if (i == memory)
+ {
+ /* This must be the memory operand. */
+ op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ }
+ else
+ {
+ /* This must be register. */
+ if (VECTOR_MODE_P (mode))
+ op = safe_vector_operand (op, mode);
- scratch0 = gen_reg_rtx (tmode0);
+ gcc_assert (GET_MODE (op) == mode
+ || GET_MODE (op) == VOIDmode);
+ op = copy_to_mode_reg (mode, op);
+ }
+ }
- pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
+ args[i].op = op;
+ args[i].mode = mode;
}
- else
- {
- gcc_assert (d->flag);
-
- scratch0 = gen_reg_rtx (tmode0);
- scratch1 = gen_reg_rtx (tmode1);
- pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
+ switch (nargs)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (target, args[0].op);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
+ break;
+ default:
+ gcc_unreachable ();
}
if (! pat)
return 0;
-
emit_insn (pat);
-
- if (d->flag)
- {
- target = gen_reg_rtx (SImode);
- emit_move_insn (target, const0_rtx);
- target = gen_rtx_SUBREG (QImode, target, 0);
-
- emit_insn
- (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
- gen_rtx_fmt_ee (EQ, QImode,
- gen_rtx_REG ((enum machine_mode) d->flag,
- FLAGS_REG),
- const0_rtx)));
- return SUBREG_REG (target);
- }
- else
- return target;
+ return class == store ? 0 : target;
}
/* Return the integer constant in ARG. Constrain it to be in the range
if (d->code == fcode)
return ix86_expand_sse_comi (d, exp, target);
- for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
- if (d->code == fcode)
- return ix86_expand_sse_ptest (d, exp, target);
-
for (i = 0, d = bdesc_pcmpestr;
i < ARRAY_SIZE (bdesc_pcmpestr);
i++, d++)
/* Put the this parameter into %eax. */
xops[0] = this_param;
xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
}
else
this_reg = NULL_RTX;
{
xops[0] = this_reg;
xops[1] = this_param;
- output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
+ if (TARGET_64BIT)
+ output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
+ else
+ output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
}
xops[0] = XEXP (DECL_RTL (function), 0);
enum machine_mode vsimode;
rtx new_target;
rtx x, tmp;
+ bool use_vector_set = false;
+
+ switch (mode)
+ {
+ case V2DImode:
+ use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
+ break;
+ case V16QImode:
+ case V4SImode:
+ case V4SFmode:
+ use_vector_set = TARGET_SSE4_1;
+ break;
+ case V8HImode:
+ use_vector_set = TARGET_SSE2;
+ break;
+ case V4HImode:
+ use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
+ default:
+ break;
+ }
+
+ if (use_vector_set)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
+ var = force_reg (GET_MODE_INNER (mode), var);
+ ix86_expand_vector_set (mmx_ok, target, var, one_var);
+ return true;
+ }
switch (mode)
{
/* For V4SF and V4SI, we implement a concat of two V2 vectors.
Recurse to load the two halves. */
- op0 = gen_reg_rtx (half_mode);
- v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
- ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
-
op1 = gen_reg_rtx (half_mode);
v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
+ op0 = gen_reg_rtx (half_mode);
+ v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
+ ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
+
use_vec_concat = true;
}
break;
if (use_vec_concat)
{
- if (!register_operand (op0, half_mode))
- op0 = force_reg (half_mode, op0);
if (!register_operand (op1, half_mode))
op1 = force_reg (half_mode, op1);
+ if (!register_operand (op0, half_mode))
+ op0 = force_reg (half_mode, op0);
emit_insn (gen_rtx_SET (VOIDmode, target,
gen_rtx_VEC_CONCAT (mode, op0, op1)));